#!/usr/bin/env python
import Queue
import threading
import urllib2
import time
import sys,os, re
from numpy import *
#from collections import deque
# To download through an HTTP proxy, uncomment the three lines below.
#proxy_handler = urllib2.ProxyHandler({'http': '192.168.250.134:8888'})
#opener = urllib2.build_opener(proxy_handler)
#urllib2.install_opener(opener)
def main():
    """Download one diagnostic for a range of shots and save each result.

    A very simple data downloader that uses multithreading and direct access
    to the saved files in order to reach maximal download speed.  Every shot
    returned by ParallelDownload() is written with numpy's savez_compressed()
    under the name ``<diagnostic>_<shot>`` in the current working directory.
    """
    t = time.time()
    diagn = 'plasma_current'
    shots = range(10000, 11000)
    results = ParallelDownload(diagn, shots, n_threats=10)

    print("saving data")
    # Walk the shots in ascending order so the output is deterministic
    # (plain dict iteration order is arbitrary on old interpreters).
    for shot in sorted(results):
        print(shot)
        # The loaded object is expanded into keyword arguments, so it has to
        # behave like a mapping of names to arrays.
        savez_compressed(diagn + '_' + str(shot), **results[shot])

    # When nothing was downloaded there is no mean to compute; dividing by
    # len(results) would raise ZeroDivisionError.
    if results:
        print("mean time %s" % ((time.time() - t) / len(results),))
    else:
        print("no data were downloaded")
class ThreadUrl(threading.Thread):
    """Worker thread that fetches and loads the files named in a job queue.

    Jobs are ``(host, shot)`` tuples read from ``queue``.  Every file that
    loads successfully is put on ``output_queue`` as ``(shot, data)``.  ``N``
    is the total number of jobs and is used only for the progress display.
    """

    def __init__(self, queue, output_queue, N):
        threading.Thread.__init__(self)
        self.queue = queue
        self.queue_out = output_queue
        self.N = N

    def _report_progress(self):
        """Rewrite the current stdout line with the share of finished jobs."""
        done = self.queue_out.qsize()
        # An empty job list would otherwise divide by zero.
        percent = 100.0 * done / self.N if self.N else 100.0
        sys.stdout.write('\rdownloaded: %3.0f %%' % percent)
        sys.stdout.flush()

    def _fetch(self, host):
        """Open ``host`` through a numpy DataSource and return load()'s result."""
        # The handle is intentionally not closed here: if the payload is an
        # .npz archive, load() returns a lazy reader that still needs it.
        return load(DataSource().open(host))

    def run(self):
        """Process jobs until the interpreter exits.

        The loop never terminates on its own, so the thread should be started
        as a daemon.  task_done() is called for every job, successful or not,
        so that a join() on the job queue can return.
        """
        while True:
            self._report_progress()
            # Grab the next (url, shot number) pair; blocks while idle.
            host, shot = self.queue.get()
            try:
                self.queue_out.put((shot, self._fetch(host)))
            except IOError:
                print(host + ' was not found')
            except Exception as err:
                # A corrupt or unexpected file must not kill the worker: with
                # no workers left the remaining jobs would never be consumed
                # and a join() on the job queue would block forever.
                print('%s could not be loaded: %s' % (host, err))
            finally:
                self.queue.task_done()
def ParallelDownload(diagn, shots, n_threats=5):
    """Download one diagnostic for many shots with a pool of worker threads.

    Parameters
    ----------
    diagn : str
        Name of the diagnostic; passed to get_address() to obtain the URL
        template.
    shots : iterable of int
        Shot numbers to download.  Any iterable is accepted, including
        one-shot iterators.
    n_threats : int, optional
        Number of worker threads (the spelling is kept for compatibility
        with existing callers).  Values above 10 are reduced to 10 and
        values below 1 are raised to 1.

    Returns
    -------
    dict
        Maps shot number to the loaded data for every shot a worker managed
        to load; shots that failed are simply absent.  An empty ``shots``
        yields an empty dict without any network access.
    """
    # Materialise once: the shots are traversed several times below and an
    # iterator would be exhausted after the first pass.
    shots = list(shots)
    if not shots:
        # Nothing to do; also avoids the division by zero in the timing line.
        return {}

    url_0 = get_address(diagn)
    start = time.time()
    links = [url_0 % s for s in shots]

    queue = Queue.Queue()
    output_queue = Queue.Queue()

    if n_threats > 10:
        print('DDos attack :D, n_threats decreased to 10')
        n_threats = 10
    if n_threats < 1:
        # Without any worker nothing consumes the queue and join() below
        # would block forever.
        n_threats = 1

    # Spawn the pool; daemon threads do not keep the interpreter alive once
    # the main thread finishes.
    for _ in range(n_threats):
        worker = ThreadUrl(queue, output_queue, len(links))
        worker.daemon = True
        worker.start()

    # Hand every (url, shot) pair to the workers.
    for job in zip(links, shots):
        queue.put(job)

    # Block until task_done() has been called for every job.
    queue.join()
    print("\nmean time: %2.3fs" % ((time.time() - start) / len(shots)))

    # Drain the output queue into a plain dict keyed by shot number.
    results = {}
    while not output_queue.empty():
        shot, data = output_queue.get()
        results[shot] = data
    return results
def get_address(diagnostics):
    """Return a URL template for ``diagnostics`` with ``%d`` as the shot number.

    Two resources are read from the server: the ``.link`` file of the
    diagnostic under ``utils/data/0/`` and the number stored in
    ``shots/0/ShotNo`` (presumably shot ``0`` is an alias for the most recent
    shot -- confirm with the server layout).  Every ``/<ShotNo>/`` path
    component in the link is replaced by ``/%d/`` so the caller can fill in
    any shot with ``template % shot``.

    Raises whatever DataSource.open() raises when a resource is unavailable,
    and ValueError when the ShotNo file does not contain an integer.
    """
    source = DataSource()

    handle = source.open('http://golem.fjfi.cvut.cz/utils/data/0/' + diagnostics + '.link')
    try:
        # Strip surrounding whitespace so a trailing newline in the link file
        # does not end up inside the generated URLs.
        address = handle.read().strip()
    finally:
        handle.close()

    handle = source.open('http://golem.fjfi.cvut.cz/shots/0/ShotNo')
    try:
        ShotNo = int(handle.read())
    finally:
        handle.close()

    # Escape literal percent signs first: the result is used as a %-format
    # string, where an unescaped '%' would be read as a conversion specifier.
    address = address.replace('%', '%%')
    return address.replace('/%d/' % ShotNo, '/%d/')
# Start the download only when the file is executed as a script, so that the
# module can be imported without triggering it.
if __name__ == '__main__':
    main()