#!/usr/bin/env python
"""Multithreaded downloader for per-shot diagnostic data.

A pool of worker threads fetches one file per shot through
``numpy.DataSource`` and loads it with ``numpy.load``; ``main`` then stores
every downloaded shot as a compressed ``.npz`` archive.

The module runs under both Python 2 and Python 3.
"""
import os
import re
import sys
import threading
import time

try:
    import Queue  # Python 2
except ImportError:
    import queue as Queue  # Python 3 name of the same module

try:
    import urllib2  # Python 2; only needed by the optional proxy setup below
except ImportError:
    import urllib.request as urllib2  # Python 3 home of the same opener API

from numpy import load, savez_compressed

try:
    from numpy import DataSource
except ImportError:
    # NumPy >= 2.0 no longer exposes DataSource in the top-level namespace.
    from numpy.lib.npyio import DataSource

# Optional: route the downloads over a proxy.
# proxy_handler = urllib2.ProxyHandler({'http': '192.168.250.134:8888'})
# opener = urllib2.build_opener(proxy_handler)
# urllib2.install_opener(opener)


def main():
    """Download a range of shots of one diagnostic and save them to disk.

    A very simple data downloader that uses multithreading and direct access
    to the saved files in order to reach the maximal downloading speed.
    Every downloaded shot is written to ``<diagn>_<shot>.npz`` in the current
    directory.
    """
    t = time.time()
    diagn = 'plasma_current'
    shots = range(10000, 11000)

    results = ParallelDownload(diagn, shots, n_threats=10)

    print("saving data")
    for shot, d in results.items():
        print(shot)
        # ``d`` must be a mapping of names to arrays (e.g. a loaded archive).
        savez_compressed(diagn + '_' + str(shot), **d)

    if results:
        print("mean time %s" % ((time.time() - t) / len(results),))
    else:
        # Nothing was fetched; a mean over zero shots would divide by zero.
        print("no data were downloaded")


class ThreadUrl(threading.Thread):
    """Worker thread that downloads and loads files named in a job queue.

    Parameters
    ----------
    queue : Queue.Queue
        Job queue holding ``(host, shot)`` tuples, where ``host`` is the URL
        (or local path) of the file to load.
    output_queue : Queue.Queue
        Queue receiving one ``(shot, data)`` tuple per successful download.
    N : int
        Total number of jobs; used only for the progress percentage.
    """

    def __init__(self, queue, output_queue, N):
        threading.Thread.__init__(self)
        self.queue = queue
        self.queue_out = output_queue
        self.N = N

    def run(self):
        """Process jobs forever; the thread is expected to run as a daemon."""
        while True:
            self._report_progress()

            # Grab the next job from the queue (blocks until one is ready).
            host, shot = self.queue.get()
            try:
                self.queue_out.put((shot, self._fetch(host)))
            except IOError:
                print(host + ' was not found')
            except Exception as err:
                # Keep the worker alive: if every worker died on an
                # unexpected error, the remaining jobs would never be marked
                # done and ``queue.join()`` would block forever.
                print('%s could not be loaded: %s' % (host, err))
            finally:
                self.queue.task_done()

    def _report_progress(self):
        """Rewrite the progress line with the share of finished downloads."""
        if not self.N:
            return  # no jobs at all: avoid dividing by zero
        finished = self.queue_out.qsize()
        sys.stdout.write('\rdownloaded: %3.0f %%' % (100.0 * finished / self.N))
        sys.stdout.flush()

    @staticmethod
    def _fetch(host):
        """Open ``host`` through ``DataSource`` and load it with ``numpy.load``.

        The file is opened in binary mode because ``numpy.load`` reads raw
        bytes; text mode breaks on Python 3 and on Windows.
        """
        # NOTE(review): the handle is deliberately left open, since a loaded
        # .npz archive reads its members lazily from it. With very many shots
        # this may exhaust file descriptors - confirm whether eager loading
        # is acceptable for the callers.
        handle = DataSource().open(host, 'rb')
        return load(handle)


def ParallelDownload(diagn, shots, n_threats=5):
    """Download the data of ``diagn`` for all ``shots`` in parallel.

    Parameters
    ----------
    diagn : str
        Name of the diagnostic, passed to ``get_address``.
    shots : iterable of int
        Shot numbers to download.
    n_threats : int, optional
        Number of worker threads; values above 10 are reduced to 10 and
        values below 1 are raised to 1.

    Returns
    -------
    dict
        Maps each successfully downloaded shot number to the object returned
        by ``numpy.load``. Shots that failed to download are missing.
    """
    shots = list(shots)
    if not shots:
        # Nothing to do; also avoids querying the server and dividing by zero.
        return {}

    url_0 = get_address(diagn)

    start = time.time()
    links = [url_0 % s for s in shots]

    jobs = Queue.Queue()
    output_queue = Queue.Queue()

    if n_threats > 10:
        print('DDos attack :D, n_threats decreased to 10')
        n_threats = 10
    elif n_threats < 1:
        # Without any worker ``jobs.join()`` below would never return.
        n_threats = 1

    # Spawn a pool of threads and pass them the queue instances.
    for _ in range(n_threats):
        worker = ThreadUrl(jobs, output_queue, len(links))
        worker.daemon = True  # do not keep the process alive after main exits
        worker.start()

    # Populate the queue with jobs.
    for host, shot in zip(links, shots):
        jobs.put((host, shot))

    # Wait until every job has been processed.
    jobs.join()

    print("\nmean time: %2.3fs" % ((time.time() - start) / len(shots)))

    results = {}
    while not output_queue.empty():
        shot, data = output_queue.get()
        results[shot] = data

    return results


def _read_remote(url):
    """Return the whole content of ``url`` and close the handle afterwards."""
    handle = DataSource().open(url)
    try:
        return handle.read()
    finally:
        handle.close()


def get_address(diagnostics):
    """Build the URL template of the data files of one diagnostic.

    Reads the link recorded for shot ``0`` and the number stored in
    ``shots/0/ShotNo``, then replaces that number in the link by ``%d``.

    Parameters
    ----------
    diagnostics : str
        Name of the diagnostic.

    Returns
    -------
    str
        URL template with ``%d`` in place of the shot number.

    Raises
    ------
    ValueError
        If the link does not contain the shot number, so that no template
        can be derived from it.
    """
    address = _read_remote('http://golem.fjfi.cvut.cz/utils/data/0/' + diagnostics + '.link')
    ShotNo = int(_read_remote('http://golem.fjfi.cvut.cz/shots/0/ShotNo'))

    # Drop surrounding whitespace (e.g. a trailing newline) and escape
    # literal percent signs so that only the inserted ``%d`` is a placeholder.
    address = address.strip().replace('%', '%%')
    address, replaced = re.subn(r'/' + str(ShotNo) + '/', r'/%d/', address)
    if not replaced:
        raise ValueError('shot number %d was not found in the link %r'
                         % (ShotNo, address))
    return address


if __name__ == "__main__":
    main()