# SW/pygolem/utilities.py

#!/usr/bin/python2

"""
AUTHOR: Michal Odstrcil 2012
Provide simple web interface for pygolem for data downloading
"""



from numpy import *
import os, re, sys, time
from scipy.signal import medfilt

def fix_str(string, remove_latex = False):
    """Sanitize a label so that it causes no problems during plotting.

    param string: value to clean; it is converted with ``str``. An input for
        which ``isnone`` gives a true value yields an empty string.
    param bool remove_latex: if True, only the characters ``$``, ``{`` and
        ``}`` are stripped and the result is returned.

    Without ``remove_latex``, a string that contains no ``$`` gets a backslash
    put in front of every ``_ ^ { } # %`` that is not already preceded by one.
    A string that contains ``$`` is returned unchanged.
    """
    if isnone(string):
        return ""
    string = str(string)

    if remove_latex:
        return re.sub(r'[${}]', "", string)

    # hot fix of "latex font": if you can use $, solve it yourself
    if '$' not in string:
        # A negative lookbehind is used instead of consuming the previous
        # character, so a special character at the very start of the string
        # or directly after another special character is escaped as well.
        string = re.sub(r'(?<!\\)([_^{}#%])', r'\\\1', string)
    return string

def get_units(label):
    """Extract the units, i.e. the bracketed part, from a full ylabel.

    Returns "" when `label` holds no ``[...]`` group with something inside.
    Otherwise the text matched in front of the bracketed part is discarded,
    the bracket characters are removed and the remainder is passed through
    ``fix_str(..., True)``, which strips ``$``, ``{`` and ``}``.
    """
    if not re.match(r".*\[.+\]", label):
        return ""

    bracketed = re.sub(r"(.+)(\[.+\])", r"\2", label)
    without_brackets = re.sub(r"[\[\]]", "", bracketed)
    return fix_str(without_brackets, True)
	
###################### SIMPLE DATA LOADING ##############################


def saveconst(fname, const, fmt = "%g"):
    """ Save number `const` as text file `fname`
    param str fname:  Name of saved file
    param scalar const: saved constant (or string)
    param str fmt: printf-style format used for a non-string `const`;
                   a string is always written as it is

    The value is followed by a single newline.
    """
    # Build the text before opening the file, so that a formatting error
    # does not leave a truncated file behind.
    if isinstance(const, str):
        text = "%s\n" % const
    else:
        text = (fmt + "\n") % const

    with open(fname, 'w') as fhandle:
        fhandle.write(text)
	
def loadconst(fname, isfloat = True):
    """
    Read number (or string) from the first line of a file
    param str fname:  Name of saved file
    param bool isfloat: if True, try to convert the line to float

    Returns the float when `isfloat` is set and the conversion succeeds,
    otherwise the line as a string with the newline removed.
    """
    with open(fname, 'r') as fhandle:
        const = fhandle.readline()

    if isfloat:
        try:
            return float(const)
        except ValueError:
            pass   # not a number, fall back to the raw string

    return const.replace('\n', '')


def cat(path, lines = (0,), return_array = False):
    """ Read file from `path` and return its content as a string.

    param str path: file to read
    param lines: indices of the lines to concatenate; an empty sequence
                 selects all lines. Only the first line is taken by default.
    param bool return_array: if True, return the list of all lines instead

    Any failure (unreadable file, line index out of range, ...) gives 'N/A'.
    """
    try:
        with open(path, 'r') as f:
            content_tmp = f.readlines()
        if return_array:
            return content_tmp
        if len(lines) == 0:
            lines = range(len(content_tmp))
        return "".join([content_tmp[i] for i in lines])
    except Exception:
        # best effort reader: report the problem as a placeholder string
        return 'N/A'



def _read_spectra_header(path):
    """Parse the text header of the spectrometry data file `path`.

    Returns ``(fields, n_lines)``: a dict with the header values that were
    found and the number of lines read, which includes the line containing
    ``***************`` when there is one.
    """
    def time_stamps(text):
        return array(text.strip('[]').split(), dtype=float)

    # (label searched for, offset from the label start to the value,
    #  key in the output dict, converter of the value text)
    known_fields = (
        ('Number of spectra',     19, 'n_spectra',       int),
        ('Resolution',            16, 'n_pixels',        int),
        ('Integration time [ms]', 22, 'integ_time',      float),
        ('Board temperature [C]', 22, 'temperature',     float),
        ('Time stamps [ms]',      18, 'time_stamps',     time_stamps),
        ('Noise RMS',             20, 'readoutNoiseRMS', float),
    )

    fields = {}
    n_lines = 0
    with open(path, 'r') as f:
        for line in f:
            n_lines += 1
            for label, offset, key, convert in known_fields:
                ind = line.find(label)
                if ind != -1:
                    # [:-1] drops the last character of the line
                    fields[key] = convert(line[ind+offset:-1])
            if line.find('***************') != -1:
                break
    return fields, n_lines


def loadSpectra(shot = None):
    """ Special function to load data stored in the spectrometry file

    param shot: passed to ``pygolem_lite.Shot``

    Returns a dict with 'wavelength' (first column of the data table) and
    'spectrum' (the remaining columns), plus those of 'n_spectra',
    'n_pixels', 'integ_time', 'temperature', 'time_stamps' and
    'readoutNoiseRMS' that are present in the file header.
    'readoutNoiseRMS' is limited to 10.

    Raises Exception if the data file does not exist.
    """
    from pygolem_lite import Shot
    DataFile = Shot(shot).exist('spectrometr:data')

    if not os.path.isfile(DataFile):
        raise Exception('Data file '+DataFile+' does not exists.')

    Data, line_header = _read_spectra_header(DataFile)

    # a larger value is probably a failure in the calculation
    if Data.get('readoutNoiseRMS', 0) > 10:
        Data['readoutNoiseRMS'] = 10

    # the table starts right after the header lines counted above
    data = genfromtxt(DataFile, skip_header=line_header)

    Data['wavelength'] = data[:,0]
    Data['spectrum'] = data[:,1:]

    return Data
    
    

####################### OTHER USEFUL ROUTINES #################################

def nanmedian(data):
    """ Median of all entries of the array `data` that are not NaN """
    not_nan = logical_not(isnan(data))
    return median(data[not_nan])


def list2array(L):
    """
    Convert list output from pygolem to array / scalar

    Anything that is not exactly a list or a tuple is returned untouched.
    Otherwise L[0] fills the first column of the output and L[1] the
    remaining one(s), one row per element of L[0].
    """
    if type(L) not in (list, tuple):
        return L

    first, rest = L[0], L[1]
    n_cols = 1 if ndim(rest) == 1 else size(rest, 1)

    out = empty([len(first), n_cols + 1])
    out[:, 0] = first
    out[:, 1:] = reshape(rest, (-1, n_cols))
    return out

def isnone(data):
    """ Simple function to check if input is None

    Returns
      * True for None and for a 0-dimensional array holding None,
      * the element-wise comparison with None for any other array,
      * a list of booleans, one per element, for a list or a tuple,
      * False for everything else.
    """
    if data is None:
        return True
    if isinstance(data, ndarray):
        if data.ndim == 0:
            return data.item() is None
        return data == array([None])
    if isinstance(data, (list, tuple)):
        return [item is None for item in data]
    return False

def find_data(fname):
    """
    Look for `fname` as given and then with each known data extension;
    return the first path that exists, or False if none does.
    """
    suffixes = ('', '.npz', '.npy', '.gz', '.csv', '.lvm', '.txt', '_dp.csv')
    candidates = (fname + suffix for suffix in suffixes)
    return next((path for path in candidates if os.path.exists(path)), False)


def read_config(path, file = ""):
    """Read data configuration from the file `path+file` and return it as a dictionary

    param str path: directory part (or the whole path) of the config file
    param str file: file name appended to `path`

    Returns a mapping from lower-cased section name to a dict of the items of
    that section. A cached copy in `path+file+'.npy'` is used when it can be
    loaded; otherwise the config file is parsed and the cache is written.
    """
    try:
        # NOTE(review): the cache is tried first and never compared with the
        # config file, so a config edited later keeps returning cached content.
        # NOTE(review): the cache stores a Python object, presumably pickled
        # by save() - only use it in trusted directories.
        data_types = load(path+file+'.npy').item()   # speed-up
    except Exception:
        try:
            import ConfigParser as configparser   # module name in Python 2
        except ImportError:
            import configparser                   # module name in Python 3
        from collections import OrderedDict

        config = configparser.RawConfigParser()
        with open(path+file, 'r') as fhandle:
            # readfp() is called read_file() in newer Python versions
            read = getattr(config, 'read_file', None) or config.readfp
            read(fhandle)

        data_types = OrderedDict()
        for data_type in config.sections():
            data_types[data_type.lower()] = dict(config.items(data_type))  # data_type is not case sensitive

        try:
            save(path+file, data_types )   # save for faster loading next time..
        except Exception:
            pass   # the cache is optional, e.g. the directory may be read-only
    return data_types

def check_data_config(config):
    """ Check that all items in config contains obligatory items

    param dict config: maps an item name to the dict of its settings

    Every entry that lacks one of 'identifier', 'datadir', 'name', 'type',
    'ylabel' is reported on stdout and removed from `config` in place.
    Returns the same `config` object.
    """
    required = sorted(['identifier', 'datadir', 'name', 'type', 'ylabel'])

    # iterate over a snapshot of the keys, entries are removed inside the loop
    for key in list(config.keys()):
        missing = [item for item in required if item not in config[key]]
        if missing:
            # array() keeps the numpy formatting of the list of missing items
            print(" ".join(["Error", str(key), 'missing item', str(array(missing))]))
            print("Item " + key + " is removed from config !!! ")
            config.pop(key)  # remove key from data set !!!

    return config

def unique_ind(ind_new, ind_orig):
    """ For every element of `ind_orig` return the index of its first
    occurrence in `ind_new`, as an integer array.

    An element of `ind_orig` that is not in `ind_new` raises IndexError.
    """
    first_hits = [where(ind_new == wanted)[0][0] for wanted in ind_orig]
    return int_(first_hits)

####  mathematical ================
    
def smooth(data, ker_size):
    """ Use gausian kernel for smoothing

    param data: 1D signal, or 2D array whose columns are smoothed one by one
    param ker_size: sets the length of the gaussian kernel,
        2*ceil(ker_size/2)+1 samples, and of the median filter applied
        first, max(3, 2*ceil(ker_size/4)+1) samples

    NaN samples are left out of the processing and stay NaN. In every column
    the finite samples are median filtered (removes outliers), a fitted
    straight line is subtracted, the rest is convolved with the normalised
    gaussian kernel and the line is added back.

    Returns a smoothed copy with singleton dimensions squeezed out; the
    input is not modified.
    """
    from scipy.signal import fftconvolve

    data = copy(data)
    if data.dtype.kind not in 'fc':
        # the in-place arithmetic below needs a floating point array
        data = data.astype(float)

    # linspace and medfilt need integer sizes, ceil gives a float.
    # NOTE: under Python 2 `/` floors an integer ker_size before ceil is applied.
    n_gauss = int(2*ceil(ker_size/2)+1)
    n_median = int(max(3, 2*ceil(ker_size/4)+1))

    kernel = exp(-linspace(-3, 3, n_gauss)**2)
    kernel /= kernel.sum()

    if data.ndim == 1:
        data = data[:,None]

    for i in range(data.shape[1]):
        ind = ~isnan(data[:,i])
        if not ind.any():
            continue   # nothing to smooth in a column without finite samples
        pos = where(ind)[0]

        data[ind,i] = medfilt(data[ind,i], n_median)  # remove outliers
        p = polyfit(pos, data[ind,i], 1)
        trend = polyval(p, pos)
        data[ind,i] -= trend
        data[ind,i] = fftconvolve(data[ind,i], kernel, mode="same")
        data[ind,i] += trend

    return squeeze(data)