#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Calculate nonnegative projections of the spectral basis onto the measured
spectra. The projection basis was found as a nonnegative sparse decomposition
of about 3500 spectra into 11 components.

In the first part of the algorithm, stray light is removed and corrections of
the instrumental function are made. This is very important because high
subpixel precision is necessary. The first correction is a blind deconvolution
of the shape, and then the zero and first order of the wavelength polynomial
are corrected. Then projections, together with a reliable estimate of the
statistical errors, are computed.

In the second part of the script is an integrated data analysis of the
radiation. Data from the spectrometer, with low time but very high wavelength
resolution, can be combined with high-time- but low-wavelength-resolution data
from other diagnostics such as photodiodes, cameras, etc.

Advantages: extremely high sensitivity; even weak lines lost between strong
surrounding lines of other ions can be identified. Immune against line
overburning. (Relatively) fast calculation. It can never misidentify one of
the strong lines.

Disadvantage: In rare cases something interesting can happen which cannot be
described by this limited number of components; one kind of event is here:
7307,7960;9298,9156;10097, and also, probably, a new ion from an unknown
element can occur: 9664,9665,9670,9839,9906,10131,10136. And finally a
molecular spectrum: 9504. On the other hand, without this algorithm it would
be impossible to find that there is something interesting in these discharges.
Autor: Tomas Odstrcil date:12.12.2012 """ from numpy import * import matplotlib matplotlib.rcParams['backend'] = 'Agg' from matplotlib.pyplot import * from SpectrometerControl import * from scipy.linalg import norm, inv,qr,pinv from scipy.special import erfinv, erf from scipy.stats.mstats import mquantiles from scikits.sparse.cholmod import cholesky, analyze, cholesky_AAt,CholmodError from scipy.signal import fftconvolve,convolve,gaussian,order_filter from numpy import linalg import time from scipy import sparse from scipy.sparse import * from scipy.optimize import minimize_scalar,nnls,fmin_powell,fmin_cg,fmin_bfgs,fmin_ncg from scipy.fftpack import fft,ifft from fftshift import fftshift from pygolem_lite.modules import save_adv,load_adv from scipy.sparse import block_diag from pygolem_lite import saveconst from scipy.interpolate import interp1d names = ('HI','OI','OII', 'OIII', 'HeI', 'CII', 'CIII', 'NI','NII', 'NIII','Mystery1', 'OIV' ) styles = ('b-','r-','r--', 'r-.' , 'g-' , 'k--', 'k-.' , 'y-','y--', 'y-.' , 'c-' , 'm:' ) def Shift(x,s,win,lam): #místo přímého posuvu udělám inverzi posuvu zpět, je to stabilnější s-= 0.5 # systematický posuv, aby se to shodovalo s lineární interpolací s*= -1 #invezní posuv win = (win/2)*2 diag_data = ones((2,win)) diag_data[1,:] *=s-floor(s) diag_data[0,:] *=floor(s)+1-s S = spdiags(diag_data, (-floor(s),-floor(s)-1), win, win,format='csr') f0 = zeros(win) f0[win/2] = 1 factor = cholesky_AAt(S.T,beta = lam) g = squeeze(factor(S.T*f0)) Sx = fftconvolve(x,g,mode = 'same') return Sx def GapsFilling(signal,win = 100, lam = 1e-1): #BUG loadovat to z pygolema """ =============================== Gaps Filling Filter 0.1 ===================== reconstruct the corrupted data (data with nans) by tikhonov-philips regularization with regulariting by laplace operator. And return smoothed retrofit Reconstruction is based on the invertation of the identical operator with zeros at the lines corresponding to the mising signal. 
due to memory and speed limitation the reconstruction is done on the overalaping intervals with width "win" signal - long data vector win - width of the recosntruction interval - it mas be much bigger than the gaps width lam - regularization parameter, dependes on the noise in data Autor: Tomas Odstrcil 2012 """ from scikits.sparse.cholmod import cholesky, analyze,cholesky_AAt from scipy.sparse import spdiags, eye n = len(signal) # signal extension -- avoid boundary effects n_ext = (n/win+3)*win ext_signal = ones(n_ext)*nan side = (n_ext-n)/2 ext_signal[side:-side-n%2] = signal ext_signal[:side+1] = median(signal[~isnan(signal)][:win/2]) ext_signal[-side-1:] = median(signal[~isnan(signal)][-win/2:]) intervals = arange(0,n_ext, win/2) ind_nan = isnan(ext_signal) ext_signal[ind_nan] = 0 recon = copy(ext_signal) diag_data = ones((2,win)) diag_data[1,:]*=-1 D = spdiags(diag_data, (0,1), win, win,format='csr') DD = D.T*D I = eye(win,win,format='csc') Factor = cholesky_AAt(DD, 1./lam) for i in range(len(intervals)-2): gaps = spdiags( int_(ind_nan[intervals[i]:intervals[i+2]]),0, win, win,format='csc') # use overlapping intervals !!! 
Factor.update_inplace(gaps/sqrt(lam), subtract=True) # speed up trick g = Factor(ext_signal[intervals[i]:intervals[i+2]]/lam) Factor.update_inplace(gaps/sqrt(lam), subtract=False) # speed up trick recon[(intervals[i]+intervals[i+1])/2:(intervals[i+1]+intervals[i+2])/2] = g[len(g)/4:-len(g)/4,0] #plot(recon,'r') #plot(ext_signal,'b') #savefig('gapsfill.png') #clf() chi2 = sum((ext_signal-recon)[~ind_nan]**2)/n recon = recon[(n_ext-n)/2:-(n_ext-n)/2] return recon, chi2 def Resample(tvec,vec,n,t_min = None,t_max = None,left = 0,right = 0): if t_min == None: t_min = amin(tvec) if t_max == None: t_max = amax(tvec) std = (t_max-t_min)/(tvec[1]-tvec[0])/n/2 std = max(std,1) gauss_win = gaussian(std*3,std) gauss_win/= sum(gauss_win) vec_smooth = fftconvolve(vec,gauss_win, mode = 'same') tvec_new = linspace(t_min,t_max,n) vec_new = interp(tvec_new,tvec, vec_smooth, left=left, right=right) return tvec_new,vec_new def downsample(data, sampleInteg = False, downsample = 1): downsampled = reshape(data, (-1,downsample), order = 'C') if sampleInteg: downsampled = sum(downsampled, axis = 1)/downsample else: downsampled = downsampled[:,0] return downsampled def upsample_smooth(data, win,lam, sampleInteg = False, upsample = 1): win = 2*(win/2) n = len(data) npix = upsample*win n_ext = (n/win+1)*win ext_signal = empty(n_ext) ext_signal[(n_ext-n)/2:-(n_ext-n)/2] = data ext_signal[:(n_ext-n)/2+1] = 0#median(data[:win/2]) ext_signal[-(n_ext-n)/2-1:] = 0#median(data[win/2:]) retrofit = zeros(n_ext*upsample) #solve the upsamling problem only for short interval win diag_data = ones((2,npix)) diag_data[1,:]*=-1 D = spdiags(diag_data, (0,1), npix, npix,format='csr') D = D.T*D DD = D.T*D if sampleInteg: ReductMatrix = kron(eye(win,win),ones((1,upsample))/upsample,format='csr') else: ReductMatrix = kron(eye(win,win),eye(1,upsample),format='csr') T = ReductMatrix f = ext_signal f0 =zeros(win) f0[win/2] = 1 factor = cholesky(T.T*T+lam*DD) g = squeeze(factor(T.T*f0)) #apply the single point 
solution g on the whole data vector T_Tf = zeros((n_ext,upsample)) T_Tf[:,0] = f T_Tf = squeeze(reshape(T_Tf, (-1,1), order = 'C')) if win*upsample > 100: #choose the faster way upsampled = fftconvolve(T_Tf,g,mode = 'same') else: upsampled = convolve(T_Tf,g,mode = 'same') retrofit[:-1] = upsampled[1:] #I don't know why but this must by done. retrofit = reshape(retrofit, (-1,upsample), order = 'C') if sampleInteg: retrofit = mean(retrofit, axis = 1) else: retrofit = retrofit[:,0] upsampled = upsampled[(n_ext-n)/2*upsample:-(n_ext-n)/2*upsample] retrofit = retrofit[(n_ext-n)/2:-(n_ext-n)/2] #print 'error ',sum(abs(data-retrofit)) return upsampled,retrofit def removeStrayLight(data): #experimentálně ověřeno, lepší algoritmus mě nenapadl m = len(data) win = 101 quantile = 0.1 shift = 1.4*(10*erfinv((quantile-0.5)*2)) domain = ones(win) rank = int(win*quantile) background = order_filter(data-shift,domain,rank) gauss = exp(-linspace(-1,1,win)**2) gauss-= min(gauss) gauss/= sum(gauss) background = fftconvolve(background,gauss , mode = 'same') data-= background*0.8 return data import pyfftw def deconv_opt(spectra,retrofit,overburned,lam,ker0=None): n = len(spectra) normal = fftconvolve(overburned, ones(5),mode='same') < 1./5 fr = fft(retrofit[normal], n)/sqrt(n) fs = fft( spectra[normal], n)/sqrt(n) ker_full = pyfftw.n_byte_align_empty(n, 16,dtype=complex) Fker_full = pyfftw.n_byte_align_empty(n, 16,dtype=complex) fft_forward = pyfftw.FFTW(ker_full,Fker_full, direction='FFTW_FORWARD', flags=['FFTW_ESTIMATE','FFTW_DESTROY_INPUT']) doF = sum(normal)*5. 
def f(ker): ker_full[:]=0 ker_full[n/2-(len(ker))/2+1:n/2+(len(ker))/2+1] = ker ker_full[:] = fftshift(ker_full) fft_forward() chi2 = norm(fs-fr*Fker_full)**2/doF return chi2+norm(ker)*lam if ker0 == None: L = 10 ker0 = zeros(L) ker0[L/2-1] = 1 else: L = len(ker0) #t = time.time() #y = fmin_powell(f, ker0, xtol=1e-5,ftol=1e-5,maxfun=1e6,disp=True) y= fmin_bfgs(f, ker0, gtol=1e-1,maxiter=1e6,disp=False) #print time.time()-t #exit() ker_full = zeros(n) ker_full[n/2-L/2:n/2+(L+1)/2] = real(y) return real(y),ker_full def optim_fun(noise,spectra,retrofit,overburned,lim): dec = deconv(noise,spectra,retrofit,overburned,lim) chi2 = norm(spectra-fftconvolve(retrofit,dec,mode='same'))**2/size(spectra)/5 return exp(abs(log(chi2))) def CalcProjections(spectrometr,shot, plasma_start,plasma_end): shot_number = 100000 #BUG!!!! tvec = shot.time_stamps/1000. wavelength, intensities = shot.getData() MaxIntensity = 32000 # hodnota po korekci nelinearity detektoru! if shot_number < 8500: MaxIntensity = 18200 overburned = intensities > MaxIntensity if shot_number < 7625: #koukali jsme se skrze sklo overburned[wavelength < 330,:] = True if shot_number > 10300 and shot_number < 18466: #koukali jsme se skrze sklo overburned[wavelength < 330,:] = True if shot_number > 18700 and shot_number < 21516: #koukali jsme se skrze sklo overburned[wavelength < 330,:] = True #if shot_number > 18700: #koukali jsme se skrze sklo #overburned[wavelength < 330,:] = True overburned[spectrometr.black_pixels,:] = False if spectrometr.hotPixels != []: overburned[spectrometr.hotPixels,:] = True data = load('SpectraComponents2.npz') #the most critical part, weeks of my work components = data['components'] intensities-= mean((intensities[spectrometr.black_pixels,:])) n_features = size(components,1) n_components = size(components,0) n_measurement = size(intensities,1) upsample = 10 projection = zeros((n_measurement, n_components)) projectionError = zeros((n_measurement, n_components)) dataSTD = 
shot.readoutNoiseRMS #detect plasma plasma = nanmax(intensities, axis = 0) > 10*dataSTD #překročení této hranice šumem je sttaisticky vellmi nepravděpodobné save_adv('./data/plasma', tvec,plasma) if not any(plasma): return True imin = amin(where(plasma)[0])-1 imax = amax(where(plasma)[0]) #print plasma #print 'tvec',imin,imax, isfinite(plasma_start), isfinite(plasma_end) #if isfinite(plasma_start) and isfinite(plasma_end): ##print 'tvec ', (arange(n_measurement)-imin)*(plasma_end-plasma_start)/(imax-imin)+plasma_start #tvec = (arange(n_measurement)-imin)*(plasma_end-plasma_start)/(imax-imin)+plasma_start+shot.integ_time/2000 plasma[1:] = plasma[1:] | plasma[:-1] plasma[:-1] = plasma[1:] | plasma[:-1] ##preprocess for i in where(plasma)[0]: intensities[:,i] = removeStrayLight(intensities[:,i]) intensities /= dataSTD #normalize by staistical error components_reshaped = reshape(components.T, (1,-1), order = 'F')[0,:] upsampled, retrofit = upsample_smooth(components_reshaped, 30,1, sampleInteg = True, upsample = upsample) upsampled_components = reshape(upsampled, (n_features*upsample,-1), order = 'F').T spect_lines = sum(components,axis = 0) != 0 #TODO 60% je nulových => jde to tím urychlit def estimateResid(components ): resid = zeros(n_measurement) for j in where(plasma)[0]: normalPx = ~overburned[:,j] _,resid[j] = nnls(components[:,normalPx].T,intensities[normalPx,j]) chi2 = norm(resid)**2/(sum(plasma)*size(intensities,0)) return chi2 def shiftSpectra(components,(s1,s2)): n = size(components,1) shift_components = interp1d(linspace(s2,n-s2-1,n),components, bounds_error=False,fill_value=0)(s1+arange(n)) return shift_components def fitfun(params): shift_components = shiftSpectra(components_corr, params) chi2 = estimateResid(shift_components) #print chi2 return chi2 total_intens = sum( intensities[:,plasma], axis = 1) L = 10 ker = zeros(L) ker[L/2-1] = 1 ker_full = zeros(n_features) ker_full[n_features/2] = 1 normalPx = sum(overburned ,1) == 0 components_corr = 
zeros_like(components) print 'calc inst fun' for i in range(5): for j in range(n_components): components_corr[j,:] = fftconvolve(components[j,:],ker_full,mode='same') proj, resid = nnls(components_corr[:,normalPx].T,total_intens[normalPx]) retrofit = dot(proj.T,components).T ker,ker_full = deconv_opt(total_intens,retrofit,~normalPx,2,ker) t = time.time() par0 = [0,0] #print par0, [components_corr,] y = fmin_powell(fitfun,par0, xtol=1e-4,ftol=1e-3,maxfun=1e6,disp=False) components_corr = shiftSpectra(components_corr, y) proj, resid = nnls(components_corr[:,normalPx].T,total_intens[normalPx]) #print resid**2/n_features/sum(plasma) print 'calc inst fun finished' components = components_corr components[components < 0] = 0 difference = zeros(n_features) chi2ion = zeros(n_components) resid = ones(n_measurement)*sqrt(n_features) q,r = qr(components.T,mode = 'economic') F = matrix(dot(inv(r),q.T)) for j in where(plasma)[0]: normalPx = ~overburned[:,j] projection[j, :], resid[j] = nnls(components[:,normalPx].T, intensities[normalPx,j]) #try to account error nonstatistical errors in fit difference[normalPx] = intensities[normalPx,j]-dot(components.T,projection[j, :])[normalPx] difference[~normalPx] = amax(difference[normalPx] ) #prostě tam bude velká chyba DataError = sqrt(difference**2+1)-(sqrt(2)-1) projectionError[j, :] = sqrt(diag(F*diag(DataError**2)*F.T)) for i in range(n_components): chi2ion[i] += norm(difference*(components[i,:]/(1e-3+sum(components,axis = 0))))**2/n_features #plot(wavelength, (dot(diag(projection[j, :]),components)).T,linewidth = 0.2) #plot(wavelength, intensities[:,j],'b-.') #plot(wavelength,dot(components.T,projection[j, :]), 'k') #plot(wavelength[:], intensities[:,j]-dot(components.T,projection[j, :]), 'r:' ) #ylim(-2, 100) #show() spectra = sum( intensities[:,plasma], axis = 1) retrofit = sum( dot(projection,components).T, axis = 1) chi2 = norm(spectra-retrofit)**2/n_features/sum(plasma) print 'standart chi2',chi2 normalPx = sum(overburned ,1) 
== 0 #calculate global charasteristics of the fit and spectra chi2 = sum(resid**2)/(n_measurement*n_features) print chi2, resid**2/n_features print chi2,chi2ion savetxt('./results/IonChi2'+'.txt',chi2ion) savetxt('./results/TimeChi2'+'.txt',resid**2/n_features) save('./results/TotalChi2',chi2) energy_constants = zeros(n_components) for i in range(n_components): norm_comp = spectrometr.convertCountToPhotons(shot.wavelength, copy(components[i,:]),shot.integ_time) energy_constants[i] = sum(norm_comp) abs_calibration = loadtxt('absolute_calibration.txt') energy_constants *= abs_calibration rel_proj = copy(projection) rel_proj_err = copy(projectionError) projection*= energy_constants projectionError *= energy_constants P_total = sum(projection,1) P_total_error = sqrt(sum(projectionError**2,1)) if isfinite(plasma_start) and isfinite(plasma_end): mean_tvec = sum(P_total*tvec)/sum(P_total) print mean_tvec print (plasma_start+plasma_end)/2 tvec = tvec-mean_tvec+(plasma_start+plasma_end)/2-shot.integ_time/1e3/2 save_adv('./data/TotalPower',tvec,P_total,data_err=P_total_error, tvec_err = 1e-3 ) n_frames = (plasma_end-plasma_start)/(shot.integ_time/1000) saveconst('./HistoricalAnalysis/meanPower',sum(P_total[plasma])/n_frames) save_adv('./data/projection', tvec,rel_proj, data_err=rel_proj_err, tvec_err=1e-3 ) save_adv('./data/intensities', tvec,intensities) save_adv('./data/components', wavelength,components) save_adv('./data/plasma', tvec,plasma) save_adv('./data/overburned', wavelength,overburned) save('./data/energy_constants',energy_constants)