#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
===================================================================
Hledání nejbližšího výstřelu
===================================================================
"""
from scipy.stats.mstats import mquantiles
from numpy import *
#from matplotlib.pyplot import *
import sys, getopt, os, re
set_printoptions(precision=4,linewidth=400)
import urllib
from scipy.stats import nanmedian
from pygolem_lite.sql import load_data
from pygolem_lite import get_current_shot
#./Closest_Shot.py --Ub=500 --Ucd=200 --Ubd=50 --Tbd=0 --Tcd=0.003 --gas_filling=1 --preionization=1 --pressure_request=20
def search_closest(user_vals, names, shots, data, N ):
#print shape(user_vals), shape(shots), shape(data)
# přenormovní dat
M = mean(data,0)
S = std(data,0)
user_vals = (user_vals - M)/S
data -= M
data /= S
# výpočet vzdálenosti
distance = data.copy()
distance -= user_vals
dist = sqrt(nansum(distance**2,1))
ind = argsort(dist)
ind = ind[:N] # vrátí první N
dist = dist[ind]
return ind, dist
def download_data(shot):
try:
return loadtxt('loop_voltage_' + str(shot) )
except:
address="http://golem.fjfi.cvut.cz/utils/data/"+str(shot)+"/loop_voltage"
print "stahuji", address
data = urllib.urlopen(address)
data = loadtxt(data)
savetxt('loop_voltage_' + str(shot), data )
return data
def prepare_data(diagns):
""" load the data from disk and remove problematic shots """
diagns_tmp = diagns + ['plasma_status', 'plasma','plasma_life', 'loop_voltage_max','transformator_saturation','session_name','Ucd','Ucd','Ub']
data_dict = load_data(diagns_tmp, arange(5000, get_current_shot()) )
shots = data_dict['shot']
# use only keys allowed in Vars
names = array(diagns)
X = array([ data_dict[i] for i in names ]).T
N = len(X)
# remove damaged signal
ind = array( [(True if re.match( 'OK', data_dict['plasma_status'][i]) else False) for i in range(N)] )
ind &= ~isnan(data_dict['plasma']) & ~any(isnan(X) , axis = 1)
# remove outling points (mistakes)
ind &= all(abs(X) <= mquantiles(abs(X[ind,:]), 0.99, axis=0), axis=1)
# remove outling points (mistakes)
ind &= (data_dict['loop_voltage_max'] > 5) & (data_dict['loop_voltage_max'] < 40)
# remove vacuum shots
# too low Ucd or Ubd => again some error
ind &= (data_dict['ucd'] > 10) & (data_dict['ub'] > 10)
# too long plasma
ind &= (data_dict['plasma_life'] < 17e-3 ) | isnan(data_dict['plasma_life'])
# too saturated trafo => probably false detect
ind &= ((data_dict['transformator_saturation'] < 0.8) & (data_dict['plasma'] == 1)) \
| (data_dict['plasma'] != 1) | isnan(data_dict['transformator_saturation'])
# remove problematic sessions
ind &= array( [ False if re.match('^Technological/(DAS|Technological|Problems|Software|Repairs)',
data_dict['session_name'][i]) else True for i in range(N)] )
return shots[ind], X[ind,:], names
def main():
params = dict()
for a in sys.argv[1:]:
params[re.sub("(.+)=(.+)", r"\1", a).lower()] = double(re.sub("(.+)=(.+)", r"\2", a))
close_shots = get_nearest(params)
savetxt('closest_shots', close_shots, fmt="%i")
def get_nearest(params):
shots, data, names = prepare_data( params.keys() )
user_vals = array([ d for d in params.itervalues() ])
# ================== set apriori knowlenge
if not user_vals[names == 'gas_filling']:
user_vals[names == 'pressure_request'] = nan
user_vals[names == 'preionization'] = nan
if user_vals[names == 'Ubd'] == 0:
user_vals[names == 'Tbd'] = nan
print "============================="
for i in range(len(names)):
print "Var: %s = %g " % ( names[i], user_vals[i])
print "============================="
# ============ find closest shot
closest_ind, dist = search_closest(user_vals, names, shots, data.copy(), 5 )
close_shots = shots[closest_ind]
#close_dist = dist[closest_ind]
# ================================
print " closest shots ============================="
for k,j in enumerate(closest_ind):
print "shots: %i distance %g"% ( shots[j], dist[k])
for i in range(len(names)):
print " Var: %s = %g " % ( names[i], data[j,i])
print "============================="
return close_shots
if __name__ == "__main__":
main()