These libraries are necessary
# %matplotlib notebook
import numpy as np
from matplotlib import pyplot as plt
import os
from urllib import request
This code takes care of dowloading data from golem server.
~~Data are stored locally, so you can run this notebook multiple times without transfering huge amount of data over the internet.
This single function loads data from golem server (if not already present), stores them localy, and returns desired channel as numpy array.
'''def load_from_golem_db(shot_num, device, channel):
#path to localy stored data file
path = "./data/"+str(shot_num)+"/"+channel+".csv"
#Check whether data already downloaded
if not os.path.isfile(path) or shot_num == 0:
#prepare directory structure (if not already present)
try:
os.mkdir("./data")
except(FileExistsError):
pass
#directory for each shot
try:
os.mkdir("./data/"+str(shot_num))
except(FileExistsError):
pass
#compose URL of desired data on golem server
url = "http://golem.fjfi.cvut.cz/shots/"+str(shot_num)+"/DASs/"+device+"/" + channel+ ".csv"
#download now
print("Downloading data from: " + url)
request.urlretrieve(url, path)
return(np.loadtxt(path, delimiter=","))
'''
#for golem processing usage local storing is not desired bahviour as it creates duplicities on golem server
#Simplified function for online data access
#for offline data analysis on own computer, use data storing version for faster access
def load_from_golem_db(shot_num, device, channel):
#compose URL of desired data on golem server
url = "http://golem.fjfi.cvut.cz/shots/"+str(shot_num)+"/DASs/"+device+"/" + channel+ ".csv"
#download now
print("Downloading data from: " + url)
return(np.loadtxt(request.urlopen(url), delimiter=","))
shot_no = 33305
#Always will be using 20 channels
n=20
#download first channel, just to get number of samples
first = load_from_golem_db(shot_no,"Bolometry", "bolo1")
k = first.shape[0]
#now store time as separate array
t = np.empty(k, dtype=np.float64)
#multiplicate by 1000 to get time in ms
t = first[:,0]*1000
#now allocate array for data
#concatentating array would be quite inefficient
data = np.empty((k, n), dtype=np.float64)
#fill first column with already loaded data
data[:, 0] = first[:, 1]
for i in range(1, n):
channel = "bolo" + str(i+1)
device = "Bolometry" if i<12 else "PetiSondaBolom"
try:
data[:, i] = load_from_golem_db(shot_no, device, channel)[:,1]
except(request.HTTPError):
raise SystemExit("Remote data error")
#and now load useful values
plasma_start = np.loadtxt(request.urlopen('http://golem.fjfi.cvut.cz/shots/'+str(shot_no)+'/analysis_wave_i/BasicDiagnostics/t_plasma_start'))
plasma_end = np.loadtxt(request.urlopen('http://golem.fjfi.cvut.cz/shots/'+str(shot_no)+'/analysis_wave_i/BasicDiagnostics/t_plasma_end'))
Begin with simple plotting all waveforms
plt.rcParams["figure.figsize"] =(16, 20)
fig, ax = plt.subplots(n, sharex=True, sharey=False)
fig.subplots_adjust(hspace=0)
for i in range(n):
ax[i].plot(t, data[:, i])
ax[0].set_xlim(plasma_start-2, plasma_end+2)
#ax.xlabel("t (ms)")
Some channel tends to fail (Papouch KO DAS ch 2 is known to be problematic)
This code is supposed to detect faulty signals and estimate the signal by averaging neighbohrs
from scipy import signal
from scipy import stats
#calculate pearson correlation coeff
def correlated(ch1, ch2, data=data, threshold = 0.1):
return (stats.pearsonr(data[:, ch1], data[:, ch2])[0] > threshold)
corrs = np.empty(n-1, dtype=np.bool)
for i in range(n-1):
corrs[i] = correlated(i, i+1)
fails = np.empty(n-1, dtype=np.bool)
for i in range(n-2):
if (not corrs[i] and not corrs[i+1] and correlated(i, i+2)):
print("faulty signal detected on channel {}, replacing by averaging neighbohrs".format(2+i))
#opravit porusena data
data[:, i+1] = np.mean(data[:, i:i+3:2], axis=1)
plt.rcParams["figure.figsize"] =(16,9)
plt.figure()
plt.imshow(data.T, aspect="auto", extent=(t[0], t[-1], n, 1))
plt.colorbar()
plt.xlim(plasma_start-2, plasma_end+2)
plt.xlabel("t [ms]")
plt.ylabel("ch [-]")
plt.savefig('icon-fig')
plt.show()
Following code is meant to calculate plasma position and variance
It is quite posiible, that it does not work correctly or reliably
#center_channel=5
xmin = -10
xmax = +10
weights = np.linspace(xmax, xmin, num=n)
plasma_center = (weights*data).sum(axis=1)/np.abs(weights).sum()
plasma_variance = (weights*(data-plasma_center[:,np.newaxis])**2).sum(axis=1)/np.abs(weights).sum()
plt.rcParams["figure.figsize"] =(16,9)
plt.figure()
plt.plot(t, plasma_center, label = "center")
plt.plot(t, plasma_center + plasma_variance*.5, color="red", linewidth=.2, label="variance")
plt.plot(t, plasma_center - plasma_variance*.5, color="red", linewidth=.2)
plt.xlim(plasma_start -2, plasma_end + 2)
plt.xlabel("t [ms]")
plt.ylabel("position [channel]")
plt.legend()
plt.show()