Revision c53c242fda4591e569825403e1ae68f35584d7b7 (click the page title to view the current version)

Handling/DataMining/Depot/python/Tunkl-2025-ExtractTemp/extract_T_ch_from_ShotLog.py

import re
import requests
import pandas as pd
import matplotlib.pyplot as plt

def extract_predischarge_temp(shot_no, DEBUG=False, VERBOSE=False, timeout=30):
    """
    Extracts the chamber temperature (T_ch) logged closest in time to the
    discharge trigger for a given shot number from the GOLEM ShotLogbook.

    The logbook is downloaded over HTTP, every VacuumLog temperature entry is
    collected, and the entry whose timestamp is nearest to the first log line
    containing the word "Trigger" is selected.

    Args:
        shot_no: Shot number; it also selects which VacuumLog line format
            is parsed (the format changed at shots 41065 and 42831).
        DEBUG: If True, print the whole log with matches highlighted and
            show a plot of T_ch versus time.
        VERBOSE: If True, print a one-line summary of the outcome.
        timeout: Seconds to wait for the server before giving up, so that a
            single stalled connection cannot block a long scan forever.

    Returns:
        Tuple (T_ch, t_diff): the selected temperature and the signed time
        difference in seconds (reading time minus trigger time, i.e. negative
        when the reading precedes the trigger). Both are NaN when the log
        cannot be downloaded, has no temperature entries, or has no trigger
        line.
    """
    url = f"http://golem.fjfi.cvut.cz/shots/{shot_no}/ShotLogbook"
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as err:
        # Missing or unreachable logs are expected for some shots: report and
        # return NaN instead of aborting the caller's scan.
        print(f"{shot_no} Error loading LOG: {err}")
        return float("nan"), float("nan")
    text = response.text

    # Select the appropriate regex pattern based on shot number format changes
    if shot_no < 41065:
        # Old format: Chamber/VacuumLog line
        pattern = re.compile(
            r"^(\d{2}:\d{2}:\d{2}).*VacuumLog:.*mPa,  ([\d\.]+)\s*C", re.MULTILINE
        )
    elif shot_no < 42831:
        # Intermediate format: VacuumLog line with T=xx.xx C
        pattern = re.compile(
            r"^(\d{2}:\d{2}:\d{2}).*VacuumLog:.*T=([\d\.]+)\s*C", re.MULTILINE
        )
    else:
        # New format: VacuumLog line with T_ch=xx.xx C
        pattern = re.compile(
            r"^(\d{2}:\d{2}:\d{2}).*VacuumLog:.*T_ch=([\d\.]+)\s*C", re.MULTILINE
        )

    if DEBUG:
        # Walk the log line by line so every line can be echoed, with the
        # matched part highlighted in green (ANSI escape codes).
        data = []
        for i, line in enumerate(text.splitlines()):
            match = pattern.search(line)
            if match:
                start, end = match.span()
                highlighted = (
                    line[:start] + "\033[92m" + line[start:end] + "\033[0m" + line[end:]
                )
                print(f"{i:03d}: {highlighted}  <-- MATCH")
                data.append(match.groups())
            else:
                print(f"{i:03d}: {line}")
    else:
        # Find all (timestamp, temperature) pairs in one pass
        data = pattern.findall(text)

    # Without any temperature entry there is nothing to select from
    if not data:
        if VERBOSE:
            print(f'{shot_no} no T_ch found')
        return float('nan'), float('nan')

    # Create DataFrame from extracted data
    # NOTE(review): timestamps carry no date, so for a log spanning midnight
    # the time differences below would be off by about 24 h - confirm that
    # this cannot happen for the shots of interest.
    df = pd.DataFrame(data, columns=["timestamp", "T_ch"])
    df["T_ch"] = df["T_ch"].astype(float)
    df["timestamp"] = pd.to_datetime(df["timestamp"], format="%H:%M:%S")

    # Timestamp of the first log line that mentions "Trigger"
    discharge_trigger_pattern = re.compile(r"^(\d{2}:\d{2}:\d{2}).*Trigger", re.MULTILINE)
    discharge_trigger_match = discharge_trigger_pattern.search(text)
    trigger_time = None
    if discharge_trigger_match:
        trigger_time = pd.to_datetime(
            discharge_trigger_match.group(1), format="%H:%M:%S"
        )

    closest_T_ch, closest_T_ch_time_diff = float("nan"), float("nan")
    if trigger_time is not None:
        # Find the T_ch value closest in time to the Trigger
        idx = (df["timestamp"] - trigger_time).abs().idxmin()
        closest_T_ch = df.loc[idx, "T_ch"]
        closest_T_ch_time_diff = (
            df.loc[idx, "timestamp"] - trigger_time
        ).total_seconds()

        if VERBOSE:
            print(
                f"{shot_no} T_ch_discharge = {closest_T_ch:.2f} C, t_diff = {closest_T_ch_time_diff} s"
            )
    elif VERBOSE:
        print(f"{shot_no} no Trigger found")

    # If DEBUG, plot the temperature vs timestamp and mark the trigger time
    if DEBUG:
        plt.figure(figsize=(10, 5))
        plt.plot(df["timestamp"], df["T_ch"], marker="o")
        plt.xlabel("Timestamp")
        plt.ylabel("T_ch [deg]")
        plt.title("T_ch vs Timestamp")

        # Add vertical line for Trigger
        if trigger_time is not None:
            plt.axvline(
                x=trigger_time,
                color="r",
                linestyle="--",
                label="Trigger",
            )
            plt.legend()

        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

    return closest_T_ch, closest_T_ch_time_diff

def main(first_shot=33170, stop_shot=50055, csv_path='T_ch_discharge.csv'):
    """
    Iterates over a range of shot numbers, extracts the predischarge
    temperature for each, and saves the results to a CSV file.

    Args:
        first_shot: First shot number to process (inclusive).
        stop_shot: Shot number at which to stop (exclusive).
            NOTE(review): with the default, shot 50055 itself is not
            scanned - confirm this is intended.
        csv_path: Path of the CSV file to write. It has one row per shot
            number with columns T_ch and t_delta; shots that failed or were
            not reached keep empty (NaN) values.
    """
    shot_numbers = range(first_shot, stop_shot)
    index = pd.Index(list(shot_numbers), name='shot_no')
    # Pre-filled with NaN so unprocessed shots still appear in the output
    df = pd.DataFrame(index=index, columns=['T_ch', 't_delta'], dtype=float)
    try:
        for shot_no in shot_numbers:
            try:
                T_ch, t_delta = extract_predischarge_temp(shot_no, VERBOSE=True)
            except Exception as err:
                # One unparsable log must not end the whole scan, and the
                # failure must stay visible: report it and leave the row NaN.
                print(f"{shot_no} extraction failed: {err!r}")
                continue
            df.loc[shot_no] = T_ch, t_delta
    finally:
        # Save whatever has been collected, even when the scan is
        # interrupted (e.g. Ctrl-C) part-way through.
        df.to_csv(csv_path)

if __name__ == "__main__":
    # Debugging aid: uncomment to inspect a single shot log with DEBUG output.
    # T_ch, t_delta = extract_predischarge_temp(50055, VERBOSE=True, DEBUG=True)

    # Default action: process all shot logs and write the CSV summary.
    main()