"""Tool for automating the measurement data in XDC template sheets.

The script prompts for an XDC workbook and one or more assay workbooks, reads
the sample layout from the XDC workbook's "Sample" sheet, pulls the time/value
series of every sample out of the assay it refers to, and writes the combined
table into the XDC workbook's "Measurement" sheet.
"""

import numpy as np
import pandas as pd

# Constants that can be changed with config.
time_col_name = "Time [s]"  # cell text that marks the header row of a data set
time_col_index = 1  # zero-indexed sheet column that is scanned for the end of a data set
data_cols = "B:CU"  # sheet columns read for every data set
plate_columns = 12  # wells per plate row used by the (row, column) -> column formula
sample_sheet = "Sample"
measurement_sheet = "Measurement"
measurement_start_col = "A"  # first sheet column that receives output
measurement_start_row = 2  # first sheet row (1-based) that receives output
standard_cols = ["Measurement ID", "Sample ID", "Signal ID", "Time", "Value"]


# helper functions | start
def find_first_blank(df, col_index, start_row):
    """
    Finds the first blank cell in a column, scanning downwards.

    Args:
    - df (pd.DataFrame): The sheet contents.
    - col_index (int): Zero-indexed position of the column to scan.
    - start_row (int): Zero-indexed row position at which the scan starts.

    Returns:
    - int: Row position of the first NaN at or after start_row. When the
      column has no blank cell there (the data runs to the last row),
      len(df) is returned so callers can still compute a row count.
      For the default RangeIndex that read_excel produces, the position
      equals the index label.
    """
    is_blank = df.iloc[start_row:, col_index].isnull().to_numpy()
    blank_offsets = np.flatnonzero(is_blank)
    if blank_offsets.size == 0:
        return len(df)
    return start_row + int(blank_offsets[0])


def extract_num(s):
    """
    Extracts the number from the end of a string.

    Args:
    - s (str): The input string.

    Returns:
    - int: The extracted number if present, None otherwise.
    """
    i = len(s) - 1
    # isdecimal() rather than isdigit(): characters such as a superscript two
    # count as digits but cannot be converted by int().
    while i >= 0 and s[i].isdecimal():
        i -= 1

    if i < len(s) - 1:
        return int(s[i + 1:])
    return None


def load_assay(assay_file):
    """
    Reads every data set of one assay workbook.

    A data set starts below each cell equal to time_col_name and ends at the
    first blank cell of column time_col_index (or at the end of the sheet).

    Args:
    - assay_file (str): Path of the assay workbook.

    Returns:
    - tuple: (pd.DataFrame, list of str). The frame holds the rows of all
      data sets stacked in sheet order, restricted to data_cols. The list
      holds one "Signal<n>" label per frame row, n being the 1-based number
      of the data set the row came from.

    Raises:
    - ValueError: If the sheet contains no time_col_name cell.
    """
    blocks = []
    signal_labels = []
    # Open the workbook once; it is parsed again for every data set.
    with pd.ExcelFile(assay_file) as workbook:
        raw = pd.read_excel(workbook)
        # Row positions of the header cells in sheet order. A positional
        # search is used so the result does not depend on stack() dropping
        # NaN values.
        header_rows = np.argwhere((raw == time_col_name).to_numpy())[:, 0]
        for signal_number, header_row in enumerate(header_rows, start=1):
            header_row = int(header_row)
            first_blank = find_first_blank(raw, time_col_index, header_row)
            row_count = max(first_blank - header_row - 1, 0)
            block = pd.read_excel(
                workbook,
                header=None,
                # raw lost the first sheet row to its own header, so the data
                # starts header_row + 2 sheet rows down.
                skiprows=header_row + 2,
                nrows=row_count,
                usecols=data_cols,
            )
            blocks.append(block)
            signal_labels.extend([f"Signal{signal_number}"] * len(block))

    if not blocks:
        raise ValueError(f"No '{time_col_name}' header found in {assay_file!r}")
    return pd.concat(blocks), signal_labels


def build_measurements(sample_info, assays):
    """
    Builds the measurement table for all samples.

    Args:
    - sample_info (pd.DataFrame): One row per sample; the first four columns
      are sample id, plate row, plate column and assay id. The assay id must
      end in the 1-based number of the assay (for example "Assay2").
    - assays (list): One (frame, signal_labels) pair per assay, as returned
      by load_assay.

    Returns:
    - pd.DataFrame: Columns standard_cols, one row per reading. Measurement
      ids are numbered consecutively across all samples, starting at
      "Measurement0".

    Raises:
    - ValueError: If a sample refers to an assay or a well that does not
      exist.
    """
    frames = []
    next_measurement = 0
    # Completely empty rows carry no sample and are skipped.
    samples = sample_info.iloc[:, :4].dropna(how="all")
    for sample_id, row, column, assay_id in samples.itertuples(index=False, name=None):
        row = int(row)
        column = int(column)
        if row < 1 or not 1 <= column <= plate_columns:
            raise ValueError(
                f"Sample {sample_id!r}: well (row {row}, column {column}) is outside the plate"
            )

        assay_number = extract_num(str(assay_id).strip())
        if assay_number is None or not 1 <= assay_number <= len(assays):
            raise ValueError(
                f"Sample {sample_id!r}: assay id {assay_id!r} does not match "
                f"any of the {len(assays)} loaded assays"
            )
        assay_frame, signal_labels = assays[assay_number - 1]

        # (row, column) to column index formula = ((r - 1) * 12 + c) + 1;
        # position 0 of the assay frame is the time column.
        assay_read_col = (row - 1) * plate_columns + column + 1
        if assay_read_col >= assay_frame.shape[1]:
            raise ValueError(
                f"Sample {sample_id!r}: well (row {row}, column {column}) "
                f"is not present in assay {assay_number}"
            )

        reading_count = len(assay_frame)
        measurement_ids = [
            f"Measurement{next_measurement + offset}" for offset in range(reading_count)
        ]
        next_measurement += reading_count

        frames.append(
            pd.DataFrame(
                {
                    "Measurement ID": measurement_ids,
                    "Sample ID": [sample_id] * reading_count,
                    "Signal ID": list(signal_labels),
                    "Time": assay_frame.iloc[:, 0].to_numpy(),
                    "Value": assay_frame.iloc[:, assay_read_col].to_numpy(),
                },
                columns=standard_cols,
            )
        )

    if not frames:
        return pd.DataFrame(columns=standard_cols)
    return pd.concat(frames, ignore_index=True)


def write_measurements(xdc_file, output):
    """
    Writes the measurement table into the XDC workbook in place.

    The rows are written without a header, starting at measurement_start_col /
    measurement_start_row of the measurement sheet. Other sheets, and cells
    outside the written range, are left as they are.

    Args:
    - xdc_file (str): Path of the XDC workbook; it is loaded and saved back.
    - output (pd.DataFrame): The table to write.
    """
    # Imported here because only this step talks to openpyxl directly; the
    # parsing helpers stay importable without it.
    from openpyxl import load_workbook
    from openpyxl.utils import column_index_from_string

    book = load_workbook(xdc_file)
    if measurement_sheet in book.sheetnames:
        sheet = book[measurement_sheet]
    else:
        sheet = book.create_sheet(measurement_sheet)

    first_col = column_index_from_string(measurement_start_col)
    records = output.itertuples(index=False, name=None)
    for row_number, record in enumerate(records, start=measurement_start_row):
        for col_number, value in enumerate(record, start=first_col):
            # Missing readings become empty cells. The value is assigned
            # explicitly so a stale cell value is cleared as well.
            sheet.cell(row=row_number, column=col_number).value = (
                None if pd.isna(value) else value
            )
    book.save(xdc_file)
# helper functions | end


def main():
    """Prompts for the input files, builds the measurement table and writes it."""
    # user input | start
    XDC_file = input("Enter the XDC sheet filename: ").strip()

    # ask for number of assays
    number_of_assays = int(input("Enter the number of assays you would like to load: ").strip())

    # ask for assay 1 -- n file paths
    assay_file_list = [
        input(f"Enter the filename for assay{i + 1}: ").strip()
        for i in range(number_of_assays)
    ]
    # user input | end

    # read sample information
    sample_info = pd.read_excel(
        XDC_file,
        header=None,
        sheet_name=sample_sheet,
        skiprows=1,
        usecols="A:D",
    )

    # generate dataframes
    assays = [load_assay(assay_file) for assay_file in assay_file_list]

    # build final dataframe
    output = build_measurements(sample_info, assays)

    # write to xdc sheet
    write_measurements(XDC_file, output)


if __name__ == "__main__":
    main()