# hse-python-assistant/app/utils/preprocess.py
#
# 28 lines
# 1.2 KiB
# Python

import pandas as pd
def preprocess_test(
    test_solutions_path: str,
    test_tasks_path: str,
    test_tests_path: str,
    save_path: str,
) -> None:
    """Join solutions, tasks and tests into one row per solution and save it.

    Reads three Excel files, attaches each task's description and author
    solution to every student solution, collapses all test cases of the
    task into a single newline-separated ``input_output`` string and writes
    the result to ``save_path`` as an Excel file.

    Args:
        test_solutions_path: Excel file with the columns ``id``, ``task_id``
            and ``student_solution``.
        test_tasks_path: Excel file with the columns ``id``, ``description``
            and ``author_solution``.
        test_tests_path: Excel file with the columns ``task_id``, ``input``
            and ``output``.
        save_path: Destination Excel file. It receives the columns ``id``,
            ``student_solution``, ``description``, ``author_solution`` and
            ``input_output``.
    """
    solutions_df = pd.read_excel(test_solutions_path)
    tasks_df = pd.read_excel(test_tasks_path)
    tests_df = pd.read_excel(test_tests_path)

    # Both frames carry an ``id`` column, so pandas suffixes them:
    # ``id_x`` is the solution id, ``id_y`` the task id.
    merged_df = solutions_df.merge(
        tasks_df[['id', 'description', 'author_solution']],
        left_on='task_id',
        right_on='id',
        how='left',
    )
    # Left join: a solution whose task has no tests is kept, with NaN in
    # ``input``/``output``; a task with k tests yields k rows per solution.
    merged_df = merged_df.merge(
        tests_df[['task_id', 'input', 'output']],
        on='task_id',
        how='left',
    )

    # A plain comprehension (instead of a row-wise ``apply``) also behaves
    # sensibly when the merged frame has no rows.
    merged_df['input_output'] = [
        _format_test_case(test_input, test_output)
        for test_input, test_output in zip(merged_df['input'], merged_df['output'])
    ]

    grouped_df = merged_df.groupby('id_x').agg({
        'student_solution': 'first',
        'description': 'first',
        'author_solution': 'first',
        # Skip the empty strings produced for solutions without tests.
        'input_output': lambda cases: '\n'.join(filter(None, cases)),
    }).reset_index()
    grouped_df = grouped_df.rename(columns={'id_x': 'id'})
    grouped_df.to_excel(save_path, index=False)


def _format_test_case(test_input: object, test_output: object) -> str:
    """Render one test case as ``"<input>-<output>"``.

    Returns an empty string when both values are missing. When only one of
    them is missing it is rendered as an empty string, so the literal text
    ``nan`` never leaks into the result.
    """
    has_input = pd.notna(test_input)
    has_output = pd.notna(test_output)
    if not (has_input or has_output):
        return ""
    input_text = test_input if has_input else ""
    output_text = test_output if has_output else ""
    return f"{input_text}-{output_text}"