# hse-python-assistant/app/utils/preprocess.py

import pandas as pd

def _format_test_case(test_input: object, test_output: object) -> str:
    """Render one test case as ``"<input>-<output>"``.

    A case with both sides missing yields an empty string so it can be
    filtered out later. A single missing side is rendered as an empty string
    rather than letting the literal text ``nan`` leak into the result.
    """
    input_missing = pd.isna(test_input)
    output_missing = pd.isna(test_output)
    if input_missing and output_missing:
        return ""
    shown_input = "" if input_missing else test_input
    shown_output = "" if output_missing else test_output
    return f"{shown_input}-{shown_output}"


def _build_preprocessed_frame(
    solutions_df: pd.DataFrame,
    tasks_df: pd.DataFrame,
    tests_df: pd.DataFrame,
) -> pd.DataFrame:
    """Join solutions with their task and test data, one row per solution.

    Args:
        solutions_df: Needs the columns ``id``, ``task_id`` and
            ``student_solution``.
        tasks_df: Needs the columns ``id``, ``description`` and
            ``author_solution``.
        tests_df: Needs the columns ``task_id``, ``input`` and ``output``.

    Returns:
        A frame with the columns ``id``, ``student_solution``,
        ``description``, ``author_solution`` and ``input_output``. The last
        column holds the newline-joined ``"<input>-<output>"`` lines of all
        test cases of the solution's task.
    """
    # Rename the task key up front so the solution's own ``id`` column never
    # collides with it and pandas does not have to invent ``id_x``/``id_y``.
    task_info = tasks_df[['id', 'description', 'author_solution']].rename(
        columns={'id': 'task_id'}
    )
    merged = solutions_df.merge(task_info, on='task_id', how='left')

    # Left join: a solution whose task has no tests keeps a single row with
    # missing input/output, while each matching test adds one row.
    merged = merged.merge(
        tests_df[['task_id', 'input', 'output']], on='task_id', how='left'
    )

    merged['input_output'] = [
        _format_test_case(test_input, test_output)
        for test_input, test_output in zip(merged['input'], merged['output'])
    ]

    # Collapse the per-test rows back to one row per solution; empty strings
    # (cases with no data) are skipped when joining.
    return merged.groupby('id').agg({
        'student_solution': 'first',
        'description': 'first',
        'author_solution': 'first',
        'input_output': lambda cases: '\n'.join(filter(None, cases)),
    }).reset_index()


def preprocess_test(test_solutions_path: str, test_tasks_path: str, test_tests_path: str, save_path: str) -> None:
    """Combine the solutions, tasks and tests workbooks into one Excel file.

    Each student solution is enriched with its task description, the author
    solution and the test cases of its task, then written to ``save_path``
    without the index column.

    Args:
        test_solutions_path: Excel file with the student solutions.
        test_tasks_path: Excel file with the tasks.
        test_tests_path: Excel file with the test cases.
        save_path: Destination path of the resulting Excel file.
    """
    solutions_df = pd.read_excel(test_solutions_path)
    tasks_df = pd.read_excel(test_tasks_path)
    tests_df = pd.read_excel(test_tests_path)

    grouped_df = _build_preprocessed_frame(solutions_df, tasks_df, tests_df)
    grouped_df.to_excel(save_path, index=False)