hse-python-assistant/app/utils/metric.py

36 lines
1.3 KiB
Python

import pandas as pd
from torch.nn.functional import cosine_similarity
from app.utils.submit import string2embedding
def _get_cosine_similarity(pred_df: pd.DataFrame, true_df: pd.DataFrame) -> float:
    """Return the mean cosine similarity between predicted and reference embeddings.

    Rows are paired by position: row ``i`` of ``pred_df`` is compared with row
    ``i`` of ``true_df``. Both frames must provide an
    ``author_comment_embedding`` column; every cell is converted with
    ``string2embedding`` before the comparison.

    Args:
        pred_df: Predictions. Rows beyond ``len(true_df)`` are ignored.
        true_df: Ground truth. Its row count is the divisor of the mean.

    Returns:
        Sum of the per-row cosine similarities divided by ``len(true_df)``.

    Raises:
        ValueError: If ``true_df`` is empty, if ``pred_df`` has fewer rows than
            ``true_df``, or if a pair of embeddings differs in length.
    """
    predictions = pred_df["author_comment_embedding"]
    true_values = true_df["author_comment_embedding"]

    n_rows = len(true_values)
    if n_rows == 0:
        # Fail with a clear message instead of a ZeroDivisionError at the end.
        raise ValueError("Ground truth contains no rows to score")
    if len(predictions) < n_rows:
        # Without this check positional pairing would break half-way through
        # with an IndexError that does not say what is actually wrong.
        raise ValueError(
            f"Not enough predictions: got {len(predictions)}, expected {n_rows}"
        )

    total_cos_sim = 0
    # zip stops after the last ground-truth row, so surplus predictions are
    # skipped exactly as the index-based loop did.
    for pred_raw, gt_raw in zip(predictions, true_values):
        pred_value = string2embedding(pred_raw)
        gt_value = string2embedding(gt_raw)
        if len(pred_value) != len(gt_value):
            raise ValueError(f"Embeddings have different sizes: {len(pred_value)} != {len(gt_value)}")
        # unsqueeze(0) adds a leading dimension; cosine_similarity reduces
        # along dim=1 by default.
        total_cos_sim += cosine_similarity(pred_value.unsqueeze(0), gt_value.unsqueeze(0))

    return float(total_cos_sim / n_rows)
def calculate_score(submit_path: str, gt_path: str) -> float:
    """Compute the rescaled similarity score of a submission file.

    The submission is read as CSV from ``submit_path`` and the ground truth as
    an Excel workbook from ``gt_path``. Submission rows whose ``solution_id``
    does not occur in the ground-truth ``id`` column are discarded, then the
    mean cosine similarity ``s`` of the remaining rows against the ground truth
    is mapped to ``(s - 0.6) / 0.4``.

    NOTE(review): the filter keeps the submission's own row order and
    ``_get_cosine_similarity`` pairs rows by position, so this presumably
    relies on both files listing ids in the same order -- confirm.
    """
    submission = pd.read_csv(submit_path)
    ground_truth = pd.read_excel(gt_path)

    # Keep only the submitted rows that have a ground-truth counterpart.
    has_reference = submission["solution_id"].isin(ground_truth["id"])
    scored_rows = submission[has_reference]

    mean_similarity = _get_cosine_similarity(scored_rows, ground_truth)
    return (mean_similarity - 0.6) / 0.4
def calculate_score_and_save(submit_path: str, gt_path: str, save_path: str) -> float:
    """Score a submission, write the score to ``save_path``, and return it.

    The score comes from ``calculate_score(submit_path, gt_path)``. The file at
    ``save_path`` is opened in write mode and receives the score's string form
    and nothing else.
    """
    result = calculate_score(submit_path, gt_path)
    with open(save_path, "w") as out_file:
        out_file.write(str(result))
    return result