Source code for xrag.eval.evaluate_TGT

from jury import Jury
import jury
import evaluate

NLG_EVALUATION_METRICS = [
    "chrf", "meteor", "wer", "cer", "chrf_pp", "mauve", "perplexity",
    "rouge_rouge1", "rouge_rouge2", "rouge_rougeL", "rouge_rougeLsum"
]


def NLGEvaluate(questions, actual_responses, golden_contexts, golden_context_ids, metrics):
    """Score generated responses against golden contexts with NLG metrics.

    `questions` and `golden_context_ids` are accepted for interface
    compatibility but are not used in the computation.
    """
    # omit_metrics = []
    # for metric in NLG_EVALUATION_METRICS:
    #     if metric not in metrics:
    #         omit_metrics.append(metric)
    # n = NLGEval(metrics_to_omit=omit_metrics)
    references = [str(context) for context in golden_contexts]
    if isinstance(actual_responses, list):
        predictions = [str(response) for response in actual_responses]
    elif isinstance(actual_responses, str):
        predictions = [actual_responses]
    else:
        raise TypeError("actual_responses must be a list or a str")

    # Individual metrics
    # scores = n.compute_individual_metrics(ref=reference, hyp=hypothesis)
    scorer = Jury(metrics=["chrf", "meteor", "rouge", "wer", "cer"])
    scores = {}

    # chrF++ (chrF with word_order=2)
    chrf_plus = evaluate.load("chrf")
    score = chrf_plus.compute(predictions=predictions, references=[references], word_order=2)
    scores["chrf_pp"] = score["score"]

    # MAUVE: each prediction must be paired with exactly one reference,
    # so the golden contexts are joined into a single string.
    mauve = evaluate.load("mauve")
    score = mauve.compute(predictions=predictions, references=["".join(references)])
    scores["mauve"] = score.mauve

    # Perplexity: a model id is required.
    perplexity = jury.load_metric("perplexity")
    score = perplexity.compute(predictions=predictions, references=references,
                               model_id="openai-community/gpt2")
    scores["perplexity"] = score["mean_perplexity"]

    # Jury metrics: chrF, METEOR, ROUGE, WER, CER
    score = scorer(predictions=predictions, references=[references])
    scores["chrf"] = score["chrf"]["score"]
    scores["meteor"] = score["meteor"]["score"]
    # rouge returns e.g. {'rouge1': 0.6666666666666665, 'rouge2': 0.5714285714285715,
    # 'rougeL': 0.6666666666666665, 'rougeLsum': 0.6666666666666665}
    scores["rouge_rouge1"] = score["rouge"]["rouge1"]
    scores["rouge_rouge2"] = score["rouge"]["rouge2"]
    scores["rouge_rougeL"] = score["rouge"]["rougeL"]
    scores["rouge_rougeLsum"] = score["rouge"]["rougeLsum"]
    scores["wer"] = score["wer"]["score"]
    scores["cer"] = score["cer"]["score"]
    return scores
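
A minimal usage sketch (not part of the module), assuming the package is importable as xrag.eval.evaluate_TGT and that the jury, evaluate, and mauve dependencies (plus the GPT-2 weights used for perplexity) are installed; the example strings and IDs below are illustrative only.

from xrag.eval.evaluate_TGT import NLGEvaluate, NLG_EVALUATION_METRICS

questions = ["What does RAG stand for?"]            # accepted but unused
actual_responses = ["Retrieval-augmented generation."]
golden_contexts = ["RAG stands for retrieval-augmented generation."]
golden_context_ids = ["doc-0"]                      # accepted but unused

scores = NLGEvaluate(questions, actual_responses, golden_contexts,
                     golden_context_ids, metrics=NLG_EVALUATION_METRICS)
print(scores["chrf_pp"], scores["rouge_rougeL"], scores["mauve"])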