Source code for xrag.eval.DeepEvalLocalModel

import torch
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import AutoModelForCausalLM, AutoTokenizer
from deepeval.models.base_model import DeepEvalBaseLLM

class DeepEvalLocalModel(DeepEvalBaseLLM):
    """Wraps a local Hugging Face model so it can serve as a DeepEval judge."""

    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def load_model(self):
        return self.model

    def generate(self, prompt: str) -> str:
        model = self.load_model()
        # An empty tokenizer signals a chat-style wrapper that exposes
        # .invoke() (e.g. a LangChain model) rather than a raw transformers model.
        if self.tokenizer == "":
            return model.invoke(prompt).content
        # Otherwise run a standard transformers generate loop on GPU if available.
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model_inputs = self.tokenizer([prompt], return_tensors="pt").to(device)
        model.to(device)
        generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
        return self.tokenizer.batch_decode(generated_ids)[0]

    async def a_generate(self, prompt: str) -> str:
        # DeepEval calls this from async contexts; generation itself is synchronous.
        return self.generate(prompt)

    def get_model_name(self):
        return "Custom model"
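
For reference, a minimal usage sketch. The checkpoint name "Qwen/Qwen2-0.5B-Instruct" is only an illustrative placeholder, not something this module prescribes; any causal LM and matching tokenizer from the Hugging Face Hub should work the same way:

# Illustrative checkpoint; substitute any local causal LM and its tokenizer.
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

judge = DeepEvalLocalModel(model=model, tokenizer=tokenizer)
print(judge.generate("Rate the relevance of the answer to the question ..."))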