Source code for xrag.eval.DeepEvalLocalModel
import torch
from deepeval.models.base_model import DeepEvalBaseLLM
class DeepEvalLocalModel(DeepEvalBaseLLM):
    """Adapter that exposes a local model as a DeepEval evaluation model."""

    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
    def load_model(self):
        return self.model
    def generate(self, prompt: str) -> str:
        model = self.load_model()
        # An empty-string tokenizer marks a chat-style model that exposes
        # .invoke() (returning a message with a .content attribute) rather
        # than the Hugging Face generate() API.
        if self.tokenizer == "":
            return model.invoke(prompt).content
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model_inputs = self.tokenizer([prompt], return_tensors="pt").to(device)
        model.to(device)
        generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
        # Note: the decoded string includes the prompt and any special tokens.
        return self.tokenizer.batch_decode(generated_ids)[0]
    async def a_generate(self, prompt: str) -> str:
        # DeepEval's async entry point; this wrapper simply delegates to the
        # synchronous generate().
        return self.generate(prompt)
    def get_model_name(self):
        return "Custom model"