openpilot_comma/tinygrad_repo/test/external/external_llm_eval.py

# eval for tinygrad.apps.llm
import pyarrow.parquet as pq
from tinygrad.helpers import fetch, colored
from tinygrad.apps.llm import Transformer, SimpleTokenizer, models
from tinygrad import Tensor

if __name__ == "__main__":
  # Download the ARC-Challenge test split (parquet) and load it as an Arrow table.
  # The loop below reads its "question", "choices" and "answerKey" columns.
  dat = fetch("https://huggingface.co/datasets/allenai/ai2_arc/resolve/main/ARC-Challenge/test-00000-of-00001.parquet")
  table = pq.read_table(dat)

  # Load the model weights plus the GGUF key/value metadata (tokenizer vocab, special token ids).
  model, kv = Transformer.from_gguf(Tensor.from_url(models["1B"]), max_context=4096)

  tok = SimpleTokenizer(kv["tokenizer.ggml.tokens"])
  bos_id: int = kv['tokenizer.ggml.bos_token_id']
  eos_id: int = kv['tokenizer.ggml.eos_token_id']

  num_correct, num_answered, num_skipped = 0, 0, 0
  total_questions = len(table["question"])
  for question, choices, answer in zip(table["question"], table["choices"], table["answerKey"]):
    # Prompt layout: the question, one "<label>) <text>" line per choice, then the instruction.
    options = '\n'.join(f"{k}) {v}" for k,v in zip(choices['label'], choices['text']))
    phrasing = f"Question: {question}\n\n{options}\n\nReply with the letter of the correct answer only."
    try:
      ids = [bos_id] + tok.role("user") + tok.encode(phrasing) + [eos_id] + tok.role("assistant") + tok.encode("Answer: ")
    except RuntimeError as e:
      # TODO: fix the tokenizer
      # Skip this question instead of falling through: otherwise `ids` is either unbound
      # (first iteration -> NameError) or still holds the previous question's prompt, which
      # would score an answer to the wrong question and silently skew the accuracy.
      num_skipped += 1
      print(f"skipped question ({num_skipped} skipped so far), tokenizer error: {e}")
      continue

    # Only the first generated token is used as the model's answer.
    next_id = next(model.generate(ids))
    correct, given = answer.as_py().strip(), tok.decode([next_id]).strip()
    num_correct += correct == given
    num_answered += 1
    print(f"{num_answered:4d}/{total_questions:4d}  "
          f"Correct Answer: {correct}  "
          f"Given Answer: {colored(given, 'green' if correct==given else 'red')}  "
          f"Percent: {num_correct*100.0/num_answered:.2f}%")
openpilot v0.10.0 release date: 2025-07-14T09:06:06 master commit: 0b65a04bc10901d48bb972321b175f695d8e6e9a 1 day ago			`# eval for tinygrad.apps.llm`
			`import pyarrow.parquet as pq`
			`from tinygrad.helpers import fetch, colored`
			`from tinygrad.apps.llm import Transformer, SimpleTokenizer, models`
			`from tinygrad import Tensor`

			`if __name__ == "__main__":`
			`dat = fetch("https://huggingface.co/datasets/allenai/ai2_arc/resolve/main/ARC-Challenge/test-00000-of-00001.parquet")`
			`table = pq.read_table(dat)`

			`model, kv = Transformer.from_gguf(Tensor.from_url(models["1B"]), max_context=4096)`

			`tok = SimpleTokenizer(kv["tokenizer.ggml.tokens"])`
			`bos_id: int = kv['tokenizer.ggml.bos_token_id']`
			`eos_id: int = kv['tokenizer.ggml.eos_token_id']`

			`num_correct, num_answered = 0, 0`
			`total_questions = len(table["question"])`
			`for question, choices, answer in zip(table["question"], table["choices"], table["answerKey"]):`
			`phrasing = f"Question: {question}\n\n" + \`
			`'\n'.join([f"{k}) {v}" for k,v in zip(choices['label'], choices['text'])]) +\`
			`"\n\nReply with the letter of the correct answer only."`
			`try:`
			`ids = [bos_id] + tok.role("user") + tok.encode(phrasing) + [eos_id] + tok.role("assistant") + tok.encode("Answer: ")`
			`except RuntimeError:`
			`# TODO: fix the tokenizer`
			`pass`
			`next_id = next(model.generate(ids))`
			`correct, given = answer.as_py().strip(), tok.decode([next_id]).strip()`
			`num_correct += correct == given`
			`num_answered += 1`
			`print(f"{num_answered:4d}/{total_questions:4d} "+\`
			`f"Correct Answer: {correct} "+\`
			`f"Given Answer: {colored(given, 'green' if correct==given else 'red')} "+\`
			`f"Percent: {num_correct*100.0/num_answered:.2f}%")`