from tinygrad.ops import Variable
from tinygrad.engine.jit import GraphRunner
from tinygrad.engine.realize import CompiledRunner, ExecItem
from tinygrad.device import Device, Buffer
from tinygrad.runtime.ops_remote import GraphComputeItem, GraphAlloc, GraphFree, GraphExec
from tinygrad.helpers import unwrap, flatten, dedup, all_same

class RemoteGraph(GraphRunner):
  def __init__(self, jit_cache: list[ExecItem], rawbufs: list[Buffer], var_vals: dict[Variable, int]):
    super().__init__(jit_cache, rawbufs, var_vals)
    # collect every device touched by the jit cache; a remote graph can only target one device
    self.devices = dedup(flatten([[Device[unwrap(buf).device] for buf in ji.bufs] for ji in jit_cache]))
    assert all_same(self.devices), self.devices
    # indices of the input buffers that get swapped in on every call
    self.iids = sorted(self.input_replace.values())

    def _process_ji(ji: ExecItem):
      # serialize one ExecItem into the wire format the remote server replays
      assert isinstance(ji.prg, CompiledRunner), f'Only compiled runners are supported: {ji.prg}'
      return GraphComputeItem(ji.prg._prg.name, ji.prg._prg.datahash, tuple(unwrap(buf)._buf for buf in ji.bufs),
                              tuple(ji.prg.p.vars),
                              tuple(ji.prg.p.global_size) if ji.prg.p.global_size is not None else None,
                              tuple(ji.prg.p.local_size) if ji.prg.p.local_size is not None else None)

    # grab a fresh per-device graph number, then allocate the graph on the remote side
    self.graph_num = self.devices[0].graph_num
    self.devices[0].graph_num += 1
    self.devices[0].q(GraphAlloc(self.graph_num, tuple(_process_ji(ji) for ji in jit_cache),
                                 tuple(rawbufs[i]._buf for i in self.iids), var_vals))

  def __del__(self):
    # release the graph on the remote side when the runner is garbage collected
    self.devices[0].q(GraphFree(self.graph_num))

  def __call__(self, rawbufs: list[Buffer], var_vals: dict[Variable, int], wait=False):
    # enqueue a replay of the captured graph with the current input buffers and variable values
    self.devices[0].q(GraphExec(self.graph_num, tuple(rawbufs[i]._buf for i in self.iids), var_vals, wait))
    # only flush the batch (and report the measured time) when the caller asked to wait
    if wait: return float(self.devices[0].batch_submit())
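
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of this module): RemoteGraph is not
# constructed directly; TinyJit captures the kernel cache and the REMOTE
# device's graph runner replays it. This assumes a reachable remote tinygrad
# server backing the "REMOTE" device; the server setup itself is an assumption
# here, not something this file provides.
if __name__ == "__main__":
  from tinygrad import Tensor, TinyJit

  @TinyJit
  def fused_add(a: Tensor, b: Tensor) -> Tensor: return (a + b).realize()

  # the first calls warm up and capture the jit cache; subsequent calls replay
  # the captured graph on the server via a single GraphExec message
  for _ in range(4):
    out = fused_add(Tensor.rand(16, device="REMOTE"), Tensor.rand(16, device="REMOTE"))
  print(out.numpy())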