You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
			
				
					47 lines
				
				1.3 KiB
			
		
		
			
		
	
	
					47 lines
				
				1.3 KiB
			| 
								 
											2 weeks ago
										 
									 | 
							
								# https://tvm.apache.org/docs/tutorial/tensor_expr_get_started.html#example-2-manually-optimizing-matrix-multiplication-with-te
# Prints the C source TVM generates for an M x K by K x N matmul built with the
# default schedule, or a hint when the TVM import fails.

# problem size, all three dimensions equal: C[x, y] = sum_k A[x, k] * B[k, y]
M = N = K = 1024

try:
  import tvm
  from tvm import te
  #print(tvm.target.Target.list_kinds())

  # target kinds noted for this script: c, opencl
  target = tvm.target.Target(target="c")

  # TVM Matrix Multiplication using TE; k is the reduction axis spanning [0, K)
  k = te.reduce_axis((0, K), "k")
  A = te.placeholder((M, K), name="A")
  B = te.placeholder((K, N), name="B")

  def _mmult_elem(x, y):
    # one output element: reduce A[x, k] * B[k, y] over the k axis
    return te.sum(A[x, k] * B[k, y], axis=k)

  C = te.compute((M, N), _mmult_elem, name="C")

  # default schedule, no manual optimizations applied
  s = te.create_schedule(C.op)
  #print(tvm.lower(s, [A, B, C], simple_mode=True))

  # build for the C target under the name "mmult" and print the generated source
  func = tvm.build(s, [A, B, C], target=target, name="mmult")
  print(func.get_source())
except ImportError:
  print("** please install TVM for TVM output")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# tinygrad version
# Expresses the same M x K by K x N matmul with tinygrad tensors and prints the
# source rendered from the last scheduled kernel.
# NOTE(review): the tinygrad import paths and APIs used below look version-specific — confirm against the installed tinygrad.

import os
from tinygrad.tensor import Tensor

# define the compute: A viewed as (M, 1, K) times B transposed and viewed as (1, N, K), summed over the last axis
A = Tensor.rand(M, K, device="CPU")
B = Tensor.rand(K, N, device="CPU")
lhs = A.reshape(M, 1, K)
rhs = B.permute(1,0).reshape(1, N, K)
C = (lhs * rhs).sum(axis=2)

sched = C.schedule()
from tinygrad.codegen.opt.kernel import Kernel
from tinygrad.device import CompilerOptions
# the last schedule item is presumably the matmul kernel itself — TODO confirm
last_ast = sched[-1].ast
opts = CompilerOptions(has_local=False, supports_float4=False)
lin = Kernel(last_ast, opts)
# return value is unused; presumably called so lin.uops is populated before rendering — TODO confirm
lin.to_program()
from tinygrad.runtime.ops_cpu import renderer
src = renderer("mmult", lin.uops)
print(src)
							 |