You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							33 lines
						
					
					
						
							1.1 KiB
						
					
					
				
			
		
		
	
	
							33 lines
						
					
					
						
							1.1 KiB
						
					
					
				import time
 | 
						|
import tensorflow as tf
 | 
						|
 | 
						|
# Turn on memory growth for every physical GPU TensorFlow can see, then report
# how many physical and logical GPU devices are available.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

def tf_prog(b, c):
  """Time a single `tf.matmul(b, c)` call.

  Args:
    b: Left operand passed to `tf.matmul`.
    c: Right operand passed to `tf.matmul`.

  Returns:
    Elapsed wall-clock time in seconds, measured with `time.perf_counter`.
  """
  st = time.perf_counter()
  a = tf.matmul(b, c)
  # Consuming the product here is intended to make sure the matmul has really
  # been computed before the timer is stopped.
  tf.debugging.check_numerics(a, "Nan or Inf in result") # Ensures that the calculation is done.
  return time.perf_counter() - st


# Benchmark square N x N matmuls for each dtype and size, printing one line per
# configuration.
for dtype in [tf.float16, tf.float32]:
  for N in [256, 512, 1024, 2048, 4096, 8192]:
    # Operation count for one N x N x N matmul: N*N*N multiply-add pairs,
    # counted as two floating-point operations each.
    FLOPS = N*N*N*2

    b = tf.random.uniform((N, N), dtype=dtype)
    c = tf.random.uniform((N, N), dtype=dtype)

    b = tf.Variable(b)
    c = tf.Variable(c)

    # Keep the fastest of 20 runs so one-off start-up costs in early calls do
    # not dominate the reported time.
    tm = min(tf_prog(b, c) for _ in range(20))
    print(f"{N*N:10d} {tm*1e6:9.2f} us, would be {FLOPS*1e-9/tm:9.2f} GFLOPS {N:4d}x{N:4d}x{N:4d} matmul in {dtype}")