You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							159 lines
						
					
					
						
							6.2 KiB
						
					
					
				
			
		
		
	
	
							159 lines
						
					
					
						
							6.2 KiB
						
					
					
				/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 | 
						|
 | 
						|
Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
you may not use this file except in compliance with the License.
 | 
						|
You may obtain a copy of the License at
 | 
						|
 | 
						|
    http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 | 
						|
Unless required by applicable law or agreed to in writing, software
 | 
						|
distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
See the License for the specific language governing permissions and
 | 
						|
limitations under the License.
 | 
						|
==============================================================================*/
 | 
						|
 | 
						|
// Before you start, make sure libtpu.so, libtpu.h and libtpu_client.c are in
 | 
						|
// the same working directory.
 | 
						|
//
 | 
						|
// To compile: gcc -o libtpu_client libtpu_client.c -ldl
 | 
						|
// To run: sudo ./libtpu_client
 | 
						|
 | 
						|
#include <dlfcn.h>
 | 
						|
#include <stdio.h>
 | 
						|
#include <stdlib.h>
 | 
						|
 | 
						|
#include "libtpu.h"
 | 
						|
 | 
						|
// Debug helper meant to print `len` bytes starting at `dat` as two-digit hex
// values, 16 per line. The printing code is currently disabled, so a call to
// this function does nothing and touches neither argument.
void hexdump(void *dat, int len) {
  (void)dat;
  (void)len;
#if 0
  const unsigned char *bytes = (const unsigned char *)dat;
  for (int off = 0; off < len; off++) {
    if (off != 0 && off % 0x10 == 0) printf("\n");
    printf("%2.2X ", bytes[off]);
  }
  printf("\n");
#endif
}
 | 
						|
 | 
						|
 | 
						|
int main(int argc, char** argv) {
 | 
						|
  struct TpuDriverFn driver_fn;
 | 
						|
  TpuDriver_Initialize(&driver_fn, true);
 | 
						|
 | 
						|
  fprintf(stdout, "------ Going to Query Version ------\n");
 | 
						|
  fprintf(stdout, "TPU Driver Version: %s\n", driver_fn.TpuDriver_Version());
 | 
						|
 | 
						|
  fprintf(stdout, "------ Going to Open a TPU Driver ------\n");
 | 
						|
  struct TpuDriver* driver = driver_fn.TpuDriver_Open("local://");
 | 
						|
 | 
						|
  fprintf(stdout, "------ Going to Query for System Information ------\n");
 | 
						|
  struct TpuSystemInfo* info = driver_fn.TpuDriver_QuerySystemInfo(driver);
 | 
						|
  driver_fn.TpuDriver_FreeSystemInfo(info);
 | 
						|
 | 
						|
  // An example of simple program to sum two parameters.
 | 
						|
  const char* hlo_module_text = R"(HloModule add_vec_module
 | 
						|
    ENTRY %add_vec (a: s32[256], b: s32[256]) -> s32[256] {
 | 
						|
      %a = s32[256] parameter(0)
 | 
						|
      %b = s32[256] parameter(1)
 | 
						|
      ROOT %sum = s32[256] add(%a, %b)
 | 
						|
    }
 | 
						|
    )";
 | 
						|
 | 
						|
  fprintf(stdout, "------ Going to Compile a TPU program ------\n");
 | 
						|
  struct TpuCompiledProgramHandle* cph =
 | 
						|
      driver_fn.TpuDriver_CompileProgramFromText(driver, hlo_module_text,
 | 
						|
      /*num_replicas=*/1, /*eventc=*/0, /*eventv*/NULL);
 | 
						|
 | 
						|
  //hexdump(cph->internal_handle, 0x100);
 | 
						|
 | 
						|
  TpuEvent* compile_events[] = {cph->event};
 | 
						|
  fprintf(stdout, "------ Going to Load a TPU program ------\n");
 | 
						|
  struct TpuLoadedProgramHandle* lph =
 | 
						|
      driver_fn.TpuDriver_LoadProgram(driver, /*core_id=*/0, cph,
 | 
						|
      /*eventc=*/1, /*eventv=*/compile_events);
 | 
						|
 | 
						|
  const int size = 1024;
 | 
						|
 | 
						|
  fprintf(stdout, "------ Going to Allocate a TPU Buffer ------\n");
 | 
						|
  struct TpuBufferHandle* buf_a_handle =
 | 
						|
      driver_fn.TpuDriver_Allocate(driver, /*core-id=*/0, /*memory_region=*/1,
 | 
						|
        /*bytes=*/size, /*eventc=*/0, /*eventv=*/NULL);
 | 
						|
  fprintf(stdout, "------ Going to Allocate a TPU Buffer ------\n");
 | 
						|
  struct TpuBufferHandle* buf_b_handle =
 | 
						|
      driver_fn.TpuDriver_Allocate(driver, /*core-id=*/0, /*memory_region=*/1,
 | 
						|
        /*bytes=*/size, /*eventc=*/0, /*eventv=*/NULL);
 | 
						|
  fprintf(stdout, "------ Going to Allocate a TPU Buffer ------\n");
 | 
						|
  struct TpuBufferHandle* buf_sum_handle =
 | 
						|
      driver_fn.TpuDriver_Allocate(driver, /*core-id=*/0, /*memory_region=*/1,
 | 
						|
        /*bytes=*/size, /*eventc=*/0, /*eventv=*/NULL);
 | 
						|
 | 
						|
  char a_src[size], b_src[size], sum_src[size];
 | 
						|
  for (int i = 0; i < size; ++i) {
 | 
						|
    a_src[i] = 1;
 | 
						|
    b_src[i] = 2;
 | 
						|
    sum_src[i] = 0;
 | 
						|
  }
 | 
						|
 | 
						|
  TpuEvent* allocate_buf_a_events[] = {buf_a_handle->event};
 | 
						|
  fprintf(stdout, "------ Going to Transfer To Device ------\n");
 | 
						|
  struct TpuEvent* transfer_ev1 =
 | 
						|
      driver_fn.TpuDriver_TransferToDevice(driver, a_src, buf_a_handle,
 | 
						|
        /*eventc=*/1, /*eventv=*/allocate_buf_a_events);
 | 
						|
  TpuEvent* allocate_buf_b_events[] = {buf_a_handle->event};
 | 
						|
  fprintf(stdout, "------ Going to Transfer To Device ------\n");
 | 
						|
  struct TpuEvent* transfer_ev2 =
 | 
						|
      driver_fn.TpuDriver_TransferToDevice(driver, b_src, buf_b_handle,
 | 
						|
        /*eventc=*/1, /*eventv=*/allocate_buf_b_events);
 | 
						|
 | 
						|
  //getchar();
 | 
						|
 | 
						|
  fprintf(stdout, "------ Going to Execute a TPU program ------\n");
 | 
						|
  DeviceAssignment device_assignment = {NULL, 0};
 | 
						|
  TpuBufferHandle* input_buffer_handle[] = {buf_a_handle, buf_b_handle};
 | 
						|
  TpuBufferHandle* output_buffer_handle[] = {buf_sum_handle};
 | 
						|
  TpuEvent* transfer_events[] = {transfer_ev1, transfer_ev2};
 | 
						|
  struct TpuEvent* execute_event =
 | 
						|
      driver_fn.TpuDriver_ExecuteProgram(driver, lph,
 | 
						|
      /*inputc=*/2, /*input_buffer_handle=*/input_buffer_handle,
 | 
						|
      /*outputc=*/1, /*output_buffer_handle=*/output_buffer_handle,
 | 
						|
      device_assignment,
 | 
						|
      /*eventc=*/2, /*eventv*/transfer_events);
 | 
						|
 | 
						|
  fprintf(stdout, "------ Going to Transfer From Device ------\n");
 | 
						|
  TpuEvent* execute_events[] = {execute_event};
 | 
						|
  struct TpuEvent* transfer_sum_event =
 | 
						|
      driver_fn.TpuDriver_TransferFromDevice(driver, buf_sum_handle, sum_src,
 | 
						|
        /*eventc=*/1, /*eventv=*/execute_events);
 | 
						|
 | 
						|
  TpuStatus* status = driver_fn.TpuDriver_EventAwait(transfer_sum_event,
 | 
						|
                                                     10000000);
 | 
						|
  if (status->code != 0) {
 | 
						|
    fprintf(stdout, "Transfer Event Await: Code: %d, Message: %s\n",
 | 
						|
          status->code, status->msg);
 | 
						|
  }
 | 
						|
 | 
						|
  fprintf(stdout, "------ Going to Unload a TPU program ------\n");
 | 
						|
  struct TpuEvent* unload_program_event = driver_fn.TpuDriver_UnloadProgram(
 | 
						|
      driver, lph, /*eventc=*/1, /*eventv=*/execute_events);
 | 
						|
 | 
						|
  fprintf(stdout, "------ Going to Deallocate a TPU Buffer ------\n");
 | 
						|
  struct TpuEvent* dealloc_ev1 = driver_fn.TpuDriver_Deallocate(driver,
 | 
						|
      buf_a_handle, /*eventc=*/0, /*eventv=*/NULL);
 | 
						|
  driver_fn.TpuDriver_FreeEvent(dealloc_ev1);
 | 
						|
 | 
						|
  fprintf(stdout, "------ Going to Deallocate a TPU Buffer ------\n");
 | 
						|
  struct TpuEvent* dealloc_ev2 = driver_fn.TpuDriver_Deallocate(driver,
 | 
						|
      buf_b_handle, /*eventc=*/0, /*eventv=*/NULL);
 | 
						|
  driver_fn.TpuDriver_FreeEvent(dealloc_ev2);
 | 
						|
 | 
						|
  fprintf(stdout, "------ Going to Deallocate a TPU Buffer ------\n");
 | 
						|
  struct TpuEvent* dealloc_ev3 = driver_fn.TpuDriver_Deallocate(driver,
 | 
						|
      buf_sum_handle, /*eventc=*/0, /*eventv=*/NULL);
 | 
						|
  driver_fn.TpuDriver_FreeEvent(dealloc_ev3);
 | 
						|
 | 
						|
  fprintf(stdout, "sum:\n");
 | 
						|
  for (size_t i = 0; i < size; ++i) {
 | 
						|
    fprintf(stdout, "%d ", sum_src[i]);
 | 
						|
  }
 | 
						|
 | 
						|
  exit(EXIT_SUCCESS);
 | 
						|
}
 | 
						|
 |