parent
54f1b00447
commit
0abe348283
5 changed files with 0 additions and 219 deletions
@ -1,101 +0,0 @@ |
||||
// clang++ -O2 repro.cc && ./a.out
|
||||
|
||||
#include <sched.h> |
||||
#include <sys/types.h> |
||||
#include <unistd.h> |
||||
|
||||
#include <cstdint> |
||||
#include <cstdio> |
||||
#include <cstdlib> |
||||
#include <cstring> |
||||
#include <ctime> |
||||
|
||||
// Reads CLOCK_BOOTTIME and returns the reading expressed in milliseconds
// (whole seconds scaled up, nanoseconds scaled down).
// The return value of clock_gettime() is not inspected.
static inline double millis_since_boot() {
  struct timespec now;
  clock_gettime(CLOCK_BOOTTIME, &now);
  return now.tv_sec * 1000.0 + now.tv_nsec * 1e-6;
}
||||
|
||||
#define MODEL_WIDTH 320
#define MODEL_HEIGHT 640

// null function still breaks it
#define input_lambda(x) (x)

// this is copied from models/dmonitoring.cc, and is the code that triggers the issue
//
// Repacks a planar YUV frame into six float planes.
//
// resized_buf is indexed as a MODEL_WIDTH x MODEL_HEIGHT luma plane followed by
// a U plane and a V plane of (MODEL_WIDTH/2) x (MODEL_HEIGHT/2) bytes each.
// net_input_buf receives six planes of (MODEL_WIDTH/2)*(MODEL_HEIGHT/2) floats,
// in this order: Y_ul, Y_dl, Y_ur, Y_dr, U, V. Inside each plane the element
// for (r, c) is stored at c*(MODEL_HEIGHT/2) + r, so consecutive r values are
// adjacent and consecutive c values are MODEL_HEIGHT/2 floats apart.
//
// Both buffers must be at least that large; nothing is bounds-checked here.
void inner(uint8_t *resized_buf, float *net_input_buf) {
  const int resized_width = MODEL_WIDTH;
  const int resized_height = MODEL_HEIGHT;
  const int plane_len = (MODEL_WIDTH/2)*(MODEL_HEIGHT/2);               // floats per output plane
  const int u_base = resized_width*resized_height;                      // first byte of the U plane
  const int v_base = u_base + (resized_width/2)*(resized_height/2);     // first byte of the V plane

  // one shot conversion, O(n) anyway
  // yuvframe2tensor, normalize
  for (int r = 0; r < MODEL_HEIGHT/2; r++) {
    const int y_top = (2*r)*resized_width;       // start of luma row 2r
    const int y_bottom = (2*r+1)*resized_width;  // start of luma row 2r+1
    const int uv_row = r*resized_width/2;        // start of chroma row r

    for (int c = 0; c < MODEL_WIDTH/2; c++) {
      const int out = (c*MODEL_HEIGHT/2) + r;

      // Y_ul
      net_input_buf[out] = input_lambda(resized_buf[y_top + (2*c)]);
      // Y_ur
      net_input_buf[out + 2*plane_len] = input_lambda(resized_buf[y_top + (2*c+1)]);
      // Y_dl -- the "down" samples come from the next luma row, not the next pixel
      net_input_buf[out + plane_len] = input_lambda(resized_buf[y_bottom + (2*c)]);
      // Y_dr
      net_input_buf[out + 3*plane_len] = input_lambda(resized_buf[y_bottom + (2*c+1)]);
      // U
      net_input_buf[out + 4*plane_len] = input_lambda(resized_buf[u_base + uv_row + c]);
      // V
      net_input_buf[out + 5*plane_len] = input_lambda(resized_buf[v_base + uv_row + c]);
    }
  }
}
||||
|
||||
float trial() { |
||||
int resized_width = MODEL_WIDTH; |
||||
int resized_height = MODEL_HEIGHT; |
||||
|
||||
int yuv_buf_len = (MODEL_WIDTH/2) * (MODEL_HEIGHT/2) * 6; // Y|u|v -> y|y|y|y|u|v
|
||||
|
||||
// allocate the buffers
|
||||
uint8_t *resized_buf = (uint8_t*)malloc(resized_width*resized_height*3/2); |
||||
float *net_input_buf = (float*)malloc(yuv_buf_len*sizeof(float)); |
||||
printf("allocate -- %p 0x%x -- %p 0x%lx\n", resized_buf, resized_width*resized_height*3/2, net_input_buf, yuv_buf_len*sizeof(float)); |
||||
|
||||
// test for bad buffers
|
||||
static int CNT = 20; |
||||
float avg = 0.0; |
||||
for (int i = 0; i < CNT; i++) { |
||||
double s4 = millis_since_boot(); |
||||
inner(resized_buf, net_input_buf); |
||||
double s5 = millis_since_boot(); |
||||
avg += s5-s4; |
||||
} |
||||
avg /= CNT; |
||||
|
||||
// once it's bad, it's reliably bad
|
||||
if (avg > 10) { |
||||
printf("HIT %f\n", avg); |
||||
printf("BAD\n"); |
||||
|
||||
for (int i = 0; i < 200; i++) { |
||||
double s4 = millis_since_boot(); |
||||
inner(resized_buf, net_input_buf); |
||||
double s5 = millis_since_boot(); |
||||
printf("%.2f ", s5-s4); |
||||
} |
||||
printf("\n"); |
||||
|
||||
exit(0); |
||||
} |
||||
|
||||
// don't free so we get a different buffer each time
|
||||
//free(resized_buf);
|
||||
//free(net_input_buf);
|
||||
|
||||
return avg; |
||||
} |
||||
|
||||
int main() { |
||||
while (true) { |
||||
float ret = trial(); |
||||
printf("got %f\n", ret); |
||||
} |
||||
} |
||||
|
@ -1,2 +0,0 @@ |
||||
#!/usr/bin/env bash
# Builds main.cc against the TensorFlow C library found under TF_DIR,
# embedding the library directory as an rpath in the resulting binary.
TF_DIR=/home/batman/one/external/tensorflow
clang++ -I "${TF_DIR}/include/" -L "${TF_DIR}/lib" -Wl,-rpath="${TF_DIR}/lib" main.cc -ltensorflow
@ -1,69 +0,0 @@ |
||||
#include <cassert> |
||||
#include <cstdio> |
||||
#include <cstdlib> |
||||
#include "tensorflow/c/c_api.h" |
||||
|
||||
// Reads the entire file at `path` into a newly allocated buffer.
//
// path:    file to read.
// out_len: if non-NULL, receives the number of bytes read on success.
//
// Returns the buffer (release it with free()), or NULL if the file cannot be
// opened, its size cannot be determined, it is empty, memory cannot be
// allocated, or the read comes up short.
void* read_file(const char* path, size_t* out_len) {
  // binary mode so that no newline translation is applied to the bytes
  FILE* f = fopen(path, "rb");
  if (!f) {
    return NULL;
  }

  // find the file size by seeking to the end
  if (fseek(f, 0, SEEK_END) != 0) {
    fclose(f);
    return NULL;
  }
  long f_len = ftell(f);
  if (f_len <= 0) {
    // ftell failure (-1) or an empty file: there is nothing to hand back
    fclose(f);
    return NULL;
  }
  rewind(f);

  char* buf = (char*)calloc((size_t)f_len, 1);
  if (!buf) {
    fclose(f);
    return NULL;
  }

  // a single item of f_len bytes: either the whole file is read or it counts as a failure
  size_t num_read = fread(buf, (size_t)f_len, 1, f);
  fclose(f);

  if (num_read != 1) {
    free(buf);
    return NULL;
  }

  if (out_len) {
    *out_len = (size_t)f_len;
  }

  return buf;
}
||||
|
||||
// Deallocator with a (data, length) signature; it is installed as the
// TF_Buffer data_deallocator in main(). The length is not needed because the
// data is released with free().
static void DeallocateBuffer(void* data, size_t length) {
  (void)length;
  free(data);
}
||||
|
||||
int main(int argc, char* argv[]) { |
||||
TF_Buffer* buf; |
||||
TF_Graph* graph; |
||||
TF_Status* status; |
||||
char *path = argv[1]; |
||||
|
||||
// load model
|
||||
{ |
||||
size_t model_size; |
||||
char tmp[1024]; |
||||
snprintf(tmp, sizeof(tmp), "%s.pb", path); |
||||
printf("loading model %s\n", tmp); |
||||
uint8_t *model_data = (uint8_t *)read_file(tmp, &model_size); |
||||
buf = TF_NewBuffer(); |
||||
buf->data = model_data; |
||||
buf->length = model_size; |
||||
buf->data_deallocator = DeallocateBuffer; |
||||
printf("loaded model of size %d\n", model_size); |
||||
} |
||||
|
||||
// import graph
|
||||
status = TF_NewStatus(); |
||||
graph = TF_NewGraph(); |
||||
TF_ImportGraphDefOptions *opts = TF_NewImportGraphDefOptions(); |
||||
TF_GraphImportGraphDef(graph, buf, opts, status); |
||||
TF_DeleteImportGraphDefOptions(opts); |
||||
TF_DeleteBuffer(buf); |
||||
if (TF_GetCode(status) != TF_OK) { |
||||
printf("FAIL: %s\n", TF_Message(status)); |
||||
} else { |
||||
printf("SUCCESS\n"); |
||||
} |
||||
} |
@ -1,8 +0,0 @@ |
||||
#!/usr/bin/env python3
# Parses the file named by the first command-line argument as a serialized
# GraphDef. Nothing is printed; whatever open() or ParseFromString() raise
# propagates to the caller.
import sys
import tensorflow as tf

with open(sys.argv[1], "rb") as model_file:
  parsed = tf.compat.v1.GraphDef()
  serialized = model_file.read()
  parsed.ParseFromString(serialized)
  #tf.io.write_graph(parsed, '', sys.argv[1]+".try")
@ -1,39 +0,0 @@ |
||||
#!/usr/bin/env python3
# type: ignore

# Starts modeld N times, collects modelV2.modelExecutionTime for TIME seconds
# per run, and prints avg/min/max per run in milliseconds.
# N and TIME are read from the environment (defaults: 5 runs, 30 seconds).

import os
import time
import numpy as np

import cereal.messaging as messaging
from openpilot.system.manager.process_config import managed_processes


N = int(os.getenv("N", "5"))
TIME = int(os.getenv("TIME", "30"))

# number of leading samples dropped from every run before computing statistics
WARMUP_SAMPLES = 10

if __name__ == "__main__":
  sock = messaging.sub_sock('modelV2', conflate=False, timeout=1000)

  execution_times = []

  for _ in range(N):
    os.environ['LOGPRINT'] = 'debug'
    managed_processes['modeld'].start()
    time.sleep(5)

    t = []
    start = time.monotonic()
    while time.monotonic() - start < TIME:
      msgs = messaging.drain_sock(sock, wait_for_one=True)
      for m in msgs:
        t.append(m.modelV2.modelExecutionTime)

    # scaled by 1000 and reported below with an "ms" suffix
    execution_times.append(np.array(t[WARMUP_SAMPLES:]) * 1000)
    managed_processes['modeld'].stop()

  print("\n\n")
  print(f"ran modeld {N} times for {TIME}s each")
  for t in execution_times:
    if len(t) == 0:
      # avg/min/max are undefined for a run with no samples left after the warm-up cut
      print(f"\tno samples (received {WARMUP_SAMPLES} or fewer messages)")
      continue
    print(f"\tavg: {t.mean():0.2f}ms, min: {t.min():0.2f}ms, max: {t.max():0.2f}ms")
  print("\n\n")
Loading…
Reference in new issue