diff --git a/launch_chffrplus.sh b/launch_chffrplus.sh index 6c98cce1b0..9afdc6f53a 100755 --- a/launch_chffrplus.sh +++ b/launch_chffrplus.sh @@ -8,6 +8,11 @@ source "$BASEDIR/launch_env.sh" DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" +function tici_init { + sudo su -c 'echo "performance" > /sys/class/devfreq/soc:qcom,memlat-cpu0/governor' + sudo su -c 'echo "performance" > /sys/class/devfreq/soc:qcom,memlat-cpu4/governor' +} + function two_init { # Restrict Android and other system processes to the first two cores echo 0-1 > /dev/cpuset/background/cpus @@ -123,6 +128,10 @@ function launch { two_init fi + if [ -f /TICI ]; then + tici_init + fi + # handle pythonpath ln -sfn $(pwd) /data/pythonpath export PYTHONPATH="$PWD" diff --git a/scripts/waste.c b/scripts/waste.c index afdcbddb25..62233b7fc4 100644 --- a/scripts/waste.c +++ b/scripts/waste.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -22,21 +23,21 @@ void waste(int pid) { int ret = sched_setaffinity(0, sizeof(cpu_set_t), &my_set); printf("set affinity to %d: %d\n", pid, ret); - // 256 MB - float32x4_t *tmp = (float32x4_t *)malloc(0x1000000*sizeof(float32x4_t)); + // 128 MB + float32x4_t *tmp = (float32x4_t *)malloc(0x800000*sizeof(float32x4_t)); // comment out the memset for CPU only and not RAM // otherwise we need this to avoid the zero page #ifdef MEM - memset(tmp, 0xaa, 0x1000000*sizeof(float32x4_t)); + memset(tmp, 0xaa, 0x800000*sizeof(float32x4_t)); #endif float32x4_t out; double sec = seconds_since_boot(); while (1) { - for (int i = 0; i < 8; i++) { - for (int j = 0; j < 0x1000000; j+=0x20) { + for (int i = 0; i < 0x10; i++) { + for (int j = 0; j < 0x800000; j+=0x20) { out = vmlaq_f32(out, tmp[j+0], tmp[j+1]); out = vmlaq_f32(out, tmp[j+2], tmp[j+3]); out = vmlaq_f32(out, tmp[j+4], tmp[j+5]); @@ -80,7 +81,7 @@ int main() { iavg += 1/ttime[i]; printf("%4.2f ", ttime[i]); } - double mb_per_sec = (8.*0x1000000/(1024*1024))*sizeof(float32x4_t)*iavg; + double mb_per_sec = (16.*0x800000/(1024*1024))*sizeof(float32x4_t)*iavg; printf("-- %4.2f -- %.2f MB/s \n", avg/CORES, mb_per_sec); sleep(1); } diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript index 4409033c55..569bcf0e75 100644 --- a/selfdrive/modeld/SConscript +++ b/selfdrive/modeld/SConscript @@ -19,7 +19,11 @@ if arch == "aarch64": lenv['CFLAGS'].append("-DUSE_THNEED") lenv['CXXFLAGS'].append("-DUSE_THNEED") elif arch == "larch64": - libs += ['gsl', 'CB', 'pthread'] + libs += ['gsl', 'CB', 'pthread', 'dl'] + if not TEST_THNEED: + common_src += ["thneed/thneed.cc"] + lenv['CFLAGS'].append("-DUSE_THNEED") + lenv['CXXFLAGS'].append("-DUSE_THNEED") else: libs += ['pthread'] diff --git a/selfdrive/modeld/runners/snpemodel.cc b/selfdrive/modeld/runners/snpemodel.cc index 38da13453a..2197c24cd4 100644 --- a/selfdrive/modeld/runners/snpemodel.cc +++ b/selfdrive/modeld/runners/snpemodel.cc @@ -1,6 +1,7 @@ #pragma clang diagnostic ignored "-Wexceptions" #include +#include #include #include "common/util.h" #include "snpemodel.h" diff --git a/selfdrive/modeld/thneed/thneed.cc b/selfdrive/modeld/thneed/thneed.cc index 11f10cfeb5..4c1adc1ab3 100644 --- a/selfdrive/modeld/thneed/thneed.cc +++ b/selfdrive/modeld/thneed/thneed.cc @@ -1,10 +1,11 @@ -#include "thneed.h" #include #include #include #include #include +#include #include +#include "thneed.h" Thneed *g_thneed = NULL; int g_fd = -1; @@ -31,6 +32,7 @@ extern "C" { int (*my_ioctl)(int filedes, unsigned long request, void *argp) = NULL; #undef ioctl int ioctl(int filedes, unsigned long request, void *argp) { + request &= 0xFFFFFFFF; // needed on QCOM2 if (my_ioctl == NULL) my_ioctl = reinterpret_cast(dlsym(RTLD_NEXT, "ioctl")); Thneed *thneed = g_thneed; @@ -440,7 +442,14 @@ cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, c #endif void *dlsym(void *handle, const char *symbol) { + // TODO: Find dlsym in a better way. Currently this is hand looked up in libdl.so +#if defined QCOM void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen-0x2d4); +#elif defined QCOM2 + void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen+0x138); +#else + #error "Unsupported platform for thneed" +#endif if (memcmp("REAL_", symbol, 5) == 0) { return my_dlsym(handle, symbol+5); } else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) { diff --git a/selfdrive/modeld/thneed/thneed.h b/selfdrive/modeld/thneed/thneed.h index 36f0bfed7f..a145a28476 100644 --- a/selfdrive/modeld/thneed/thneed.h +++ b/selfdrive/modeld/thneed/thneed.h @@ -1,8 +1,14 @@ #pragma once +#ifndef __user + #define __user __attribute__(()) +#endif + +#include #include #include "include/msm_kgsl.h" #include +#include #include using namespace std;