diff --git a/RELEASES.md b/RELEASES.md index 47e9c8e1bf..d8f55f8072 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -1,4 +1,8 @@ -Version 0.8.13 (2022-02-16) +Version 0.8.14 (2022-0X-XX) +======================== + * bigmodel! + +Version 0.8.13 (2022-02-18) ======================== * Improved driver monitoring * Retuned driver pose learner for relaxed driving positions diff --git a/SConstruct b/SConstruct index 832550b14b..ea9afc68e2 100644 --- a/SConstruct +++ b/SConstruct @@ -119,27 +119,26 @@ else: cxxflags = [] cpppath = [] + # MacOS if arch == "Darwin": + brew_prefix = subprocess.check_output(['brew', '--prefix'], encoding='utf8').strip() yuv_dir = "mac" if real_arch != "arm64" else "mac_arm64" libpath = [ f"#third_party/libyuv/{yuv_dir}/lib", - "/usr/local/lib", - "/opt/homebrew/lib", - "/usr/local/Homebrew/Library", - "/usr/local/opt/openssl/lib", - "/opt/homebrew/opt/openssl/lib", - "/usr/local/Cellar", + f"{brew_prefix}/lib", + f"{brew_prefix}/Library", + f"{brew_prefix}/opt/openssl/lib", + f"{brew_prefix}/Cellar", f"#third_party/acados/{arch}/lib", "/System/Library/Frameworks/OpenGL.framework/Libraries", ] cflags += ["-DGL_SILENCE_DEPRECATION"] cxxflags += ["-DGL_SILENCE_DEPRECATION"] cpppath += [ - "/opt/homebrew/include", - "/usr/local/include", - "/usr/local/opt/openssl/include", - "/opt/homebrew/opt/openssl/include" + f"{brew_prefix}/include", + f"{brew_prefix}/opt/openssl/include", ] + # Linux 86_64 else: libpath = [ "#third_party/acados/x86_64/lib", diff --git a/cereal b/cereal index 03860ae0b2..28d458a9af 160000 --- a/cereal +++ b/cereal @@ -1 +1 @@ -Subproject commit 03860ae0b2b8128cae7768e4301d889e627c9275 +Subproject commit 28d458a9af49b38bd0a9052f09fbe927324320fb diff --git a/common/window.py b/common/window.py index 983dc43984..ce1eecb881 100644 --- a/common/window.py +++ b/common/window.py @@ -11,11 +11,18 @@ class Window(): self.double = double self.halve = halve if self.double: - self.screen = pygame.display.set_mode((w*2, h*2)) + self.rw, self.rh = w*2, h*2 elif self.halve: - self.screen = pygame.display.set_mode((w//2, h//2)) + self.rw, self.rh = w//2, h//2 else: - self.screen = pygame.display.set_mode((w, h)) + self.rw, self.rh = w, h + self.screen = pygame.display.set_mode((self.rw, self.rh)) + pygame.display.flip() + + # hack for xmonad, it shrinks the window by 6 pixels after the display.flip + if self.screen.get_width() != self.rw: + self.screen = pygame.display.set_mode((self.rw+(self.rw-self.screen.get_width()), self.rh+(self.rh-self.screen.get_height()))) + pygame.display.flip() def draw(self, out): pygame.event.pump() diff --git a/docs/CARS.md b/docs/CARS.md index 5f14a9278d..c32b31dbf2 100644 --- a/docs/CARS.md +++ b/docs/CARS.md @@ -99,7 +99,7 @@ | Genesis | G90 2018 | All | Stock | 0mph | 0mph | | GMC | Acadia 20181 | Adaptive Cruise | openpilot | 0mph | 7mph | | Hyundai | Elantra 2017-19 | SCC + LKAS | Stock | 19mph | 34mph | -| Hyundai | Elantra 2021 | SCC + LKAS | Stock | 0mph | 0mph | +| Hyundai | Elantra 2021-22 | SCC + LKAS | Stock | 0mph | 0mph | | Hyundai | Elantra Hybrid 2021 | SCC + LKAS | Stock | 0mph | 0mph | | Hyundai | Genesis 2015-16 | SCC + LKAS | Stock | 19mph | 37mph | | Hyundai | Ioniq Electric 2019 | SCC + LKAS | Stock | 0mph | 32mph | diff --git a/docs/conf.py b/docs/conf.py index 5715a40b8b..c11d17455d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -56,7 +56,7 @@ myst_html_meta = { "property=og:locale": "en_US", "property=og:site_name": "docs.comma.ai", "property=og:url": "https://docs.comma.ai", - "property=og:title": "openpilot Docuemntation", + "property=og:title": "openpilot Documentation", "property=og:type": "website", "property=og:image:type": "image/jpeg", "property=og:image:width": "400", diff --git a/models/big_supercombo.dlc b/models/big_supercombo.dlc new file mode 100644 index 0000000000..a27e3d1180 --- /dev/null +++ b/models/big_supercombo.dlc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3fe3e61853cc1434e3e220f40c8e9d1f1b9bab8458196ba3bea6a10b82c6ed +size 72718099 diff --git a/models/big_supercombo.onnx b/models/big_supercombo.onnx new file mode 100644 index 0000000000..3039035fbc --- /dev/null +++ b/models/big_supercombo.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bda57c1a66944f5a633ecd739a24d62702c717a234f2fdcc499dfa1d61c3c19e +size 73147489 diff --git a/opendbc b/opendbc index c3d3c71aa7..46a942d679 160000 --- a/opendbc +++ b/opendbc @@ -1 +1 @@ -Subproject commit c3d3c71aa7a37364814f029778070fcb550c7cd3 +Subproject commit 46a942d6790531cf5b94b14266140e43afcfda3e diff --git a/panda b/panda index f56ebf5b77..51ccb9fbd2 160000 --- a/panda +++ b/panda @@ -1 +1 @@ -Subproject commit f56ebf5b776b677bf12ec772b0223274dd798999 +Subproject commit 51ccb9fbd266796e1bf6ffda8b93c4119ab09ff4 diff --git a/release/files_common b/release/files_common index faf6a41db7..cfc8150e3b 100644 --- a/release/files_common +++ b/release/files_common @@ -58,6 +58,7 @@ common/transformations/transformations.pyx common/api/__init__.py models/supercombo.dlc +models/big_supercombo.dlc models/dmonitoring_model_q.dlc release/* @@ -426,7 +427,9 @@ selfdrive/modeld/transforms/transform.cl selfdrive/modeld/thneed/thneed.* selfdrive/modeld/thneed/serialize.cc selfdrive/modeld/thneed/compile.cc +selfdrive/modeld/thneed/optimizer.cc selfdrive/modeld/thneed/include/* +selfdrive/modeld/thneed/kernels/*.cl selfdrive/modeld/runners/snpemodel.cc selfdrive/modeld/runners/snpemodel.h @@ -569,12 +572,10 @@ opendbc/acura_rdx_2018_can_generated.dbc opendbc/acura_rdx_2020_can_generated.dbc opendbc/honda_civic_touring_2016_can_generated.dbc opendbc/honda_civic_hatchback_ex_2017_can_generated.dbc -opendbc/honda_civic_sedan_16_diesel_2019_can_generated.dbc opendbc/honda_crv_touring_2016_can_generated.dbc opendbc/honda_crv_ex_2017_can_generated.dbc opendbc/honda_crv_ex_2017_body_generated.dbc opendbc/honda_crv_executive_2016_can_generated.dbc -opendbc/honda_crv_hybrid_2019_can_generated.dbc opendbc/honda_fit_ex_2018_can_generated.dbc opendbc/honda_odyssey_exl_2018_generated.dbc opendbc/honda_odyssey_extreme_edition_2018_china_can_generated.dbc diff --git a/selfdrive/athena/athenad.py b/selfdrive/athena/athenad.py index cc7cdd87ad..7250754d54 100755 --- a/selfdrive/athena/athenad.py +++ b/selfdrive/athena/athenad.py @@ -163,6 +163,8 @@ def upload_handler(end_event: threading.Event) -> None: sm = messaging.SubMaster(['deviceState']) tid = threading.get_ident() + cellular_unmetered = Params().get_bool("CellularUnmetered") + while not end_event.is_set(): cur_upload_items[tid] = None @@ -182,7 +184,7 @@ def upload_handler(end_event: threading.Event) -> None: # Check if uploading over cell is allowed sm.update(0) cell = sm['deviceState'].networkType not in [NetworkType.wifi, NetworkType.ethernet] - if cell and (not cur_upload_items[tid].allow_cellular): + if cell and (not cur_upload_items[tid].allow_cellular) and (not cellular_unmetered): retry_upload(tid, end_event, False) continue @@ -191,7 +193,7 @@ def upload_handler(end_event: threading.Event) -> None: # Abort transfer if connection changed to cell after starting upload sm.update(0) cell = sm['deviceState'].networkType not in [NetworkType.wifi, NetworkType.ethernet] - if cell and (not cur_upload_items[tid].allow_cellular): + if cell and (not cur_upload_items[tid].allow_cellular) and (not cellular_unmetered): raise AbortTransferException cur_upload_items[tid] = cur_upload_items[tid]._replace(progress=cur / sz if sz else 1) diff --git a/selfdrive/athena/tests/helpers.py b/selfdrive/athena/tests/helpers.py index 26655b4a37..bb5fcd1063 100644 --- a/selfdrive/athena/tests/helpers.py +++ b/selfdrive/athena/tests/helpers.py @@ -53,7 +53,8 @@ class MockParams(): default_params = { "DongleId": b"0000000000000000", "GithubSshKeys": b"ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC307aE+nuHzTAgaJhzSf5v7ZZQW9gaperjhCmyPyl4PzY7T1mDGenTlVTN7yoVFZ9UfO9oMQqo0n1OwDIiqbIFxqnhrHU0cYfj88rI85m5BEKlNu5RdaVTj1tcbaPpQc5kZEolaI1nDDjzV0lwS7jo5VYDHseiJHlik3HH1SgtdtsuamGR2T80q1SyW+5rHoMOJG73IH2553NnWuikKiuikGHUYBd00K1ilVAK2xSiMWJp55tQfZ0ecr9QjEsJ+J/efL4HqGNXhffxvypCXvbUYAFSddOwXUPo5BTKevpxMtH+2YrkpSjocWA04VnTYFiPG6U4ItKmbLOTFZtPzoez private", # noqa: E501 - "AthenadUploadQueue": '[]' + "AthenadUploadQueue": '[]', + "CellularUnmetered": False, } params = default_params.copy() @@ -61,6 +62,9 @@ class MockParams(): def restore_defaults(): MockParams.params = MockParams.default_params.copy() + def get_bool(self, k): + return bool(MockParams.params.get(k)) + def get(self, k, encoding=None): ret = MockParams.params.get(k) if ret is not None and encoding is not None: diff --git a/selfdrive/boardd/boardd.cc b/selfdrive/boardd/boardd.cc index c5310f879e..d7e1adf1eb 100644 --- a/selfdrive/boardd/boardd.cc +++ b/selfdrive/boardd/boardd.cc @@ -610,11 +610,20 @@ void pigeon_thread(Panda *panda) { } void boardd_main_thread(std::vector serials) { - if (serials.size() == 0) serials.push_back(""); - PubMaster pm({"pandaStates", "peripheralState"}); LOGW("attempting to connect"); + if (serials.size() == 0) { + // connect to all + serials = Panda::list(); + + // exit if no pandas are connected + if (serials.size() == 0) { + LOGW("no pandas found, exiting"); + return; + } + } + // connect to all provided serials std::vector pandas; for (int i = 0; i < serials.size() && !do_exit; /**/) { diff --git a/selfdrive/camerad/cameras/camera_common.cc b/selfdrive/camerad/cameras/camera_common.cc index b313f74e7e..f534cad99c 100644 --- a/selfdrive/camerad/cameras/camera_common.cc +++ b/selfdrive/camerad/cameras/camera_common.cc @@ -22,6 +22,7 @@ #include "CL/cl_ext_qcom.h" #include "selfdrive/camerad/cameras/camera_qcom.h" #elif QCOM2 +#include "CL/cl_ext_qcom.h" #include "selfdrive/camerad/cameras/camera_qcom2.h" #elif WEBCAM #include "selfdrive/camerad/cameras/camera_webcam.h" @@ -36,6 +37,7 @@ public: Debayer(cl_device_id device_id, cl_context context, const CameraBuf *b, const CameraState *s) { char args[4096]; const CameraInfo *ci = &s->ci; + hdr_ = ci->hdr; snprintf(args, sizeof(args), "-cl-fast-relaxed-math -cl-denorms-are-zero " "-DFRAME_WIDTH=%d -DFRAME_HEIGHT=%d -DFRAME_STRIDE=%d " @@ -62,9 +64,20 @@ public: CL_CHECK(clSetKernelArg(krnl_, 2, localMemSize, 0)); CL_CHECK(clEnqueueNDRangeKernel(q, krnl_, 2, NULL, globalWorkSize, localWorkSize, 0, 0, debayer_event)); } else { - const size_t debayer_work_size = height; // doesn't divide evenly, is this okay? - CL_CHECK(clSetKernelArg(krnl_, 2, sizeof(float), &gain)); - CL_CHECK(clEnqueueNDRangeKernel(q, krnl_, 1, NULL, &debayer_work_size, NULL, 0, 0, debayer_event)); + if (hdr_) { + // HDR requires a 1-D kernel due to the DPCM compression + const size_t debayer_local_worksize = 128; + const size_t debayer_work_size = height; // doesn't divide evenly, is this okay? + CL_CHECK(clSetKernelArg(krnl_, 2, sizeof(float), &gain)); + CL_CHECK(clEnqueueNDRangeKernel(q, krnl_, 1, NULL, &debayer_work_size, &debayer_local_worksize, 0, 0, debayer_event)); + } else { + const int debayer_local_worksize = 32; + assert(width % 2 == 0); + const size_t globalWorkSize[] = {size_t(height), size_t(width / 2)}; + const size_t localWorkSize[] = {debayer_local_worksize, debayer_local_worksize}; + CL_CHECK(clSetKernelArg(krnl_, 2, sizeof(float), &gain)); + CL_CHECK(clEnqueueNDRangeKernel(q, krnl_, 2, NULL, globalWorkSize, localWorkSize, 0, 0, debayer_event)); + } } } @@ -74,6 +87,7 @@ public: private: cl_kernel krnl_; + bool hdr_; }; void CameraBuf::init(cl_device_id device_id, cl_context context, CameraState *s, VisionIpcServer * v, int frame_cnt, VisionStreamType init_rgb_type, VisionStreamType init_yuv_type, release_cb init_release_callback) { @@ -427,8 +441,7 @@ void common_process_driver_camera(MultiCameraState *s, CameraState *c, int cnt) void camerad_thread() { cl_device_id device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT); - // TODO: do this for QCOM2 too -#if defined(QCOM) +#if defined(QCOM) || defined(QCOM2) const cl_context_properties props[] = {CL_CONTEXT_PRIORITY_HINT_QCOM, CL_PRIORITY_HINT_HIGH_QCOM, 0}; cl_context context = CL_CHECK_ERR(clCreateContext(props, 1, &device_id, NULL, NULL, &err)); #else @@ -447,3 +460,16 @@ void camerad_thread() { CL_CHECK(clReleaseContext(context)); } + +int open_v4l_by_name_and_index(const char name[], int index, int flags) { + for (int v4l_index = 0; /**/; ++v4l_index) { + std::string v4l_name = util::read_file(util::string_format("/sys/class/video4linux/v4l-subdev%d/name", v4l_index)); + if (v4l_name.empty()) return -1; + if (v4l_name.find(name) == 0) { + if (index == 0) { + return HANDLE_EINTR(open(util::string_format("/dev/v4l-subdev%d", v4l_index).c_str(), flags)); + } + index--; + } + } +} diff --git a/selfdrive/camerad/cameras/camera_common.h b/selfdrive/camerad/cameras/camera_common.h index f53f06dceb..c2d2904f54 100644 --- a/selfdrive/camerad/cameras/camera_common.h +++ b/selfdrive/camerad/cameras/camera_common.h @@ -135,3 +135,5 @@ void cameras_run(MultiCameraState *s); void cameras_close(MultiCameraState *s); void camera_autoexposure(CameraState *s, float grey_frac); void camerad_thread(); + +int open_v4l_by_name_and_index(const char name[], int index = 0, int flags = O_RDWR | O_NONBLOCK); diff --git a/selfdrive/camerad/cameras/camera_qcom.cc b/selfdrive/camerad/cameras/camera_qcom.cc index ecb1c8ffe4..da01b94177 100644 --- a/selfdrive/camerad/cameras/camera_qcom.cc +++ b/selfdrive/camerad/cameras/camera_qcom.cc @@ -399,7 +399,7 @@ static void sensors_init(MultiCameraState *s) { .output_format = MSM_SENSOR_BAYER, }}; - unique_fd sensorinit_fd = HANDLE_EINTR(open("/dev/v4l-subdev11", O_RDWR | O_NONBLOCK)); + unique_fd sensorinit_fd = open_v4l_by_name_and_index("msm_sensor_init"); assert(sensorinit_fd >= 0); for (auto &info : slave_infos) { info.power_setting_array.power_setting = &info.power_setting_array.power_setting_a[0]; @@ -417,31 +417,22 @@ static void camera_open(CameraState *s, bool is_road_cam) { struct msm_actuator_cfg_data actuator_cfg_data = {}; // open devices - const char *sensor_dev; + s->csid_fd = open_v4l_by_name_and_index("msm_csid", is_road_cam ? 0 : 2); + assert(s->csid_fd >= 0); + s->csiphy_fd = open_v4l_by_name_and_index("msm_csiphy", is_road_cam ? 0 : 2); + assert(s->csiphy_fd >= 0); + s->isp_fd = open_v4l_by_name_and_index("vfe", is_road_cam ? 0 : 1); + assert(s->isp_fd >= 0); + if (is_road_cam) { - s->csid_fd = HANDLE_EINTR(open("/dev/v4l-subdev3", O_RDWR | O_NONBLOCK)); - assert(s->csid_fd >= 0); - s->csiphy_fd = HANDLE_EINTR(open("/dev/v4l-subdev0", O_RDWR | O_NONBLOCK)); - assert(s->csiphy_fd >= 0); - sensor_dev = "/dev/v4l-subdev17"; - s->isp_fd = HANDLE_EINTR(open("/dev/v4l-subdev13", O_RDWR | O_NONBLOCK)); - assert(s->isp_fd >= 0); - s->actuator_fd = HANDLE_EINTR(open("/dev/v4l-subdev7", O_RDWR | O_NONBLOCK)); + s->actuator_fd = open_v4l_by_name_and_index("msm_actuator"); assert(s->actuator_fd >= 0); - } else { - s->csid_fd = HANDLE_EINTR(open("/dev/v4l-subdev5", O_RDWR | O_NONBLOCK)); - assert(s->csid_fd >= 0); - s->csiphy_fd = HANDLE_EINTR(open("/dev/v4l-subdev2", O_RDWR | O_NONBLOCK)); - assert(s->csiphy_fd >= 0); - sensor_dev = "/dev/v4l-subdev18"; - s->isp_fd = HANDLE_EINTR(open("/dev/v4l-subdev14", O_RDWR | O_NONBLOCK)); - assert(s->isp_fd >= 0); } // wait for sensor device // on first startup, these devices aren't present yet for (int i = 0; i < 10; i++) { - s->sensor_fd = HANDLE_EINTR(open(sensor_dev, O_RDWR | O_NONBLOCK)); + s->sensor_fd = open_v4l_by_name_and_index(is_road_cam ? "imx298" : "ov8865_sunny"); if (s->sensor_fd >= 0) break; LOGW("waiting for sensors..."); util::sleep_for(1000); // sleep one second @@ -908,7 +899,7 @@ void cameras_open(MultiCameraState *s) { s->v4l_fd = HANDLE_EINTR(open("/dev/video0", O_RDWR | O_NONBLOCK)); assert(s->v4l_fd >= 0); - s->ispif_fd = HANDLE_EINTR(open("/dev/v4l-subdev15", O_RDWR | O_NONBLOCK)); + s->ispif_fd = open_v4l_by_name_and_index("msm_ispif"); assert(s->ispif_fd >= 0); // ISPIF: stop diff --git a/selfdrive/camerad/cameras/camera_qcom2.cc b/selfdrive/camerad/cameras/camera_qcom2.cc index e86de9ffd6..f75e982be2 100644 --- a/selfdrive/camerad/cameras/camera_qcom2.cc +++ b/selfdrive/camerad/cameras/camera_qcom2.cc @@ -578,19 +578,6 @@ static void camera_init(MultiCameraState *multi_cam_state, VisionIpcServer * v, s->buf.init(device_id, ctx, s, v, FRAME_BUF_COUNT, rgb_type, yuv_type); } -int open_v4l_by_name_and_index(const char name[], int index, int flags = O_RDWR | O_NONBLOCK) { - for (int v4l_index = 0; /**/; ++v4l_index) { - std::string v4l_name = util::read_file(util::string_format("/sys/class/video4linux/v4l-subdev%d/name", v4l_index)); - if (v4l_name.empty()) return -1; - if (v4l_name.find(name) == 0) { - if (index == 0) { - return open(util::string_format("/dev/v4l-subdev%d", v4l_index).c_str(), flags); - } - index--; - } - } -} - static void camera_open(CameraState *s) { s->sensor_fd = open_v4l_by_name_and_index("cam-sensor-driver", s->camera_num); assert(s->sensor_fd >= 0); diff --git a/selfdrive/camerad/cameras/debayer.cl b/selfdrive/camerad/cameras/debayer.cl index 5188dc88c1..4e4b832203 100644 --- a/selfdrive/camerad/cameras/debayer.cl +++ b/selfdrive/camerad/cameras/debayer.cl @@ -26,6 +26,8 @@ float3 srgb_gamma(float3 p) { return select(ph, pl, islessequal(p, 0.0031308f)); } +#if HDR + __constant int dpcm_lookup[512] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, 935, 951, 967, 983, 999, 1015, 1031, 1047, 1063, 1079, 1095, 1111, 1127, 1143, 1159, 1175, 1191, 1207, 1223, 1239, 1255, 1271, 1287, 1303, 1319, 1335, 1351, 1367, 1383, 1399, 1415, 1431, -935, -951, -967, -983, -999, -1015, -1031, -1047, -1063, -1079, -1095, -1111, -1127, -1143, -1159, -1175, -1191, -1207, -1223, -1239, -1255, -1271, -1287, -1303, -1319, -1335, -1351, -1367, -1383, -1399, -1415, -1431, 419, 427, 435, 443, 451, 459, 467, 475, 483, 491, 499, 507, 515, 523, 531, 539, 547, 555, 563, 571, 579, 587, 595, 603, 611, 619, 627, 635, 643, 651, 659, 667, 675, 683, 691, 699, 707, 715, 723, 731, 739, 747, 755, 763, 771, 779, 787, 795, 803, 811, 819, 827, 835, 843, 851, 859, 867, 875, 883, 891, 899, 907, 915, 923, -419, -427, -435, -443, -451, -459, -467, -475, -483, -491, -499, -507, -515, -523, -531, -539, -547, -555, -563, -571, -579, -587, -595, -603, -611, -619, -627, -635, -643, -651, -659, -667, -675, -683, -691, -699, -707, -715, -723, -731, -739, -747, -755, -763, -771, -779, -787, -795, -803, -811, -819, -827, -835, -843, -851, -859, -867, -875, -883, -891, -899, -907, -915, -923, 161, 165, 169, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209, 213, 217, 221, 225, 229, 233, 237, 241, 245, 249, 253, 257, 261, 265, 269, 273, 277, 281, 285, 289, 293, 297, 301, 305, 309, 313, 317, 321, 325, 329, 333, 337, 341, 345, 349, 353, 357, 361, 365, 369, 373, 377, 381, 385, 389, 393, 397, 401, 405, 409, 413, -161, -165, -169, -173, -177, -181, -185, -189, -193, -197, -201, -205, -209, -213, -217, -221, -225, -229, -233, -237, -241, -245, -249, -253, -257, -261, -265, -269, -273, -277, -281, -285, -289, -293, -297, -301, -305, -309, -313, -317, -321, -325, -329, -333, -337, -341, -345, -349, -353, -357, -361, -365, -369, -373, -377, -381, -385, -389, -393, -397, -401, -405, -409, -413, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, -32, -34, -36, -38, -40, -42, -44, -46, -48, -50, -52, -54, -56, -58, -60, -62, -64, -66, -68, -70, -72, -74, -76, -78, -80, -82, -84, -86, -88, -90, -92, -94, -96, -98, -100, -102, -104, -106, -108, -110, -112, -114, -116, -118, -120, -122, -124, -126, -128, -130, -132, -134, -136, -138, -140, -142, -144, -146, -148, -150, -152, -154, -156, -158}; inline uint4 decompress(uint4 p, uint4 pl) { @@ -35,6 +37,8 @@ inline uint4 decompress(uint4 p, uint4 pl) { return select(r2, r1, p < 0x200); } +#endif + __kernel void debayer10(__global uchar const * const in, __global uchar * out, float digital_gain) { @@ -42,8 +46,13 @@ __kernel void debayer10(__global uchar const * const in, if (oy >= RGB_HEIGHT) return; const int iy = oy * 2; +#if HDR uint4 pint_last; for (int ox = 0; ox < RGB_WIDTH; ox += 2) { +#else + int ox = get_global_id(1) * 2; + { +#endif const int ix = (ox/2) * 5; // TODO: why doesn't this work for the frontview diff --git a/selfdrive/car/honda/values.py b/selfdrive/car/honda/values.py index 6e347b8da3..50ea52faae 100644 --- a/selfdrive/car/honda/values.py +++ b/selfdrive/car/honda/values.py @@ -1346,11 +1346,11 @@ DBC = { CAR.ACURA_RDX_3G: dbc_dict('acura_rdx_2020_can_generated', None), CAR.CIVIC: dbc_dict('honda_civic_touring_2016_can_generated', 'acura_ilx_2016_nidec'), CAR.CIVIC_BOSCH: dbc_dict('honda_civic_hatchback_ex_2017_can_generated', None), - CAR.CIVIC_BOSCH_DIESEL: dbc_dict('honda_civic_sedan_16_diesel_2019_can_generated', None), + CAR.CIVIC_BOSCH_DIESEL: dbc_dict('honda_accord_2018_can_generated', None), CAR.CRV: dbc_dict('honda_crv_touring_2016_can_generated', 'acura_ilx_2016_nidec'), CAR.CRV_5G: dbc_dict('honda_crv_ex_2017_can_generated', None, body_dbc='honda_crv_ex_2017_body_generated'), CAR.CRV_EU: dbc_dict('honda_crv_executive_2016_can_generated', 'acura_ilx_2016_nidec'), - CAR.CRV_HYBRID: dbc_dict('honda_crv_hybrid_2019_can_generated', None), + CAR.CRV_HYBRID: dbc_dict('honda_accord_2018_can_generated', None), CAR.FIT: dbc_dict('honda_fit_ex_2018_can_generated', 'acura_ilx_2016_nidec'), CAR.FREED: dbc_dict('honda_fit_ex_2018_can_generated', 'acura_ilx_2016_nidec'), CAR.HRV: dbc_dict('honda_fit_ex_2018_can_generated', 'acura_ilx_2016_nidec'), diff --git a/selfdrive/car/hyundai/interface.py b/selfdrive/car/hyundai/interface.py index 463ff3569a..379e6937ae 100644 --- a/selfdrive/car/hyundai/interface.py +++ b/selfdrive/car/hyundai/interface.py @@ -30,6 +30,11 @@ class CarInterface(CarInterfaceBase): ret.pcmCruise = not ret.openpilotLongitudinalControl + # These cars have been put into dashcam only due to both a lack of users and test coverage. + # These cars likely still work fine. Once a user confirms each car works and a test route is + # added to selfdrive/test/test_routes, we can remove it from this list. + ret.dashcamOnly = candidate in {CAR.KIA_OPTIMA_H, CAR.ELANTRA_GT_I30} + ret.steerActuatorDelay = 0.1 # Default delay ret.steerRateCost = 0.5 ret.steerLimitTimer = 0.4 diff --git a/selfdrive/car/hyundai/values.py b/selfdrive/car/hyundai/values.py index 818dbe57f6..e689a57f48 100644 --- a/selfdrive/car/hyundai/values.py +++ b/selfdrive/car/hyundai/values.py @@ -225,6 +225,7 @@ FW_VERSIONS = { b'\xf1\x00AE MDPS C 1.00 1.02 56310G7300\x00 4AEEC102', b'\xf1\x00AE MDPS C 1.00 1.04 56310/G7501 4AEEC104', b'\xf1\x00AE MDPS C 1.00 1.03 56310/G7300 4AEEC103', + b'\xf1\x00AE MDPS C 1.00 1.03 56310G7300\x00 4AEEC103', ], (Ecu.fwdCamera, 0x7c4, None): [ b'\xf1\x00AEE MFC AT EUR LHD 1.00 1.00 95740-G7200 160418', @@ -292,6 +293,8 @@ FW_VERSIONS = { b'HM6M2_0a0_BD0', ], (Ecu.eps, 0x7d4, None): [ + b'\xf1\x00DN8 MDPS C 1,00 1,01 56310L0010\x00 4DNAC101', # modified firmware + b'\xf1\x8756310L0010\x00\xf1\x00DN8 MDPS C 1,00 1,01 56310L0010\x00 4DNAC101', # modified firmware b'\xf1\x00DN8 MDPS C 1.00 1.01 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 4DNAC101', b'\xf1\x00DN8 MDPS C 1.00 1.01 56310-L0010 4DNAC101', b'\xf1\x00DN8 MDPS C 1.00 1.01 56310L0010\x00 4DNAC101', @@ -647,10 +650,14 @@ FW_VERSIONS = { b'\xf1\x00JS__ SCC H-CUP 1.00 1.02 95650-J3200 ', b'\xf1\x00JS__ SCC HNCUP 1.00 1.02 95650-J3100 ', ], - (Ecu.esp, 0x7d1, None): [b'\xf1\x00\x00\x00\x00\x00\x00\x00', ], + (Ecu.esp, 0x7d1, None): [ + b'\xf1\x00\x00\x00\x00\x00\x00\x00', + b'\xf1\x816V8RAC00121.ELF\xf1\x00\x00\x00\x00\x00\x00\x00', + ], (Ecu.engine, 0x7e0, None): [ b'\x01TJS-JNU06F200H0A', b'\x01TJS-JDK06F200H0A', + b'391282BJF5 ', ], (Ecu.eps, 0x7d4, None): [b'\xf1\x00JSL MDPS C 1.00 1.03 56340-J3000 8308', ], (Ecu.fwdCamera, 0x7c4, None): [ @@ -907,6 +914,7 @@ FW_VERSIONS = { (Ecu.fwdCamera, 0x7c4, None): [ b'\xf1\x00CN7 MFC AT USA LHD 1.00 1.00 99210-AB000 200819', b'\xf1\x00CN7 MFC AT USA LHD 1.00 1.03 99210-AA000 200819', + b'\xf1\x00CN7 MFC AT USA LHD 1.00 1.01 99210-AB000 210205', ], (Ecu.esp, 0x7d1, None): [ b'\xf1\x00CN ESC \t 101 \x10\x03 58910-AB800', @@ -919,11 +927,13 @@ FW_VERSIONS = { b'\xf1\x87CXMQFM2135005JB2E\xb9\x89\x98W\xa9y\x97h\xa9\x98\x99wxvwh\x87\177\xffx\xff\xff\xff,,\xf1\x89HT6VA640A1\xf1\x82CCN0N20NS5\x00\x00\x00\x00\x00\x00', b'\xf1\x87CXMQFM1916035JB2\x88vvgg\x87Wuwgev\xa9\x98\x88\x98h\x99\x9f\xffh\xff\xff\xff\xa5\xee\xf1\x89HT6VA640A1\xf1\x82CCN0N20NS5\x00\x00\x00\x00\x00\x00', b'\xf1\x87CXLQF40189012JL2f\x88\x86\x88\x88vUex\xb8\x88\x88\x88\x87\x88\x89fh?\xffz\xff\xff\xff\x08z\xf1\x89HT6VA640A1\xf1\x82CCN0N20NS5\x00\x00\x00\x00\x00\x00', + b'\xf1\x87CXMQFM2728305JB2E\x97\x87xw\x87vwgw\x84x\x88\x88w\x89EI\xbf\xff{\xff\xff\xff\xe6\x0e\xf1\x89HT6VA640A1\xf1\x82CCN0N20NS5\x00\x00\x00\x00\x00\x00' ], (Ecu.engine, 0x7e0, None): [ b'\xf1\x82CNCWD0AMFCXCSFFA', b'\xf1\x82CNCWD0AMFCXCSFFB', b'\xf1\x82CNCVD0AMFCXCSFFB', + b'\xf1\x870\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf1\x82CNDWD0AMFCXCSG8A', ], }, CAR.ELANTRA_HEV_2021: { @@ -997,6 +1007,23 @@ FW_VERSIONS = { b'\xf1\x87391162J013', ], }, + CAR.KIA_SORENTO: { + (Ecu.fwdCamera, 0x7c4, None): [ + b'\xf1\x00UMP LKAS AT USA LHD 1.01 1.01 95740-C6550 d01' + ], + (Ecu.esp, 0x7d1, None): [ + b'\xf1\x00UM ESC \x0c 12 \x18\x05\x06 58910-C6330' + ], + (Ecu.fwdRadar, 0x7D0, None): [ + b'\xf1\x00UM__ SCC F-CUP 1.00 1.00 96400-C6500 ' + ], + (Ecu.transmission, 0x7e1, None): [ + b'\xf1\x87LDKUAA0348164HE3\x87www\x87www\x88\x88\xa8\x88w\x88\x97xw\x88\x97x\x86o\xf8\xff\x87f\x7f\xff\x15\xe0\xf1\x81U811\x00\x00\x00\x00\x00\x00\xf1\x00bcsh8p54 U811\x00\x00\x00\x00\x00\x00TUM4G33NL3V|DG' + ], + (Ecu.engine, 0x7e0, None): [ + b'\xf1\x81640F0051\x00\x00\x00\x00\x00\x00\x00\x00' + ], + }, } CHECKSUM = { diff --git a/selfdrive/car/interfaces.py b/selfdrive/car/interfaces.py index 8436a06611..b19eae8730 100644 --- a/selfdrive/car/interfaces.py +++ b/selfdrive/car/interfaces.py @@ -94,10 +94,12 @@ class CarInterfaceBase(ABC): ret.stoppingControl = True ret.longitudinalTuning.deadzoneBP = [0.] ret.longitudinalTuning.deadzoneV = [0.] + ret.longitudinalTuning.kf = 1. ret.longitudinalTuning.kpBP = [0.] ret.longitudinalTuning.kpV = [1.] ret.longitudinalTuning.kiBP = [0.] ret.longitudinalTuning.kiV = [1.] + # TODO estimate car specific lag, use .15s for now ret.longitudinalActuatorDelayLowerBound = 0.15 ret.longitudinalActuatorDelayUpperBound = 0.15 ret.steerLimitTimer = 1.0 diff --git a/selfdrive/car/subaru/values.py b/selfdrive/car/subaru/values.py index 4e17453663..9bd3d009f8 100644 --- a/selfdrive/car/subaru/values.py +++ b/selfdrive/car/subaru/values.py @@ -26,9 +26,6 @@ class CAR: OUTBACK_PREGLOBAL_2018 = "SUBARU OUTBACK 2018 - 2019" FINGERPRINTS = { - CAR.IMPREZA: [{ - 2: 8, 64: 8, 65: 8, 72: 8, 73: 8, 280: 8, 281: 8, 290: 8, 312: 8, 313: 8, 314: 8, 315: 8, 316: 8, 326: 8, 372: 8, 544: 8, 545: 8, 546: 8, 552: 8, 554: 8, 557: 8, 576: 8, 577: 8, 722: 8, 801: 8, 802: 8, 805: 8, 808: 8, 811: 8, 816: 8, 826: 8, 827: 8, 837: 8, 838: 8, 839: 8, 842: 8, 912: 8, 915: 8, 940: 8, 1614: 8, 1617: 8, 1632: 8, 1650: 8, 1657: 8, 1658: 8, 1677: 8, 1697: 8, 1722: 8, 1743: 8, 1759: 8, 1786: 5, 1787: 5, 1788: 8, 1809: 8, 1813: 8, 1817: 8, 1821: 8, 1840: 8, 1848: 8, 1924: 8, 1932: 8, 1952: 8, 1960: 8 - }], CAR.IMPREZA_2020: [{ 2: 8, 64: 8, 65: 8, 72: 8, 73: 8, 280: 8, 281: 8, 282: 8, 290: 8, 312: 8, 313: 8, 314: 8, 315: 8, 316: 8, 326: 8, 372: 8, 544: 8, 545: 8, 546: 8, 552: 8, 554: 8, 557: 8, 576: 8, 577: 8, 722: 8, 801: 8, 802: 8, 803: 8, 805: 8, 808: 8, 816: 8, 826: 8, 837: 8, 838: 8, 839: 8, 842: 8, 912: 8, 915: 8, 940: 8, 1617: 8, 1632: 8, 1650: 8, 1677: 8, 1697: 8, 1722: 8, 1743: 8, 1759: 8, 1786: 5, 1787: 5, 1788: 8, 1809: 8, 1813: 8, 1817: 8, 1821: 8, 1840: 8, 1848: 8, 1924: 8, 1932: 8, 1952: 8, 1960: 8, 1968: 8, 1976: 8, 2015: 8, 2016: 8, 2024: 8 }, @@ -56,6 +53,7 @@ FW_VERSIONS = { b'\x00\x00d\xb9\x1f@ \x10', b'\000\000e~\037@ \'', b'\x00\x00e@\x1f@ $', + b'\x00\x00d\xb9\x00\x00\x00\x00', ], (Ecu.engine, 0x7e0, None): [ b'\xbb,\xa0t\a', @@ -68,6 +66,7 @@ FW_VERSIONS = { b'\x00\xfe\xf7\x00\x00', b'\001\xfe\xf9\000\000', b'\x01\xfe\xf7\x00\x00', + b'\xf1\x00\xa4\x10@', ], }, CAR.IMPREZA: { @@ -135,6 +134,7 @@ FW_VERSIONS = { b'\xe4\xf5\002\000\000', b'\xe3\xd0\x081\x00', b'\xe3\xf5\x06\x00\x00', + b'\xf1\x00\xa4\x10@', ], }, CAR.IMPREZA_2020: { diff --git a/selfdrive/car/tesla/interface.py b/selfdrive/car/tesla/interface.py index 03012bc52e..746cd356ea 100755 --- a/selfdrive/car/tesla/interface.py +++ b/selfdrive/car/tesla/interface.py @@ -41,14 +41,14 @@ class CarInterface(CarInterfaceBase): ret.safetyConfigs = [get_safety_config(car.CarParams.SafetyModel.tesla, 0)] ret.steerLimitTimer = 1.0 - ret.steerActuatorDelay = 0.1 + ret.steerActuatorDelay = 0.25 ret.steerRateCost = 0.5 if candidate in (CAR.AP2_MODELS, CAR.AP1_MODELS): ret.mass = 2100. + STD_CARGO_KG ret.wheelbase = 2.959 ret.centerToFront = ret.wheelbase * 0.5 - ret.steerRatio = 13.5 + ret.steerRatio = 15.0 else: raise ValueError(f"Unsupported car: {candidate}") diff --git a/selfdrive/car/toyota/interface.py b/selfdrive/car/toyota/interface.py index 65c2faacab..be9d7fd587 100644 --- a/selfdrive/car/toyota/interface.py +++ b/selfdrive/car/toyota/interface.py @@ -2,7 +2,7 @@ from cereal import car from selfdrive.config import Conversions as CV from selfdrive.car.toyota.tunes import LatTunes, LongTunes, set_long_tune, set_lat_tune -from selfdrive.car.toyota.values import Ecu, CAR, ToyotaFlags, TSS2_CAR, NO_DSU_CAR, MIN_ACC_SPEED, EPS_SCALE, CarControllerParams +from selfdrive.car.toyota.values import Ecu, CAR, ToyotaFlags, TSS2_CAR, NO_DSU_CAR, MIN_ACC_SPEED, EPS_SCALE, EV_HYBRID_CAR, CarControllerParams from selfdrive.car import STD_CARGO_KG, scale_rot_inertia, scale_tire_stiffness, gen_empty_fingerprint, get_safety_config from selfdrive.car.interfaces import CarInterfaceBase @@ -217,7 +217,9 @@ class CarInterface(CarInterfaceBase): # if the smartDSU is detected, openpilot can send ACC_CMD (and the smartDSU will block it from the DSU) or not (the DSU is "connected") ret.openpilotLongitudinalControl = smartDsu or ret.enableDsu or candidate in TSS2_CAR - if 0x245 in fingerprint[0]: + # we can't use the fingerprint to detect this reliably, since + # the EV gas pedal signal can take a couple seconds to appear + if candidate in EV_HYBRID_CAR: ret.flags |= ToyotaFlags.HYBRID.value # min speed to enable ACC. if car can do stop and go, then set enabling speed diff --git a/selfdrive/car/toyota/toyotacan.py b/selfdrive/car/toyota/toyotacan.py index ca58126793..d57131dcb9 100644 --- a/selfdrive/car/toyota/toyotacan.py +++ b/selfdrive/car/toyota/toyotacan.py @@ -19,7 +19,7 @@ def create_lta_steer_command(packer, steer, steer_req, raw_cnt): "SETME_X3": 3, "PERCENTAGE": 100, "SETME_X64": 0x64, - "ANGLE": 0, # Rate limit? Lower values seeem to work better, but needs more testing + "ANGLE": 0, "STEER_ANGLE_CMD": steer, "STEER_REQUEST": steer_req, "STEER_REQUEST_2": steer_req, diff --git a/selfdrive/car/toyota/values.py b/selfdrive/car/toyota/values.py index 99937561a7..b8a6ae7e0d 100644 --- a/selfdrive/car/toyota/values.py +++ b/selfdrive/car/toyota/values.py @@ -398,6 +398,7 @@ FW_VERSIONS = { (Ecu.fwdCamera, 0x750, 109): [ b'\x028646F3305200\x00\x00\x00\x008646G5301200\x00\x00\x00\x00', b'\x028646F3305300\x00\x00\x00\x008646G5301200\x00\x00\x00\x00', + b'\x028646F3305300\x00\x00\x00\x008646G3304000\x00\x00\x00\x00', ], }, CAR.CHR: { @@ -1718,5 +1719,9 @@ TSS2_CAR = {CAR.RAV4_TSS2, CAR.COROLLA_TSS2, CAR.COROLLAH_TSS2, CAR.LEXUS_ES_TSS NO_DSU_CAR = TSS2_CAR | {CAR.CHR, CAR.CHRH, CAR.CAMRY, CAR.CAMRYH} +EV_HYBRID_CAR = {CAR.AVALONH_2019, CAR.CAMRYH, CAR.CAMRYH_TSS2, CAR.CHRH, CAR.COROLLAH_TSS2, CAR.HIGHLANDERH, CAR.HIGHLANDERH_TSS2, CAR.PRIUS, + CAR.PRIUS_V, CAR.RAV4H, CAR.RAV4H_TSS2, CAR.LEXUS_CTH, CAR.MIRAI, CAR.LEXUS_ESH, CAR.LEXUS_ESH_TSS2, CAR.LEXUS_NXH, CAR.LEXUS_RXH, + CAR.LEXUS_RXH_TSS2, CAR.PRIUS_TSS2} + # no resume button press required NO_STOP_TIMER_CAR = TSS2_CAR | {CAR.PRIUS_V, CAR.RAV4H, CAR.HIGHLANDERH, CAR.HIGHLANDER, CAR.SIENNA, CAR.LEXUS_ESH} diff --git a/selfdrive/common/modeldata.h b/selfdrive/common/modeldata.h index 8d91f7be10..9ebf7d5ff0 100644 --- a/selfdrive/common/modeldata.h +++ b/selfdrive/common/modeldata.h @@ -40,9 +40,10 @@ const mat3 fcam_intrinsic_matrix = 0.0, 2648.0, 1208.0 / 2, 0.0, 0.0, 1.0}}; -// without unwarp, focal length is for center portion only -const mat3 ecam_intrinsic_matrix = (mat3){{620.0, 0.0, 1928.0 / 2, - 0.0, 620.0, 1208.0 / 2, +// tici ecam focal probably wrong? magnification is not consistent across frame +// Need to retrain model before this can be changed +const mat3 ecam_intrinsic_matrix = (mat3){{567.0, 0.0, 1928.0 / 2, + 0.0, 567.0, 1208.0 / 2, 0.0, 0.0, 1.0}}; static inline mat3 get_model_yuv_transform(bool bayer = true) { diff --git a/selfdrive/common/params.cc b/selfdrive/common/params.cc index 25062b4975..675a45fd7f 100644 --- a/selfdrive/common/params.cc +++ b/selfdrive/common/params.cc @@ -91,6 +91,7 @@ std::unordered_map keys = { {"CarParams", CLEAR_ON_MANAGER_START | CLEAR_ON_IGNITION_ON}, {"CarParamsCache", CLEAR_ON_MANAGER_START}, {"CarVin", CLEAR_ON_MANAGER_START | CLEAR_ON_IGNITION_ON}, + {"CellularUnmetered", PERSISTENT}, {"CompletedTrainingVersion", PERSISTENT}, {"ControlsReady", CLEAR_ON_MANAGER_START | CLEAR_ON_IGNITION_ON}, {"CurrentRoute", CLEAR_ON_MANAGER_START | CLEAR_ON_IGNITION_ON}, diff --git a/selfdrive/common/version.h b/selfdrive/common/version.h index f1f7df9baf..1fcbdac271 100644 --- a/selfdrive/common/version.h +++ b/selfdrive/common/version.h @@ -1 +1 @@ -#define COMMA_VERSION "0.8.13" +#define COMMA_VERSION "0.8.14" diff --git a/selfdrive/controls/controlsd.py b/selfdrive/controls/controlsd.py index f9260762ed..1a34d31311 100755 --- a/selfdrive/controls/controlsd.py +++ b/selfdrive/controls/controlsd.py @@ -492,7 +492,8 @@ class Controls: if not self.joystick_mode: # accel PID loop pid_accel_limits = self.CI.get_pid_accel_limits(self.CP, CS.vEgo, self.v_cruise_kph * CV.KPH_TO_MS) - actuators.accel = self.LoC.update(self.active, CS, self.CP, long_plan, pid_accel_limits) + t_since_plan = (self.sm.frame - self.sm.rcv_frame['longitudinalPlan']) * DT_CTRL + actuators.accel = self.LoC.update(self.active, CS, self.CP, long_plan, pid_accel_limits, t_since_plan) # Steering PID loop and lateral MPC lat_active = self.active and not CS.steerWarning and not CS.steerError and CS.vEgo > self.CP.minSteerSpeed diff --git a/selfdrive/controls/lib/drive_helpers.py b/selfdrive/controls/lib/drive_helpers.py index 14be3d5ed8..01d7600345 100644 --- a/selfdrive/controls/lib/drive_helpers.py +++ b/selfdrive/controls/lib/drive_helpers.py @@ -37,13 +37,6 @@ class MPC_COST_LAT: STEER_RATE = 1.0 -class MPC_COST_LONG: - TTC = 5.0 - DISTANCE = 0.1 - ACCELERATION = 10.0 - JERK = 20.0 - - def rate_limit(new_value, last_value, dw_step, up_step): return clip(new_value, last_value + dw_step, last_value + up_step) diff --git a/selfdrive/controls/lib/events.py b/selfdrive/controls/lib/events.py index fbce9460a8..931932cc0d 100644 --- a/selfdrive/controls/lib/events.py +++ b/selfdrive/controls/lib/events.py @@ -1,3 +1,4 @@ +import os from enum import IntEnum from typing import Dict, Union, Callable, List, Optional @@ -6,6 +7,7 @@ import cereal.messaging as messaging from common.realtime import DT_CTRL from selfdrive.config import Conversions as CV from selfdrive.locationd.calibrationd import MIN_SPEED_FILTER +from selfdrive.version import get_short_branch AlertSize = log.ControlsState.AlertSize AlertStatus = log.ControlsState.AlertStatus @@ -206,7 +208,6 @@ def soft_disable_alert(alert_text_2: str) -> AlertCallbackType: return SoftDisableAlert(alert_text_2) return func - def user_soft_disable_alert(alert_text_2: str) -> AlertCallbackType: def func(CP: car.CarParams, sm: messaging.SubMaster, metric: bool, soft_disable_time: int) -> Alert: if soft_disable_time < int(0.5 / DT_CTRL): @@ -214,6 +215,12 @@ def user_soft_disable_alert(alert_text_2: str) -> AlertCallbackType: return UserSoftDisableAlert(alert_text_2) return func +def startup_master_alert(CP: car.CarParams, sm: messaging.SubMaster, metric: bool, soft_disable_time: int) -> Alert: + branch = get_short_branch("") + if "REPLAY" in os.environ: + branch = "replay" + + return StartupAlert("WARNING: This branch is not tested", branch, alert_status=AlertStatus.userPrompt) def below_engage_speed_alert(CP: car.CarParams, sm: messaging.SubMaster, metric: bool, soft_disable_time: int) -> Alert: return NoEntryAlert(f"Speed Below {get_display_speed(CP.minEnableSpeed, metric)}") @@ -280,8 +287,7 @@ EVENTS: Dict[int, Dict[str, Union[Alert, AlertCallbackType]]] = { }, EventName.startupMaster: { - ET.PERMANENT: StartupAlert("WARNING: This branch is not tested", - alert_status=AlertStatus.userPrompt), + ET.PERMANENT: startup_master_alert, }, # Car is recognized, but marked as dashcam only diff --git a/selfdrive/controls/lib/lane_planner.py b/selfdrive/controls/lib/lane_planner.py index 160808cff0..0230255ef9 100644 --- a/selfdrive/controls/lib/lane_planner.py +++ b/selfdrive/controls/lib/lane_planner.py @@ -9,17 +9,16 @@ from selfdrive.swaglog import cloudlog TRAJECTORY_SIZE = 33 # camera offset is meters from center car to camera -# model path is in the frame of EON's camera. TICI is 0.1 m away, -# however the average measured path difference is 0.04 m +# model path is in the frame of the camera. Empirically +# the model knows the difference between TICI and EON +# so a path offset is not needed +PATH_OFFSET = 0.00 if EON: CAMERA_OFFSET = -0.06 - PATH_OFFSET = 0.0 elif TICI: CAMERA_OFFSET = 0.04 - PATH_OFFSET = 0.04 else: CAMERA_OFFSET = 0.0 - PATH_OFFSET = 0.0 class LanePlanner: diff --git a/selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py b/selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py index eeda25627a..e4a73bf97d 100755 --- a/selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py +++ b/selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py @@ -6,7 +6,7 @@ from casadi import SX, vertcat, sin, cos from common.realtime import sec_since_boot from selfdrive.controls.lib.drive_helpers import LAT_MPC_N as N -from selfdrive.controls.lib.drive_helpers import T_IDXS +from selfdrive.modeld.constants import T_IDXS if __name__ == '__main__': # generating code from pyextra.acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver diff --git a/selfdrive/controls/lib/longcontrol.py b/selfdrive/controls/lib/longcontrol.py index 21c34aa2d6..3ba50fd0cf 100644 --- a/selfdrive/controls/lib/longcontrol.py +++ b/selfdrive/controls/lib/longcontrol.py @@ -44,7 +44,7 @@ class LongControl(): self.long_control_state = LongCtrlState.off # initialized to off self.pid = PIController((CP.longitudinalTuning.kpBP, CP.longitudinalTuning.kpV), (CP.longitudinalTuning.kiBP, CP.longitudinalTuning.kiV), - rate=1 / DT_CTRL) + k_f = CP.longitudinalTuning.kf, rate=1 / DT_CTRL) self.v_pid = 0.0 self.last_output_accel = 0.0 @@ -53,20 +53,21 @@ class LongControl(): self.pid.reset() self.v_pid = v_pid - def update(self, active, CS, CP, long_plan, accel_limits): + def update(self, active, CS, CP, long_plan, accel_limits, t_since_plan): """Update longitudinal control. This updates the state machine and runs a PID loop""" # Interp control trajectory - # TODO estimate car specific lag, use .15s for now speeds = long_plan.speeds if len(speeds) == CONTROL_N: - v_target_lower = interp(CP.longitudinalActuatorDelayLowerBound, T_IDXS[:CONTROL_N], speeds) - a_target_lower = 2 * (v_target_lower - speeds[0])/CP.longitudinalActuatorDelayLowerBound - long_plan.accels[0] + v_target = interp(t_since_plan, T_IDXS[:CONTROL_N], speeds) + a_target = interp(t_since_plan, T_IDXS[:CONTROL_N], long_plan.accels) - v_target_upper = interp(CP.longitudinalActuatorDelayUpperBound, T_IDXS[:CONTROL_N], speeds) - a_target_upper = 2 * (v_target_upper - speeds[0])/CP.longitudinalActuatorDelayUpperBound - long_plan.accels[0] + v_target_lower = interp(CP.longitudinalActuatorDelayLowerBound + t_since_plan, T_IDXS[:CONTROL_N], speeds) + a_target_lower = 2 * (v_target_lower - v_target) / CP.longitudinalActuatorDelayLowerBound - a_target + + v_target_upper = interp(CP.longitudinalActuatorDelayUpperBound + t_since_plan, T_IDXS[:CONTROL_N], speeds) + a_target_upper = 2 * (v_target_upper - v_target) / CP.longitudinalActuatorDelayUpperBound - a_target a_target = min(a_target_lower, a_target_upper) - v_target = speeds[0] v_target_future = speeds[-1] else: v_target = 0.0 diff --git a/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py b/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py index 8625df745a..f9f9c31bb4 100644 --- a/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py +++ b/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py @@ -219,19 +219,20 @@ class LongitudinalMpc: self.x0 = np.zeros(X_DIM) self.set_weights() - def set_weights(self): + def set_weights(self, prev_accel_constraint=True): if self.e2e: self.set_weights_for_xva_policy() self.params[:,0] = -10. self.params[:,1] = 10. self.params[:,2] = 1e5 else: - self.set_weights_for_lead_policy() + self.set_weights_for_lead_policy(prev_accel_constraint) - def set_weights_for_lead_policy(self): - W = np.asfortranarray(np.diag([X_EGO_OBSTACLE_COST, X_EGO_COST, V_EGO_COST, A_EGO_COST, A_CHANGE_COST, J_EGO_COST])) + def set_weights_for_lead_policy(self, prev_accel_constraint=True): + a_change_cost = A_CHANGE_COST if prev_accel_constraint else 0 + W = np.asfortranarray(np.diag([X_EGO_OBSTACLE_COST, X_EGO_COST, V_EGO_COST, A_EGO_COST, a_change_cost, J_EGO_COST])) for i in range(N): - W[4,4] = A_CHANGE_COST * np.interp(T_IDXS[i], [0.0, 1.0, 2.0], [1.0, 1.0, 0.0]) + W[4,4] = a_change_cost * np.interp(T_IDXS[i], [0.0, 1.0, 2.0], [1.0, 1.0, 0.0]) self.solver.cost_set(i, 'W', W) # Setting the slice without the copy make the array not contiguous, # causing issues with the C interface. @@ -300,9 +301,8 @@ class LongitudinalMpc: self.cruise_min_a = min_a self.cruise_max_a = max_a - def update(self, carstate, radarstate, v_cruise, prev_accel_constraint=False): + def update(self, carstate, radarstate, v_cruise): v_ego = self.x0[1] - a_ego = self.x0[2] self.status = radarstate.leadOne.status or radarstate.leadTwo.status lead_xv_0 = self.process_lead(radarstate.leadOne) @@ -330,10 +330,7 @@ class LongitudinalMpc: x_obstacles = np.column_stack([lead_0_obstacle, lead_1_obstacle, cruise_obstacle]) self.source = SOURCES[np.argmin(x_obstacles[0])] self.params[:,2] = np.min(x_obstacles, axis=1) - if prev_accel_constraint: - self.params[:,3] = np.copy(self.prev_a) - else: - self.params[:,3] = a_ego + self.params[:,3] = np.copy(self.prev_a) self.run() if (np.any(lead_xv_0[:,0] - self.x_sol[:,0] < CRASH_DISTANCE) and diff --git a/selfdrive/controls/lib/longitudinal_planner.py b/selfdrive/controls/lib/longitudinal_planner.py index 8c79404058..865442f735 100755 --- a/selfdrive/controls/lib/longitudinal_planner.py +++ b/selfdrive/controls/lib/longitudinal_planner.py @@ -58,7 +58,6 @@ class Planner: def update(self, sm): v_ego = sm['carState'].vEgo - a_ego = sm['carState'].aEgo v_cruise_kph = sm['controlsState'].vCruise v_cruise_kph = min(v_cruise_kph, V_CRUISE_MAX) @@ -67,12 +66,16 @@ class Planner: long_control_state = sm['controlsState'].longControlState force_slow_decel = sm['controlsState'].forceDecel - prev_accel_constraint = True - if long_control_state == LongCtrlState.off or sm['carState'].gasPressed: + # Reset current state when not engaged, or user is controlling the speed + reset_state = long_control_state == LongCtrlState.off + reset_state = reset_state or sm['carState'].gasPressed + + # No change cost when user is controlling the speed, or when standstill + prev_accel_constraint = not (reset_state or sm['carState'].standstill) + + if reset_state: self.v_desired_filter.x = v_ego - self.a_desired = a_ego - # Smoothly changing between accel trajectory is only relevant when OP is driving - prev_accel_constraint = False + self.a_desired = 0.0 # Prevent divergence, smooth in current v_ego self.v_desired_filter.x = max(0.0, self.v_desired_filter.update(v_ego)) @@ -86,9 +89,12 @@ class Planner: # clip limits, cannot init MPC outside of bounds accel_limits_turns[0] = min(accel_limits_turns[0], self.a_desired + 0.05) accel_limits_turns[1] = max(accel_limits_turns[1], self.a_desired - 0.05) + + self.mpc.set_weights(prev_accel_constraint) self.mpc.set_accel_limits(accel_limits_turns[0], accel_limits_turns[1]) self.mpc.set_cur_state(self.v_desired_filter.x, self.a_desired) - self.mpc.update(sm['carState'], sm['radarState'], v_cruise, prev_accel_constraint=prev_accel_constraint) + self.mpc.update(sm['carState'], sm['radarState'], v_cruise) + self.v_desired_trajectory = np.interp(T_IDXS[:CONTROL_N], T_IDXS_MPC, self.mpc.v_solution) self.a_desired_trajectory = np.interp(T_IDXS[:CONTROL_N], T_IDXS_MPC, self.mpc.a_solution) self.j_desired_trajectory = np.interp(T_IDXS[:CONTROL_N], T_IDXS_MPC[:-1], self.mpc.j_solution) diff --git a/selfdrive/controls/lib/pid.py b/selfdrive/controls/lib/pid.py index 28c6438191..c91eb692cb 100644 --- a/selfdrive/controls/lib/pid.py +++ b/selfdrive/controls/lib/pid.py @@ -13,7 +13,7 @@ def apply_deadzone(error, deadzone): return error class PIController(): - def __init__(self, k_p, k_i, k_f=1., pos_limit=None, neg_limit=None, rate=100): + def __init__(self, k_p, k_i, k_f=0., pos_limit=None, neg_limit=None, rate=100): self._k_p = k_p # proportional gain self._k_i = k_i # integral gain self.k_f = k_f # feedforward gain diff --git a/selfdrive/debug/can_table.py b/selfdrive/debug/can_table.py new file mode 100755 index 0000000000..1569849053 --- /dev/null +++ b/selfdrive/debug/can_table.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +import argparse +import pandas as pd # pylint: disable=import-error + +import cereal.messaging as messaging + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Cabana-like table of bits for your terminal", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("addr", type=str, nargs=1) + parser.add_argument("bus", type=int, default=0, nargs='?') + + args = parser.parse_args() + + addr = int(args.addr[0], 0) + can = messaging.sub_sock('can', conflate=False, timeout=None) + + print(f"waiting for {hex(addr)} ({addr}) on bus {args.bus}...") + + latest = None + while True: + for msg in messaging.drain_sock(can, wait_for_one=True): + for m in msg.can: + if m.address == addr and m.src == args.bus: + latest = m + + if latest is None: + continue + + rows = [] + for b in latest.dat: + r = list(bin(b).lstrip('0b').zfill(8)) + r += [hex(b)] + rows.append(r) + + df = pd.DataFrame(data=rows) + table = df.to_markdown(tablefmt='grid') + print(f"\n\n{hex(addr)} ({addr}) on bus {args.bus}\n{table}") diff --git a/selfdrive/debug/clear_dtc.py b/selfdrive/debug/clear_dtc.py new file mode 100755 index 0000000000..f4d38367b1 --- /dev/null +++ b/selfdrive/debug/clear_dtc.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +import sys +from subprocess import check_output, CalledProcessError +from panda import Panda +from panda.python.uds import UdsClient, MessageTimeoutError, SESSION_TYPE, DTC_GROUP_TYPE + +try: + check_output(["pidof", "boardd"]) + print("boardd is running, please kill openpilot before running this script! (aborted)") + sys.exit(1) +except CalledProcessError as e: + if e.returncode != 1: # 1 == no process found (boardd not running) + raise e + +panda = Panda() +panda.set_safety_mode(Panda.SAFETY_ELM327) +address = 0x7DF # functional (broadcast) address +uds_client = UdsClient(panda, address, bus=0, debug=False) +print("extended diagnostic session ...") +try: + uds_client.diagnostic_session_control(SESSION_TYPE.EXTENDED_DIAGNOSTIC) +except MessageTimeoutError: + pass # functional address isn't properly handled so a timeout occurs +print("clear diagnostic info ...") +try: + uds_client.clear_diagnostic_information(DTC_GROUP_TYPE.ALL) +except MessageTimeoutError: + pass # functional address isn't properly handled so a timeout occurs +print("") +print("you may need to power cycle your vehicle now") diff --git a/selfdrive/debug/test_fw_query_on_routes.py b/selfdrive/debug/test_fw_query_on_routes.py index 44b3ba16af..6b29ae820d 100755 --- a/selfdrive/debug/test_fw_query_on_routes.py +++ b/selfdrive/debug/test_fw_query_on_routes.py @@ -15,6 +15,7 @@ from selfdrive.car.honda.values import FW_VERSIONS as HONDA_FW_VERSIONS from selfdrive.car.hyundai.values import FW_VERSIONS as HYUNDAI_FW_VERSIONS from selfdrive.car.volkswagen.values import FW_VERSIONS as VW_FW_VERSIONS from selfdrive.car.mazda.values import FW_VERSIONS as MAZDA_FW_VERSIONS +from selfdrive.car.subaru.values import FW_VERSIONS as SUBARU_FW_VERSIONS NO_API = "NO_API" in os.environ @@ -23,6 +24,7 @@ SUPPORTED_CARS |= set(interface_names['honda']) SUPPORTED_CARS |= set(interface_names['hyundai']) SUPPORTED_CARS |= set(interface_names['volkswagen']) SUPPORTED_CARS |= set(interface_names['mazda']) +SUPPORTED_CARS |= set(interface_names['subaru']) try: from xx.pipeline.c.CarState import migration @@ -120,7 +122,7 @@ if __name__ == "__main__": print("Mismatches") found = False - for car_fws in [TOYOTA_FW_VERSIONS, HONDA_FW_VERSIONS, HYUNDAI_FW_VERSIONS, VW_FW_VERSIONS, MAZDA_FW_VERSIONS]: + for car_fws in [TOYOTA_FW_VERSIONS, HONDA_FW_VERSIONS, HYUNDAI_FW_VERSIONS, VW_FW_VERSIONS, MAZDA_FW_VERSIONS, SUBARU_FW_VERSIONS]: if live_fingerprint in car_fws: found = True expected = car_fws[live_fingerprint] diff --git a/selfdrive/loggerd/bootlog.cc b/selfdrive/loggerd/bootlog.cc index c5897091dc..a723124847 100644 --- a/selfdrive/loggerd/bootlog.cc +++ b/selfdrive/loggerd/bootlog.cc @@ -51,8 +51,6 @@ static kj::Array build_boot_log() { } int main(int argc, char** argv) { - clear_locks(LOG_ROOT); - const std::string path = LOG_ROOT + "/boot/" + logger_get_route_name() + ".bz2"; LOGW("bootlog to %s", path.c_str()); diff --git a/selfdrive/loggerd/logger.cc b/selfdrive/loggerd/logger.cc index 81cfd131f6..6ba520d162 100644 --- a/selfdrive/loggerd/logger.cc +++ b/selfdrive/loggerd/logger.cc @@ -267,15 +267,3 @@ void lh_close(LoggerHandle* h) { } pthread_mutex_unlock(&h->lock); } - -int clear_locks_fn(const char* fpath, const struct stat *sb, int tyupeflag) { - const char* dot = strrchr(fpath, '.'); - if (dot && strcmp(dot, ".lock") == 0) { - unlink(fpath); - } - return 0; -} - -void clear_locks(const std::string log_root) { - ftw(log_root.c_str(), clear_locks_fn, 16); -} diff --git a/selfdrive/loggerd/logger.h b/selfdrive/loggerd/logger.h index e85d7810e0..bdda9d6917 100644 --- a/selfdrive/loggerd/logger.h +++ b/selfdrive/loggerd/logger.h @@ -96,4 +96,3 @@ void logger_log(LoggerState *s, uint8_t* data, size_t data_size, bool in_qlog); void lh_log(LoggerHandle* h, uint8_t* data, size_t data_size, bool in_qlog); void lh_close(LoggerHandle* h); -void clear_locks(const std::string log_root); diff --git a/selfdrive/loggerd/omx_encoder.cc b/selfdrive/loggerd/omx_encoder.cc index 672238eaeb..f37c900150 100644 --- a/selfdrive/loggerd/omx_encoder.cc +++ b/selfdrive/loggerd/omx_encoder.cc @@ -66,7 +66,6 @@ OMX_ERRORTYPE OmxEncoder::event_handler(OMX_HANDLETYPE component, OMX_PTR app_da OMX_ERRORTYPE OmxEncoder::empty_buffer_done(OMX_HANDLETYPE component, OMX_PTR app_data, OMX_BUFFERHEADERTYPE *buffer) { - // printf("empty_buffer_done\n"); OmxEncoder *e = (OmxEncoder*)app_data; e->free_in.push(buffer); return OMX_ErrorNone; @@ -74,7 +73,6 @@ OMX_ERRORTYPE OmxEncoder::empty_buffer_done(OMX_HANDLETYPE component, OMX_PTR ap OMX_ERRORTYPE OmxEncoder::fill_buffer_done(OMX_HANDLETYPE component, OMX_PTR app_data, OMX_BUFFERHEADERTYPE *buffer) { - // printf("fill_buffer_done\n"); OmxEncoder *e = (OmxEncoder*)app_data; e->done_out.push(buffer); return OMX_ErrorNone; @@ -155,7 +153,6 @@ static const char* omx_color_fomat_name(uint32_t format) { // ***** encoder functions ***** - OmxEncoder::OmxEncoder(const char* filename, int width, int height, int fps, int bitrate, bool h265, bool downscale, bool write) { this->filename = filename; this->write = write; @@ -325,27 +322,82 @@ OmxEncoder::OmxEncoder(const char* filename, int width, int height, int fps, int for (auto &buf : this->in_buf_headers) { this->free_in.push(buf); } + + LOGE("omx initialized - in: %d - out %d", this->in_buf_headers.size(), this->out_buf_headers.size()); +} + +void OmxEncoder::callback_handler(OmxEncoder *e) { + // OMX documentation specifies to not empty the buffer from the callback function + // so we use this intermediate handler to copy the buffer for further writing + // and give it back to OMX. We could also send the data over msgq from here. + bool exit = false; + + while (!exit) { + OMX_BUFFERHEADERTYPE *buffer = e->done_out.pop(); + OmxBuffer *new_buffer = (OmxBuffer*)malloc(sizeof(OmxBuffer) + buffer->nFilledLen); + assert(new_buffer); + + new_buffer->header = *buffer; + memcpy(new_buffer->data, buffer->pBuffer + buffer->nOffset, buffer->nFilledLen); + + e->to_write.push(new_buffer); + +#ifdef QCOM2 + if (buffer->nFlags & OMX_BUFFERFLAG_CODECCONFIG) { + buffer->nTimeStamp = 0; + } + + if (buffer->nFlags & OMX_BUFFERFLAG_EOS) { + buffer->nTimeStamp = 0; + } +#endif + + if (buffer->nFlags & OMX_BUFFERFLAG_EOS) { + exit = true; + } + + // give omx back the buffer + // TOOD: fails when shutting down + OMX_CHECK(OMX_FillThisBuffer(e->handle, buffer)); + } } -void OmxEncoder::handle_out_buf(OmxEncoder *e, OMX_BUFFERHEADERTYPE *out_buf) { + +void OmxEncoder::write_handler(OmxEncoder *e){ + bool exit = false; + while (!exit) { + OmxBuffer *out_buf = e->to_write.pop(); + OmxEncoder::handle_out_buf(e, out_buf); + + if (out_buf->header.nFlags & OMX_BUFFERFLAG_EOS) { + exit = true; + } + + free(out_buf); + } +} + + +void OmxEncoder::handle_out_buf(OmxEncoder *e, OmxBuffer *out_buf) { int err; - uint8_t *buf_data = out_buf->pBuffer + out_buf->nOffset; - if (out_buf->nFlags & OMX_BUFFERFLAG_CODECCONFIG) { - if (e->codec_config_len < out_buf->nFilledLen) { - e->codec_config = (uint8_t *)realloc(e->codec_config, out_buf->nFilledLen); + if (out_buf->header.nFlags & OMX_BUFFERFLAG_CODECCONFIG) { + if (e->codec_config_len < out_buf->header.nFilledLen) { + e->codec_config = (uint8_t *)realloc(e->codec_config, out_buf->header.nFilledLen); } - e->codec_config_len = out_buf->nFilledLen; - memcpy(e->codec_config, buf_data, out_buf->nFilledLen); + e->codec_config_len = out_buf->header.nFilledLen; + memcpy(e->codec_config, out_buf->data, out_buf->header.nFilledLen); + + // TODO: is still needed? #ifdef QCOM2 - out_buf->nTimeStamp = 0; + out_buf->header.nTimeStamp = 0; #endif } if (e->of) { //printf("write %d flags 0x%x\n", out_buf->nFilledLen, out_buf->nFlags); - size_t written = util::safe_fwrite(buf_data, 1, out_buf->nFilledLen, e->of); - if (written != out_buf->nFilledLen) { + size_t written = util::safe_fwrite(out_buf->data, 1, out_buf->header.nFilledLen, e->of); + if (written != out_buf->header.nFilledLen) { LOGE("failed to write file.errno=%d", errno); } } @@ -365,20 +417,20 @@ void OmxEncoder::handle_out_buf(OmxEncoder *e, OMX_BUFFERHEADERTYPE *out_buf) { e->wrote_codec_config = true; } - if (out_buf->nTimeStamp > 0) { + if (out_buf->header.nTimeStamp > 0) { // input timestamps are in microseconds AVRational in_timebase = {1, 1000000}; AVPacket pkt; av_init_packet(&pkt); - pkt.data = buf_data; - pkt.size = out_buf->nFilledLen; + pkt.data = out_buf->data; + pkt.size = out_buf->header.nFilledLen; enum AVRounding rnd = static_cast(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX); - pkt.pts = pkt.dts = av_rescale_q_rnd(out_buf->nTimeStamp, in_timebase, e->ofmt_ctx->streams[0]->time_base, rnd); + pkt.pts = pkt.dts = av_rescale_q_rnd(out_buf->header.nTimeStamp, in_timebase, e->ofmt_ctx->streams[0]->time_base, rnd); pkt.duration = av_rescale_q(50*1000, in_timebase, e->ofmt_ctx->streams[0]->time_base); - if (out_buf->nFlags & OMX_BUFFERFLAG_SYNCFRAME) { + if (out_buf->header.nFlags & OMX_BUFFERFLAG_SYNCFRAME) { pkt.flags |= AV_PKT_FLAG_KEY; } @@ -388,14 +440,6 @@ void OmxEncoder::handle_out_buf(OmxEncoder *e, OMX_BUFFERHEADERTYPE *out_buf) { av_free_packet(&pkt); } } - - // give omx back the buffer -#ifdef QCOM2 - if (out_buf->nFlags & OMX_BUFFERFLAG_EOS) { - out_buf->nTimeStamp = 0; - } -#endif - OMX_CHECK(OMX_FillThisBuffer(e->handle, out_buf)); } int OmxEncoder::encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const uint8_t *v_ptr, @@ -459,15 +503,6 @@ int OmxEncoder::encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const u OMX_CHECK(OMX_EmptyThisBuffer(this->handle, in_buf)); - // pump output - while (true) { - OMX_BUFFERHEADERTYPE *out_buf; - if (!this->done_out.try_pop(out_buf)) { - break; - } - handle_out_buf(this, out_buf); - } - this->dirty = true; this->counter++; @@ -524,6 +559,10 @@ void OmxEncoder::encoder_open(const char* path) { assert(lock_fd >= 0); close(lock_fd); + // start writer threads + callback_handler_thread = std::thread(OmxEncoder::callback_handler, this); + write_handler_thread = std::thread(OmxEncoder::write_handler, this); + this->is_open = true; this->counter = 0; } @@ -541,18 +580,12 @@ void OmxEncoder::encoder_close() { OMX_CHECK(OMX_EmptyThisBuffer(this->handle, in_buf)); - while (true) { - OMX_BUFFERHEADERTYPE *out_buf = this->done_out.pop(); - - handle_out_buf(this, out_buf); - - if (out_buf->nFlags & OMX_BUFFERFLAG_EOS) { - break; - } - } this->dirty = false; } + callback_handler_thread.join(); + write_handler_thread.join(); + if (this->remuxing) { av_write_trailer(this->ofmt_ctx); avcodec_free_context(&this->codec_ctx); @@ -591,9 +624,14 @@ OmxEncoder::~OmxEncoder() { OMX_CHECK(OMX_FreeHandle(this->handle)); - OMX_BUFFERHEADERTYPE *out_buf; - while (this->free_in.try_pop(out_buf)); - while (this->done_out.try_pop(out_buf)); + OMX_BUFFERHEADERTYPE *buf; + while (this->free_in.try_pop(buf)); + while (this->done_out.try_pop(buf)); + + OmxBuffer *write_buf; + while (this->to_write.try_pop(write_buf)) { + free(write_buf); + }; if (this->codec_config) { free(this->codec_config); diff --git a/selfdrive/loggerd/omx_encoder.h b/selfdrive/loggerd/omx_encoder.h index 74ea17f640..e3087e31bf 100644 --- a/selfdrive/loggerd/omx_encoder.h +++ b/selfdrive/loggerd/omx_encoder.h @@ -3,6 +3,7 @@ #include #include #include +#include #include extern "C" { @@ -12,6 +13,12 @@ extern "C" { #include "selfdrive/common/queue.h" #include "selfdrive/loggerd/encoder.h" +struct OmxBuffer { + OMX_BUFFERHEADERTYPE header; + OMX_U8 data[]; +}; + + // OmxEncoder, lossey codec using hardware hevc class OmxEncoder : public VideoEncoder { public: @@ -32,7 +39,9 @@ public: private: void wait_for_state(OMX_STATETYPE state); - static void handle_out_buf(OmxEncoder *e, OMX_BUFFERHEADERTYPE *out_buf); + static void callback_handler(OmxEncoder *e); + static void write_handler(OmxEncoder *e); + static void handle_out_buf(OmxEncoder *e, OmxBuffer *out_buf); int width, height, fps; char vid_path[1024]; @@ -41,6 +50,8 @@ private: bool dirty = false; bool write = false; int counter = 0; + std::thread callback_handler_thread; + std::thread write_handler_thread; const char* filename; FILE *of = nullptr; @@ -62,6 +73,7 @@ private: SafeQueue free_in; SafeQueue done_out; + SafeQueue to_write; AVFormatContext *ofmt_ctx; AVCodecContext *codec_ctx; diff --git a/selfdrive/loggerd/raw_logger.cc b/selfdrive/loggerd/raw_logger.cc index f103822a13..8d27e4a7a1 100644 --- a/selfdrive/loggerd/raw_logger.cc +++ b/selfdrive/loggerd/raw_logger.cc @@ -28,7 +28,6 @@ RawLogger::RawLogger(const char* filename, int width, int height, int fps, // TODO: respect write arg - av_register_all(); codec = avcodec_find_encoder(AV_CODEC_ID_FFVHUFF); // codec = avcodec_find_encoder(AV_CODEC_ID_FFV1); assert(codec); @@ -111,8 +110,6 @@ void RawLogger::encoder_close() { int err = av_write_trailer(format_ctx); assert(err == 0); - avcodec_close(stream->codec); - err = avio_closep(&format_ctx->pb); assert(err == 0); @@ -155,18 +152,32 @@ int RawLogger::encode_frame(const uint8_t *y_ptr, const uint8_t *u_ptr, const ui int ret = counter; - int got_output = 0; - int err = avcodec_encode_video2(codec_ctx, &pkt, frame, &got_output); - if (err) { - LOGE("encoding error\n"); + int err = avcodec_send_frame(codec_ctx, frame); + if (ret < 0) { + LOGE("avcode_send_frame error %d", err); ret = -1; - } else if (got_output) { + } + + while (ret >= 0){ + err = avcodec_receive_packet(codec_ctx, &pkt); + if (err == AVERROR_EOF) { + break; + } else if (err == AVERROR(EAGAIN)) { + // Encoder might need a few frames on startup to get started. Keep going + ret = 0; + break; + } else if (err < 0) { + LOGE("avcodec_receive_packet error %d", err); + ret = -1; + break; + } + av_packet_rescale_ts(&pkt, codec_ctx->time_base, stream->time_base); pkt.stream_index = 0; err = av_interleaved_write_frame(format_ctx, &pkt); if (err < 0) { - LOGE("encoder writer error\n"); + LOGE("av_interleaved_write_frame %d", err); ret = -1; } else { counter++; diff --git a/selfdrive/loggerd/raw_logger.h b/selfdrive/loggerd/raw_logger.h index 75d906784d..3c7fed38cc 100644 --- a/selfdrive/loggerd/raw_logger.h +++ b/selfdrive/loggerd/raw_logger.h @@ -32,7 +32,7 @@ private: std::string vid_path, lock_path; - AVCodec *codec = NULL; + const AVCodec *codec = NULL; AVCodecContext *codec_ctx = NULL; AVStream *stream = NULL; diff --git a/selfdrive/loggerd/tests/loggerd_tests_common.py b/selfdrive/loggerd/tests/loggerd_tests_common.py index 1a16e343e6..59952fd2c3 100644 --- a/selfdrive/loggerd/tests/loggerd_tests_common.py +++ b/selfdrive/loggerd/tests/loggerd_tests_common.py @@ -14,7 +14,8 @@ def create_random_file(file_path, size_mb, lock=False): pass lock_path = file_path + ".lock" - os.close(os.open(lock_path, os.O_CREAT | os.O_EXCL)) + if lock: + os.close(os.open(lock_path, os.O_CREAT | os.O_EXCL)) chunks = 128 chunk_bytes = int(size_mb * 1024 * 1024 / chunks) @@ -24,9 +25,6 @@ def create_random_file(file_path, size_mb, lock=False): for _ in range(chunks): f.write(data) - if not lock: - os.remove(lock_path) - class MockResponse(): def __init__(self, text, status_code): self.text = text diff --git a/selfdrive/loggerd/tests/test_loggerd.cc b/selfdrive/loggerd/tests/test_loggerd.cc index 849b350ee4..d84185cbba 100644 --- a/selfdrive/loggerd/tests/test_loggerd.cc +++ b/selfdrive/loggerd/tests/test_loggerd.cc @@ -91,21 +91,3 @@ TEST_CASE("trigger_rotate") { REQUIRE(frame_id == start_frame_id + encoder_seg * (SEGMENT_LENGTH * MAIN_FPS)); } } - -TEST_CASE("clear_locks") { - std::vector dirs; - for (int i = 0; i < 10; ++i) { - std::string &path = dirs.emplace_back(LOG_ROOT + "/" + std::to_string(i)); - REQUIRE(util::create_directories(path, 0775)); - std::ofstream{path + "/.lock"}; - REQUIRE(util::file_exists(path + "/.lock")); - } - - clear_locks(LOG_ROOT); - - for (const auto &dir : dirs) { - std::string lock_file = dir + "/.lock"; - REQUIRE(util::file_exists(lock_file) == false); - rmdir(dir.c_str()); - } -} diff --git a/selfdrive/loggerd/tests/test_uploader.py b/selfdrive/loggerd/tests/test_uploader.py index 54c65db73d..01b529468b 100755 --- a/selfdrive/loggerd/tests/test_uploader.py +++ b/selfdrive/loggerd/tests/test_uploader.py @@ -133,9 +133,11 @@ class TestUploader(UploaderTestCase): self.assertTrue(log_handler.upload_order == exp_order, "Files uploaded in wrong order") def test_no_upload_with_lock_file(self): + self.start_thread() + + time.sleep(0.25) f_paths = self.gen_files(lock=True, boot=False) - self.start_thread() # allow enough time that files should have been uploaded if they would be uploaded time.sleep(5) self.join_thread() @@ -144,5 +146,15 @@ class TestUploader(UploaderTestCase): self.assertFalse(getxattr(f_path, uploader.UPLOAD_ATTR_NAME), "File upload when locked") + def test_clear_locks_on_startup(self): + f_paths = self.gen_files(lock=True, boot=False) + self.start_thread() + time.sleep(1) + self.join_thread() + + for f_path in f_paths: + self.assertFalse(os.path.isfile(f_path + ".lock"), "File lock not cleared on startup") + + if __name__ == "__main__": unittest.main(failfast=True) diff --git a/selfdrive/loggerd/uploader.py b/selfdrive/loggerd/uploader.py index 2013a522bc..afdc5a6ed1 100644 --- a/selfdrive/loggerd/uploader.py +++ b/selfdrive/loggerd/uploader.py @@ -213,6 +213,8 @@ class Uploader(): return msg def uploader_fn(exit_event): + clear_locks(ROOT) + params = Params() dongle_id = params.get("DongleId", encoding='utf8') diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript index ceb3c560bc..9066a653fa 100644 --- a/selfdrive/modeld/SConscript +++ b/selfdrive/modeld/SConscript @@ -1,3 +1,5 @@ +import os + Import('env', 'arch', 'cereal', 'messaging', 'common', 'gpucommon', 'visionipc') lenv = env.Clone() @@ -23,11 +25,15 @@ common_src = [ thneed_src = [ "thneed/thneed.cc", "thneed/serialize.cc", + "thneed/optimizer.cc", "runners/thneedmodel.cc", ] use_thneed = not GetOption('no_thneed') +use_extra = True # arch == "larch64" +lenv['CXXFLAGS'].append('-DUSE_EXTRA=true' if use_extra else '-DUSE_EXTRA=false') + if arch == "aarch64" or arch == "larch64": libs += ['gsl', 'CB'] libs += ['gnustl_shared'] if arch == "aarch64" else ['pthread', 'dl'] @@ -62,12 +68,16 @@ common_model = lenv.Object(common_src) # build thneed model if use_thneed and arch in ("aarch64", "larch64"): + fn = "../../models/big_supercombo" if use_extra else "../../models/supercombo" compiler = lenv.Program('thneed/compile', ["thneed/compile.cc"]+common_model, LIBS=libs) - cmd = f"cd {Dir('.').abspath} && {compiler[0].abspath} ../../models/supercombo.dlc ../../models/supercombo.thneed --binary" + cmd = f"cd {Dir('.').abspath} && {compiler[0].abspath} {fn}.dlc {fn}.thneed --binary" lib_paths = ':'.join(Dir(p).abspath for p in lenv["LIBPATH"]) - cenv = Environment(ENV={'LD_LIBRARY_PATH': f"{lib_paths}:{lenv['ENV']['LD_LIBRARY_PATH']}"}) - cenv.Command("../../models/supercombo.thneed", ["../../models/supercombo.dlc", compiler], cmd) + kernel_path = os.path.join(Dir('.').abspath, "thneed", "kernels") + cenv = Environment(ENV={'LD_LIBRARY_PATH': f"{lib_paths}:{lenv['ENV']['LD_LIBRARY_PATH']}", 'KERNEL_PATH': kernel_path}) + + kernels = [os.path.join(kernel_path, x) for x in os.listdir(kernel_path) if x.endswith(".cl")] + cenv.Command(fn + ".thneed", [fn + ".dlc", kernels, compiler], cmd) lenv.Program('_dmonitoringmodeld', [ "dmonitoringmodeld.cc", diff --git a/selfdrive/modeld/modeld.cc b/selfdrive/modeld/modeld.cc index 0facf3fa0b..697e31a7b7 100644 --- a/selfdrive/modeld/modeld.cc +++ b/selfdrive/modeld/modeld.cc @@ -1,6 +1,7 @@ #include #include #include +#include #include @@ -15,34 +16,34 @@ ExitHandler do_exit; -mat3 update_calibration(cereal::LiveCalibrationData::Reader live_calib, bool wide_camera) { +mat3 update_calibration(Eigen::Matrix &extrinsics, bool wide_camera, bool bigmodel_frame) { /* import numpy as np from common.transformations.model import medmodel_frame_from_road_frame medmodel_frame_from_ground = medmodel_frame_from_road_frame[:, (0, 1, 3)] ground_from_medmodel_frame = np.linalg.inv(medmodel_frame_from_ground) */ - static const auto ground_from_medmodel_frame = (Eigen::Matrix() << - 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, + static const auto ground_from_medmodel_frame = (Eigen::Matrix() << + 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, -1.09890110e-03, 0.00000000e+00, 2.81318681e-01, -1.84808520e-20, 9.00738606e-04, -4.28751576e-02).finished(); - static const auto cam_intrinsics = Eigen::Matrix(wide_camera ? ecam_intrinsic_matrix.v : fcam_intrinsic_matrix.v); - static const mat3 yuv_transform = get_model_yuv_transform(); + static const auto ground_from_sbigmodel_frame = (Eigen::Matrix() << + 0.00000000e+00, 7.31372216e-19, 1.00000000e+00, + -2.19780220e-03, 4.11497335e-19, 5.62637363e-01, + -5.46146580e-20, 1.80147721e-03, -2.73464241e-01).finished(); - auto extrinsic_matrix = live_calib.getExtrinsicMatrix(); - Eigen::Matrix extrinsic_matrix_eigen; - for (int i = 0; i < 4*3; i++) { - extrinsic_matrix_eigen(i / 4, i % 4) = extrinsic_matrix[i]; - } + const auto cam_intrinsics = Eigen::Matrix(wide_camera ? ecam_intrinsic_matrix.v : fcam_intrinsic_matrix.v); + static const mat3 yuv_transform = get_model_yuv_transform(); - auto camera_frame_from_road_frame = cam_intrinsics * extrinsic_matrix_eigen; + auto ground_from_model_frame = bigmodel_frame ? ground_from_sbigmodel_frame : ground_from_medmodel_frame; + auto camera_frame_from_road_frame = cam_intrinsics * extrinsics; Eigen::Matrix camera_frame_from_ground; camera_frame_from_ground.col(0) = camera_frame_from_road_frame.col(0); camera_frame_from_ground.col(1) = camera_frame_from_road_frame.col(1); camera_frame_from_ground.col(2) = camera_frame_from_road_frame.col(3); - auto warp_matrix = camera_frame_from_ground * ground_from_medmodel_frame; + auto warp_matrix = camera_frame_from_ground * ground_from_model_frame; mat3 transform = {}; for (int i=0; i<3*3; i++) { transform.v[i] = warp_matrix(i / 3, i % 3); @@ -50,7 +51,7 @@ mat3 update_calibration(cereal::LiveCalibrationData::Reader live_calib, bool wid return matmul3(yuv_transform, transform); } -void run_model(ModelState &model, VisionIpcClient &vipc_client, bool wide_camera) { +void run_model(ModelState &model, VisionIpcClient &vipc_client_main, VisionIpcClient &vipc_client_extra, bool main_wide_camera, bool use_extra, bool use_extra_client) { // messaging PubMaster pm({"modelV2", "cameraOdometry"}); SubMaster sm({"lateralPlan", "roadCameraState", "liveCalibration"}); @@ -62,20 +63,80 @@ void run_model(ModelState &model, VisionIpcClient &vipc_client, bool wide_camera double last = 0; uint32_t run_count = 0; - mat3 model_transform = {}; + mat3 model_transform_main = {}; + mat3 model_transform_extra = {}; bool live_calib_seen = false; + VisionBuf *buf_main = nullptr; + VisionBuf *buf_extra = nullptr; + + VisionIpcBufExtra meta_main = {0}; + VisionIpcBufExtra meta_extra = {0}; + while (!do_exit) { - VisionIpcBufExtra extra = {}; - VisionBuf *buf = vipc_client.recv(&extra); - if (buf == nullptr) continue; + // TODO: change sync logic to use timestamp start of frame in case camerad skips a frame + // log frame id in model packet + + // Keep receiving frames until we are at least 1 frame ahead of previous extra frame + while (meta_main.frame_id <= meta_extra.frame_id) { + buf_main = vipc_client_main.recv(&meta_main); + if (meta_main.frame_id <= meta_extra.frame_id) { + LOGE("main camera behind! main: %d (%.5f), extra: %d (%.5f)", + meta_main.frame_id, double(meta_main.timestamp_sof) / 1e9, + meta_extra.frame_id, double(meta_extra.timestamp_sof) / 1e9); + } + + if (buf_main == nullptr) break; + } + + if (buf_main == nullptr) { + LOGE("vipc_client_main no frame"); + continue; + } + + if (use_extra_client) { + // Keep receiving extra frames until frame id matches main camera + do { + buf_extra = vipc_client_extra.recv(&meta_extra); + + if (meta_main.frame_id > meta_extra.frame_id) { + LOGE("extra camera behind! main: %d (%.5f), extra: %d (%.5f)", + meta_main.frame_id, double(meta_main.timestamp_sof) / 1e9, + meta_extra.frame_id, double(meta_extra.timestamp_sof) / 1e9); + } + } while (buf_extra != nullptr && meta_main.frame_id > meta_extra.frame_id); + + if (buf_extra == nullptr) { + LOGE("vipc_client_extra no frame"); + continue; + } + + if (meta_main.frame_id != meta_extra.frame_id || std::abs((int64_t)meta_main.timestamp_sof - (int64_t)meta_extra.timestamp_sof) > 10000000ULL) { + LOGE("frames out of sync! main: %d (%.5f), extra: %d (%.5f)", + meta_main.frame_id, double(meta_main.timestamp_sof) / 1e9, + meta_extra.frame_id, double(meta_extra.timestamp_sof) / 1e9); + } + } else { + // Use single camera + buf_extra = buf_main; + meta_extra = meta_main; + } // TODO: path planner timeout? sm.update(0); int desire = ((int)sm["lateralPlan"].getLateralPlan().getDesire()); frame_id = sm["roadCameraState"].getRoadCameraState().getFrameId(); if (sm.updated("liveCalibration")) { - model_transform = update_calibration(sm["liveCalibration"].getLiveCalibration(), wide_camera); + auto extrinsic_matrix = sm["liveCalibration"].getLiveCalibration().getExtrinsicMatrix(); + Eigen::Matrix extrinsic_matrix_eigen; + for (int i = 0; i < 4*3; i++) { + extrinsic_matrix_eigen(i / 4, i % 4) = extrinsic_matrix[i]; + } + + model_transform_main = update_calibration(extrinsic_matrix_eigen, main_wide_camera, false); + if (use_extra) { + model_transform_extra = update_calibration(extrinsic_matrix_eigen, Hardware::TICI(), true); + } live_calib_seen = true; } @@ -85,13 +146,12 @@ void run_model(ModelState &model, VisionIpcClient &vipc_client, bool wide_camera } double mt1 = millis_since_boot(); - ModelOutput *model_output = model_eval_frame(&model, buf->buf_cl, buf->width, buf->height, - model_transform, vec_desire); + ModelOutput *model_output = model_eval_frame(&model, buf_main, buf_extra, model_transform_main, model_transform_extra, vec_desire); double mt2 = millis_since_boot(); float model_execution_time = (mt2 - mt1) / 1000.0; // tracked dropped frames - uint32_t vipc_dropped_frames = extra.frame_id - last_vipc_frame_id - 1; + uint32_t vipc_dropped_frames = meta_main.frame_id - last_vipc_frame_id - 1; float frames_dropped = frame_dropped_filter.update((float)std::min(vipc_dropped_frames, 10U)); if (run_count < 10) { // let frame drops warm up frame_dropped_filter.reset(0); @@ -101,13 +161,13 @@ void run_model(ModelState &model, VisionIpcClient &vipc_client, bool wide_camera float frame_drop_ratio = frames_dropped / (1 + frames_dropped); - model_publish(pm, extra.frame_id, frame_id, frame_drop_ratio, *model_output, extra.timestamp_eof, model_execution_time, + model_publish(pm, meta_main.frame_id, frame_id, frame_drop_ratio, *model_output, meta_main.timestamp_eof, model_execution_time, kj::ArrayPtr(model.output.data(), model.output.size()), live_calib_seen); - posenet_publish(pm, extra.frame_id, vipc_dropped_frames, *model_output, extra.timestamp_eof, live_calib_seen); + posenet_publish(pm, meta_main.frame_id, vipc_dropped_frames, *model_output, meta_main.timestamp_eof, live_calib_seen); //printf("model process: %.2fms, from last %.2fms, vipc_frame_id %u, frame_id, %u, frame_drop %.3f\n", mt2 - mt1, mt1 - last, extra.frame_id, frame_id, frame_drop_ratio); last = mt1; - last_vipc_frame_id = extra.frame_id; + last_vipc_frame_id = meta_main.frame_id; } } @@ -120,7 +180,9 @@ int main(int argc, char **argv) { assert(ret == 0); } - bool wide_camera = Hardware::TICI() ? Params().getBool("EnableWideCamera") : false; + bool main_wide_camera = Hardware::TICI() ? Params().getBool("EnableWideCamera") : false; + bool use_extra = USE_EXTRA; + bool use_extra_client = Hardware::TICI() && use_extra && !main_wide_camera; // cl init cl_device_id device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT); @@ -128,20 +190,32 @@ int main(int argc, char **argv) { // init the models ModelState model; - model_init(&model, device_id, context); + model_init(&model, device_id, context, use_extra); LOGW("models loaded, modeld starting"); - VisionIpcClient vipc_client = VisionIpcClient("camerad", wide_camera ? VISION_STREAM_WIDE_ROAD : VISION_STREAM_ROAD, true, device_id, context); - while (!do_exit && !vipc_client.connect(false)) { + VisionIpcClient vipc_client_main = VisionIpcClient("camerad", main_wide_camera ? VISION_STREAM_WIDE_ROAD : VISION_STREAM_ROAD, true, device_id, context); + VisionIpcClient vipc_client_extra = VisionIpcClient("camerad", VISION_STREAM_WIDE_ROAD, false, device_id, context); + + while (!do_exit && !vipc_client_main.connect(false)) { + util::sleep_for(100); + } + + while (!do_exit && use_extra_client && !vipc_client_extra.connect(false)) { util::sleep_for(100); } // run the models // vipc_client.connected is false only when do_exit is true - if (vipc_client.connected) { - const VisionBuf *b = &vipc_client.buffers[0]; - LOGW("connected with buffer size: %d (%d x %d)", b->len, b->width, b->height); - run_model(model, vipc_client, wide_camera); + if (!do_exit) { + const VisionBuf *b = &vipc_client_main.buffers[0]; + LOGW("connected main cam with buffer size: %d (%d x %d)", b->len, b->width, b->height); + + if (use_extra_client) { + const VisionBuf *wb = &vipc_client_extra.buffers[0]; + LOGW("connected extra cam with buffer size: %d (%d x %d)", wb->len, wb->width, wb->height); + } + + run_model(model, vipc_client_main, vipc_client_extra, main_wide_camera, use_extra, use_extra_client); } model_free(&model); diff --git a/selfdrive/modeld/models/dmonitoring.cc b/selfdrive/modeld/models/dmonitoring.cc index 2acdf7d2b7..005d0bcc53 100644 --- a/selfdrive/modeld/models/dmonitoring.cc +++ b/selfdrive/modeld/models/dmonitoring.cc @@ -166,7 +166,8 @@ DMonitoringResult dmonitoring_eval_frame(DMonitoringModelState* s, void* stream_ //fclose(dump_yuv_file2); double t1 = millis_since_boot(); - s->m->execute(net_input_buf, yuv_buf_len); + s->m->addImage(net_input_buf, yuv_buf_len); + s->m->execute(); double t2 = millis_since_boot(); DMonitoringResult ret = {0}; diff --git a/selfdrive/modeld/models/driving.cc b/selfdrive/modeld/models/driving.cc index 69aeb91be1..e42f2c0f46 100644 --- a/selfdrive/modeld/models/driving.cc +++ b/selfdrive/modeld/models/driving.cc @@ -26,15 +26,19 @@ constexpr const kj::ArrayPtr to_kj_array_ptr(const std::array return kj::ArrayPtr(arr.data(), arr.size()); } -void model_init(ModelState* s, cl_device_id device_id, cl_context context) { +void model_init(ModelState* s, cl_device_id device_id, cl_context context, bool use_extra) { s->frame = new ModelFrame(device_id, context); + s->wide_frame = new ModelFrame(device_id, context); #ifdef USE_THNEED - s->m = std::make_unique("../../models/supercombo.thneed", &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME); + s->m = std::make_unique(use_extra ? "../../models/big_supercombo.thneed" : "../../models/supercombo.thneed", + &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, use_extra); #elif USE_ONNX_MODEL - s->m = std::make_unique("../../models/supercombo.onnx", &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME); + s->m = std::make_unique(use_extra ? "../../models/big_supercombo.onnx" : "../../models/supercombo.onnx", + &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, use_extra); #else - s->m = std::make_unique("../../models/supercombo.dlc", &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME); + s->m = std::make_unique(use_extra ? "../../models/big_supercombo.dlc" : "../../models/supercombo.dlc", + &s->output[0], NET_OUTPUT_SIZE, USE_GPU_RUNTIME, use_extra); #endif #ifdef TEMPORAL @@ -52,8 +56,8 @@ void model_init(ModelState* s, cl_device_id device_id, cl_context context) { #endif } -ModelOutput* model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int height, - const mat3 &transform, float *desire_in) { +ModelOutput* model_eval_frame(ModelState* s, VisionBuf* buf, VisionBuf* wbuf, + const mat3 &transform, const mat3 &transform_wide, float *desire_in) { #ifdef DESIRE if (desire_in != NULL) { for (int i = 1; i < DESIRE_LEN; i++) { @@ -70,8 +74,14 @@ ModelOutput* model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int heigh #endif // if getInputBuf is not NULL, net_input_buf will be - auto net_input_buf = s->frame->prepare(yuv_cl, width, height, transform, static_cast(s->m->getInputBuf())); - s->m->execute(net_input_buf, s->frame->buf_size); + auto net_input_buf = s->frame->prepare(buf->buf_cl, buf->width, buf->height, transform, static_cast(s->m->getInputBuf())); + s->m->addImage(net_input_buf, s->frame->buf_size); + + if (wbuf != nullptr) { + auto net_extra_buf = s->wide_frame->prepare(wbuf->buf_cl, wbuf->width, wbuf->height, transform_wide, static_cast(s->m->getExtraBuf())); + s->m->addExtra(net_extra_buf, s->wide_frame->buf_size); + } + s->m->execute(); return (ModelOutput*)&s->output; } diff --git a/selfdrive/modeld/models/driving.h b/selfdrive/modeld/models/driving.h index 14645dcfaa..eead06575f 100644 --- a/selfdrive/modeld/models/driving.h +++ b/selfdrive/modeld/models/driving.h @@ -9,6 +9,7 @@ #include #include "cereal/messaging/messaging.h" +#include "cereal/visionipc/visionipc_client.h" #include "selfdrive/common/mat.h" #include "selfdrive/common/modeldata.h" #include "selfdrive/common/util.h" @@ -25,6 +26,7 @@ constexpr int BLINKER_LEN = 6; constexpr int META_STRIDE = 7; constexpr int PLAN_MHP_N = 5; +constexpr int STOP_LINE_MHP_N = 3; constexpr int LEAD_MHP_N = 2; constexpr int LEAD_TRAJ_LEN = 6; @@ -147,6 +149,37 @@ struct ModelOutputLeads { }; static_assert(sizeof(ModelOutputLeads) == (sizeof(ModelOutputLeadPrediction)*LEAD_MHP_N) + (sizeof(float)*LEAD_MHP_SELECTION)); +struct ModelOutputStopLineElement { + ModelOutputXYZ position; + ModelOutputXYZ rotation; + float speed; + float time; +}; +static_assert(sizeof(ModelOutputStopLineElement) == (sizeof(ModelOutputXYZ)*2 + sizeof(float)*2)); + +struct ModelOutputStopLinePrediction { + ModelOutputStopLineElement mean; + ModelOutputStopLineElement std; + float prob; +}; +static_assert(sizeof(ModelOutputStopLinePrediction) == (sizeof(ModelOutputStopLineElement)*2 + sizeof(float))); + +struct ModelOutputStopLines { + std::array prediction; + float prob; + + constexpr const ModelOutputStopLinePrediction &get_best_prediction(int t_idx) const { + int max_idx = 0; + for (int i = 1; i < prediction.size(); i++) { + if (prediction[i].prob > prediction[max_idx].prob) { + max_idx = i; + } + } + return prediction[max_idx]; + } +}; +static_assert(sizeof(ModelOutputStopLines) == (sizeof(ModelOutputStopLinePrediction)*STOP_LINE_MHP_N) + sizeof(float)); + struct ModelOutputPose { ModelOutputXYZ velocity_mean; ModelOutputXYZ rotation_mean; @@ -205,6 +238,7 @@ struct ModelOutput { const ModelOutputLaneLines lane_lines; const ModelOutputRoadEdges road_edges; const ModelOutputLeads leads; + const ModelOutputStopLines stop_lines; const ModelOutputMeta meta; const ModelOutputPose pose; }; @@ -220,6 +254,7 @@ constexpr int NET_OUTPUT_SIZE = OUTPUT_SIZE + TEMPORAL_SIZE; // TODO: convert remaining arrays to std::array and update model runners struct ModelState { ModelFrame *frame; + ModelFrame *wide_frame; std::array output = {}; std::unique_ptr m; #ifdef DESIRE @@ -231,9 +266,9 @@ struct ModelState { #endif }; -void model_init(ModelState* s, cl_device_id device_id, cl_context context); -ModelOutput *model_eval_frame(ModelState* s, cl_mem yuv_cl, int width, int height, - const mat3 &transform, float *desire_in); +void model_init(ModelState* s, cl_device_id device_id, cl_context context, bool use_extra); +ModelOutput *model_eval_frame(ModelState* s, VisionBuf* buf, VisionBuf* buf_wide, + const mat3 &transform, const mat3 &transform_wide, float *desire_in); void model_free(ModelState* s); void model_publish(PubMaster &pm, uint32_t vipc_frame_id, uint32_t frame_id, float frame_drop, const ModelOutput &net_outputs, uint64_t timestamp_eof, diff --git a/selfdrive/modeld/runners/onnxmodel.cc b/selfdrive/modeld/runners/onnxmodel.cc index cd56f6c21b..b2de9e9a1b 100644 --- a/selfdrive/modeld/runners/onnxmodel.cc +++ b/selfdrive/modeld/runners/onnxmodel.cc @@ -14,11 +14,12 @@ #include "selfdrive/common/swaglog.h" #include "selfdrive/common/util.h" -ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int runtime) { +ONNXModel::ONNXModel(const char *path, float *_output, size_t _output_size, int runtime, bool _use_extra) { LOGD("loading model %s", path); output = _output; output_size = _output_size; + use_extra = _use_extra; int err = pipe(pipein); assert(err == 0); @@ -99,9 +100,24 @@ void ONNXModel::addTrafficConvention(float *state, int state_size) { traffic_convention_size = state_size; } -void ONNXModel::execute(float *net_input_buf, int buf_size) { +void ONNXModel::addImage(float *image_buf, int buf_size) { + image_input_buf = image_buf; + image_buf_size = buf_size; +} + +void ONNXModel::addExtra(float *image_buf, int buf_size) { + extra_input_buf = image_buf; + extra_buf_size = buf_size; +} + +void ONNXModel::execute() { // order must be this - pwrite(net_input_buf, buf_size); + if (image_input_buf != NULL) { + pwrite(image_input_buf, image_buf_size); + } + if (extra_input_buf != NULL) { + pwrite(extra_input_buf, extra_buf_size); + } if (desire_input_buf != NULL) { pwrite(desire_input_buf, desire_state_size); } diff --git a/selfdrive/modeld/runners/onnxmodel.h b/selfdrive/modeld/runners/onnxmodel.h index 8fb1403c97..a3a6a1862a 100644 --- a/selfdrive/modeld/runners/onnxmodel.h +++ b/selfdrive/modeld/runners/onnxmodel.h @@ -6,12 +6,14 @@ class ONNXModel : public RunModel { public: - ONNXModel(const char *path, float *output, size_t output_size, int runtime); + ONNXModel(const char *path, float *output, size_t output_size, int runtime, bool use_extra = false); ~ONNXModel(); void addRecurrent(float *state, int state_size); void addDesire(float *state, int state_size); void addTrafficConvention(float *state, int state_size); - void execute(float *net_input_buf, int buf_size); + void addImage(float *image_buf, int buf_size); + void addExtra(float *image_buf, int buf_size); + void execute(); private: int proc_pid; @@ -24,6 +26,11 @@ private: int desire_state_size; float *traffic_convention_input_buf = NULL; int traffic_convention_size; + float *image_input_buf = NULL; + int image_buf_size; + float *extra_input_buf = NULL; + int extra_buf_size; + bool use_extra; // pipe to communicate to keras subprocess void pread(float *buf, int size); diff --git a/selfdrive/modeld/runners/runmodel.h b/selfdrive/modeld/runners/runmodel.h index 948048f5d3..2d61351ebf 100644 --- a/selfdrive/modeld/runners/runmodel.h +++ b/selfdrive/modeld/runners/runmodel.h @@ -5,7 +5,10 @@ public: virtual void addRecurrent(float *state, int state_size) {} virtual void addDesire(float *state, int state_size) {} virtual void addTrafficConvention(float *state, int state_size) {} - virtual void execute(float *net_input_buf, int buf_size) {} + virtual void addImage(float *image_buf, int buf_size) {} + virtual void addExtra(float *image_buf, int buf_size) {} + virtual void execute() {} virtual void* getInputBuf() { return nullptr; } + virtual void* getExtraBuf() { return nullptr; } }; diff --git a/selfdrive/modeld/runners/snpemodel.cc b/selfdrive/modeld/runners/snpemodel.cc index 3a6e9e622c..df66a9db93 100644 --- a/selfdrive/modeld/runners/snpemodel.cc +++ b/selfdrive/modeld/runners/snpemodel.cc @@ -7,15 +7,17 @@ #include #include "selfdrive/common/util.h" +#include "selfdrive/common/timing.h" void PrintErrorStringAndExit() { std::cerr << zdl::DlSystem::getLastErrorString() << std::endl; std::exit(EXIT_FAILURE); } -SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime) { +SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra) { output = loutput; output_size = loutput_size; + use_extra = luse_extra; #if defined(QCOM) || defined(QCOM2) if (runtime==USE_GPU_RUNTIME) { Runtime = zdl::DlSystem::Runtime_t::GPU; @@ -89,6 +91,25 @@ SNPEModel::SNPEModel(const char *path, float *loutput, size_t loutput_size, int inputMap.add(input_tensor_name, inputBuffer.get()); } + if (use_extra) { + const char *extra_tensor_name = strListi.at(1); + const auto &extraDims_opt = snpe->getInputDimensions(extra_tensor_name); + const zdl::DlSystem::TensorShape& bufferShape = *extraDims_opt; + std::vector strides(bufferShape.rank()); + strides[strides.size() - 1] = sizeof(float); + size_t product = 1; + for (size_t i = 0; i < bufferShape.rank(); i++) product *= bufferShape[i]; + size_t stride = strides[strides.size() - 1]; + for (size_t i = bufferShape.rank() - 1; i > 0; i--) { + stride *= bufferShape[i]; + strides[i-1] = stride; + } + printf("extra product is %lu\n", product); + extraBuffer = ubFactory.createUserBuffer(NULL, product*sizeof(float), strides, &userBufferEncodingFloat); + + inputMap.add(extra_tensor_name, extraBuffer.get()); + } + // create output buffer { const zdl::DlSystem::TensorShape& bufferShape = snpe->getInputOutputBufferAttributes(output_tensor_name)->getDims(); @@ -120,13 +141,24 @@ void SNPEModel::addDesire(float *state, int state_size) { desireBuffer = this->addExtra(state, state_size, 1); } +void SNPEModel::addImage(float *image_buf, int buf_size) { + input = image_buf; + input_size = buf_size; +} + +void SNPEModel::addExtra(float *image_buf, int buf_size) { + extra = image_buf; + extra_size = buf_size; +} + std::unique_ptr SNPEModel::addExtra(float *state, int state_size, int idx) { // get input and output names + const auto real_idx = idx + (use_extra ? 1 : 0); const auto &strListi_opt = snpe->getInputTensorNames(); if (!strListi_opt) throw std::runtime_error("Error obtaining Input tensor names"); const auto &strListi = *strListi_opt; - const char *input_tensor_name = strListi.at(idx); - printf("adding index %d: %s\n", idx, input_tensor_name); + const char *input_tensor_name = strListi.at(real_idx); + printf("adding index %d: %s\n", real_idx, input_tensor_name); zdl::DlSystem::UserBufferEncodingFloat userBufferEncodingFloat; zdl::DlSystem::IUserBufferFactory& ubFactory = zdl::SNPE::SNPEFactory::getUserBufferFactory(); @@ -136,13 +168,17 @@ std::unique_ptr SNPEModel::addExtra(float *state, in return ret; } -void SNPEModel::execute(float *net_input_buf, int buf_size) { +void SNPEModel::execute() { #ifdef USE_THNEED if (Runtime == zdl::DlSystem::Runtime_t::GPU) { - float *inputs[4] = {recurrent, trafficConvention, desire, net_input_buf}; if (thneed == NULL) { - bool ret = inputBuffer->setBufferAddress(net_input_buf); + bool ret = inputBuffer->setBufferAddress(input); assert(ret == true); + if (use_extra) { + assert(extra != NULL); + bool extra_ret = extraBuffer->setBufferAddress(extra); + assert(extra_ret == true); + } if (!snpe->execute(inputMap, outputMap)) { PrintErrorStringAndExit(); } @@ -159,7 +195,16 @@ void SNPEModel::execute(float *net_input_buf, int buf_size) { memcpy(outputs_golden, output, output_size*sizeof(float)); memset(output, 0, output_size*sizeof(float)); memset(recurrent, 0, recurrent_size*sizeof(float)); - thneed->execute(inputs, output); + uint64_t start_time = nanos_since_boot(); + if (extra != NULL) { + float *inputs[5] = {recurrent, trafficConvention, desire, extra, input}; + thneed->execute(inputs, output); + } else { + float *inputs[4] = {recurrent, trafficConvention, desire, input}; + thneed->execute(inputs, output); + } + uint64_t elapsed_time = nanos_since_boot() - start_time; + printf("ran model in %.2f ms\n", float(elapsed_time)/1e6); if (memcmp(output, outputs_golden, output_size*sizeof(float)) == 0) { printf("thneed selftest passed\n"); @@ -171,12 +216,22 @@ void SNPEModel::execute(float *net_input_buf, int buf_size) { } free(outputs_golden); } else { - thneed->execute(inputs, output); + if (use_extra) { + float *inputs[5] = {recurrent, trafficConvention, desire, extra, input}; + thneed->execute(inputs, output); + } else { + float *inputs[4] = {recurrent, trafficConvention, desire, input}; + thneed->execute(inputs, output); + } } } else { #endif - bool ret = inputBuffer->setBufferAddress(net_input_buf); + bool ret = inputBuffer->setBufferAddress(input); assert(ret == true); + if (use_extra) { + bool extra_ret = extraBuffer->setBufferAddress(extra); + assert(extra_ret == true); + } if (!snpe->execute(inputMap, outputMap)) { PrintErrorStringAndExit(); } diff --git a/selfdrive/modeld/runners/snpemodel.h b/selfdrive/modeld/runners/snpemodel.h index 6d3a70414c..0c927a2f42 100644 --- a/selfdrive/modeld/runners/snpemodel.h +++ b/selfdrive/modeld/runners/snpemodel.h @@ -22,11 +22,13 @@ class SNPEModel : public RunModel { public: - SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime); + SNPEModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false); void addRecurrent(float *state, int state_size); void addTrafficConvention(float *state, int state_size); void addDesire(float *state, int state_size); - void execute(float *net_input_buf, int buf_size); + void addImage(float *image_buf, int buf_size); + void addExtra(float *image_buf, int buf_size); + void execute(); #ifdef USE_THNEED Thneed *thneed = NULL; @@ -45,6 +47,8 @@ private: // snpe input stuff zdl::DlSystem::UserBufferMap inputMap; std::unique_ptr inputBuffer; + float *input; + size_t input_size; // snpe output stuff zdl::DlSystem::UserBufferMap outputMap; @@ -52,6 +56,12 @@ private: float *output; size_t output_size; + // extra input stuff + std::unique_ptr extraBuffer; + float *extra; + size_t extra_size; + bool use_extra; + // recurrent and desire std::unique_ptr addExtra(float *state, int state_size, int idx); float *recurrent; diff --git a/selfdrive/modeld/runners/thneedmodel.cc b/selfdrive/modeld/runners/thneedmodel.cc index 0beb6a2794..a85b2ac022 100644 --- a/selfdrive/modeld/runners/thneedmodel.cc +++ b/selfdrive/modeld/runners/thneedmodel.cc @@ -2,7 +2,7 @@ #include -ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime) { +ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra) { thneed = new Thneed(true); thneed->record = 0; thneed->load(path); @@ -11,6 +11,7 @@ ThneedModel::ThneedModel(const char *path, float *loutput, size_t loutput_size, recorded = false; output = loutput; + use_extra = luse_extra; } void ThneedModel::addRecurrent(float *state, int state_size) { @@ -25,23 +26,48 @@ void ThneedModel::addDesire(float *state, int state_size) { desire = state; } +void ThneedModel::addImage(float *image_input_buf, int buf_size) { + input = image_input_buf; +} + +void ThneedModel::addExtra(float *extra_input_buf, int buf_size) { + extra = extra_input_buf; +} + void* ThneedModel::getInputBuf() { + if (use_extra && thneed->input_clmem.size() > 4) return &(thneed->input_clmem[4]); + else if (!use_extra && thneed->input_clmem.size() > 3) return &(thneed->input_clmem[3]); + else return nullptr; +} + +void* ThneedModel::getExtraBuf() { if (thneed->input_clmem.size() > 3) return &(thneed->input_clmem[3]); else return nullptr; } -void ThneedModel::execute(float *net_input_buf, int buf_size) { - float *inputs[4] = {recurrent, trafficConvention, desire, net_input_buf}; +void ThneedModel::execute() { if (!recorded) { thneed->record = THNEED_RECORD; - thneed->copy_inputs(inputs); + if (use_extra) { + float *inputs[5] = {recurrent, trafficConvention, desire, extra, input}; + thneed->copy_inputs(inputs); + } else { + float *inputs[4] = {recurrent, trafficConvention, desire, input}; + thneed->copy_inputs(inputs); + } thneed->clexec(); thneed->copy_output(output); thneed->stop(); recorded = true; } else { - thneed->execute(inputs, output); + if (use_extra) { + float *inputs[5] = {recurrent, trafficConvention, desire, extra, input}; + thneed->execute(inputs, output); + } else { + float *inputs[4] = {recurrent, trafficConvention, desire, input}; + thneed->execute(inputs, output); + } } } diff --git a/selfdrive/modeld/runners/thneedmodel.h b/selfdrive/modeld/runners/thneedmodel.h index 1720d5ff20..fe0f9ae44f 100644 --- a/selfdrive/modeld/runners/thneedmodel.h +++ b/selfdrive/modeld/runners/thneedmodel.h @@ -5,16 +5,22 @@ class ThneedModel : public RunModel { public: - ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime); + ThneedModel(const char *path, float *loutput, size_t loutput_size, int runtime, bool luse_extra = false); void addRecurrent(float *state, int state_size); void addTrafficConvention(float *state, int state_size); void addDesire(float *state, int state_size); - void execute(float *net_input_buf, int buf_size); + void addImage(float *image_buf, int buf_size); + void addExtra(float *image_buf, int buf_size); + void execute(); void* getInputBuf(); + void* getExtraBuf(); private: Thneed *thneed = NULL; bool recorded; + bool use_extra; + float *input; + float *extra; float *output; // recurrent and desire diff --git a/selfdrive/modeld/thneed/compile.cc b/selfdrive/modeld/thneed/compile.cc index 01a71b7475..8e031b9ab6 100644 --- a/selfdrive/modeld/thneed/compile.cc +++ b/selfdrive/modeld/thneed/compile.cc @@ -2,6 +2,7 @@ #include "selfdrive/modeld/runners/snpemodel.h" #include "selfdrive/modeld/thneed/thneed.h" +#include "selfdrive/hardware/hw.h" #define TEMPORAL_SIZE 512 #define DESIRE_LEN 8 @@ -10,22 +11,28 @@ // TODO: This should probably use SNPE directly. int main(int argc, char* argv[]) { #define OUTPUT_SIZE 0x10000 + float *output = (float*)calloc(OUTPUT_SIZE, sizeof(float)); - SNPEModel mdl(argv[1], output, 0, USE_GPU_RUNTIME); + SNPEModel mdl(argv[1], output, 0, USE_GPU_RUNTIME, USE_EXTRA); float state[TEMPORAL_SIZE] = {0}; float desire[DESIRE_LEN] = {0}; float traffic_convention[TRAFFIC_CONVENTION_LEN] = {0}; float *input = (float*)calloc(0x1000000, sizeof(float)); + float *extra = (float*)calloc(0x1000000, sizeof(float)); mdl.addRecurrent(state, TEMPORAL_SIZE); mdl.addDesire(desire, DESIRE_LEN); mdl.addTrafficConvention(traffic_convention, TRAFFIC_CONVENTION_LEN); + mdl.addImage(input, 0); + if (USE_EXTRA) { + mdl.addExtra(extra, 0); + } // first run printf("************** execute 1 **************\n"); memset(output, 0, OUTPUT_SIZE * sizeof(float)); - mdl.execute(input, 0); + mdl.execute(); // save model bool save_binaries = (argc > 3) && (strcmp(argv[3], "--binary") == 0); diff --git a/selfdrive/modeld/thneed/kernels/convolution_.cl b/selfdrive/modeld/thneed/kernels/convolution_.cl new file mode 100644 index 0000000000..1b9d74b83f --- /dev/null +++ b/selfdrive/modeld/thneed/kernels/convolution_.cl @@ -0,0 +1,272 @@ + read_only image2d_t input, +#ifndef DEPTHWISE + short startPackedInputChannel, + short numPackedInputChannelsForGroup, short totalNumPackedInputChannels, + // typo required for API compatibility + short packedOuputChannelOffset, short totalNumPackedOutputChannels, +#else + short totalNumPackedChannels, +#endif + read_only image2d_t weights, __constant float *biases, + short filterSizeX, short filterSizeY, + write_only image2d_t output, + short paddingX, short paddingY, short strideX, short strideY, +#ifdef SUPPORT_DILATION + short dilationX, short dilationY, +#endif + short neuron, float a, float b, float min_clamp, float max_clamp, +#ifndef DEPTHWISE + // note: these are not supported + __constant float *parameters, __constant float *batchNormBiases, +#endif + short numOutputColumns +#ifdef SUPPORT_ACCUMULATION + , short doAccumulate, read_only image2d_t accumulator +#endif + ) { + +#ifndef NUM_OUTPUTS + #define NUM_OUTPUTS 4 +#endif + + // init + const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; + short packedOutputChannel = get_global_id(0); + short startOutputColumn = mul24((short)get_global_id(1), NUM_OUTPUTS); + short outputRow = get_global_id(2); + +#ifdef DEPTHWISE + short totalNumPackedInputChannels = totalNumPackedChannels; + short totalNumPackedOutputChannels = totalNumPackedChannels; + short startPackedInputChannel = packedOutputChannel; +#endif + + short startX = mad24(mad24(startOutputColumn, strideX, -paddingX), totalNumPackedInputChannels, startPackedInputChannel); + short strideWithChannels = mul24(strideX, totalNumPackedInputChannels); + + float4 outputValues[NUM_OUTPUTS]; + for (short i = 0; i < NUM_OUTPUTS; ++i) { + outputValues[i] = (float4)(0, 0, 0, 0); + } + + int2 inputLocation; + inputLocation.y = mad24(outputRow, strideY, -paddingY); + + int2 weightLocation; + weightLocation.x = 0; + weightLocation.y = packedOutputChannel; + +#ifdef DEPTHWISE + +#ifdef SUPPORT_DILATION + + // depthwise convolution + for (short rfRow = 0; rfRow < filterSizeY; ++rfRow) { + for (short rfColumn = 0; rfColumn < filterSizeX; ++rfColumn) { + short dilatedStepX = mul24(totalNumPackedChannels, dilationX); + inputLocation.x = mad24(rfColumn, dilatedStepX, startX); + float4 inputValues[4]; + for (short i = 0; i < 4; ++i) { + inputValues[i] = read_imagef(input, smp, inputLocation); + inputLocation.x += strideWithChannels; + } + float4 weightValues = read_imagef(weights, smp, weightLocation); + ++weightLocation.x; + outputValues[0] += inputValues[0] * weightValues; + outputValues[1] += inputValues[1] * weightValues; + outputValues[2] += inputValues[2] * weightValues; + outputValues[3] += inputValues[3] * weightValues; + } + inputLocation.y += dilationY; + } + +#else + + // depthwise unstrided convolution + for (short rfRow = 0; rfRow < filterSizeY; ++rfRow) { + float4 inputValues[4]; + inputLocation.x = startX; + for (short i = 1; i < 4; ++i) { + inputValues[i] = read_imagef(input, smp, inputLocation); + inputLocation.x += totalNumPackedOutputChannels; + } + for (short rfColumn = 0; rfColumn < filterSizeX; ++rfColumn) { + inputValues[0] = inputValues[1]; + inputValues[1] = inputValues[2]; + inputValues[2] = inputValues[3]; + inputValues[3] = read_imagef(input, smp, inputLocation); + inputLocation.x += totalNumPackedChannels; + float4 weightValues = read_imagef(weights, smp, weightLocation); + ++weightLocation.x; + outputValues[0] += inputValues[0] * weightValues; + outputValues[1] += inputValues[1] * weightValues; + outputValues[2] += inputValues[2] * weightValues; + outputValues[3] += inputValues[3] * weightValues; + } + ++inputLocation.y; + } + +#endif + +#elif defined(ONLY_1X1_CONV) + + // 1x1 convolution + short endPackedInputChannel = startPackedInputChannel + numPackedInputChannelsForGroup; + for (short packedInputChannel = startPackedInputChannel; packedInputChannel < endPackedInputChannel; ++packedInputChannel) { + float4 weightValues[4]; + for (short outChIdx = 0; outChIdx < 4; ++outChIdx) { + weightValues[outChIdx] = read_imagef(weights, smp, weightLocation); + ++weightLocation.x; + } + + inputLocation.x = startX + packedInputChannel; + float4 inputValues[NUM_OUTPUTS]; + for (short i = 0; i < NUM_OUTPUTS; ++i) { + inputValues[i] = read_imagef(input, smp, inputLocation); + inputLocation.x += strideWithChannels; + } + + for (short i = 0; i < NUM_OUTPUTS; ++i) { + float4 curOutputValues = outputValues[i]; + curOutputValues.x += inputValues[i].x * weightValues[0].x; + curOutputValues.x += inputValues[i].y * weightValues[0].y; + curOutputValues.x += inputValues[i].z * weightValues[0].z; + curOutputValues.x += inputValues[i].w * weightValues[0].w; + curOutputValues.y += inputValues[i].x * weightValues[1].x; + curOutputValues.y += inputValues[i].y * weightValues[1].y; + curOutputValues.y += inputValues[i].z * weightValues[1].z; + curOutputValues.y += inputValues[i].w * weightValues[1].w; + curOutputValues.z += inputValues[i].x * weightValues[2].x; + curOutputValues.z += inputValues[i].y * weightValues[2].y; + curOutputValues.z += inputValues[i].z * weightValues[2].z; + curOutputValues.z += inputValues[i].w * weightValues[2].w; + curOutputValues.w += inputValues[i].x * weightValues[3].x; + curOutputValues.w += inputValues[i].y * weightValues[3].y; + curOutputValues.w += inputValues[i].z * weightValues[3].z; + curOutputValues.w += inputValues[i].w * weightValues[3].w; + outputValues[i] = curOutputValues; + } + } + packedOutputChannel += packedOuputChannelOffset; + +#else + + // normal convolution + for (short rfRow = 0; rfRow < filterSizeY; ++rfRow) { + for (short packedInputChannel = 0; packedInputChannel < numPackedInputChannelsForGroup; ++packedInputChannel) { + short startXForChannel = startX + packedInputChannel; + for (short rfColumn = 0; rfColumn < filterSizeX; ++rfColumn) { + + float4 weightValues[4]; + for (short outChIdx = 0; outChIdx < 4; ++outChIdx) { + weightValues[outChIdx] = read_imagef(weights, smp, weightLocation); + ++weightLocation.x; + } + +#ifdef SUPPORT_DILATION + short dilatedStepX = mul24(totalNumPackedInputChannels, dilationX); + inputLocation.x = mad24(rfColumn, dilatedStepX, startXForChannel); +#else + inputLocation.x = mad24(rfColumn, totalNumPackedInputChannels, startXForChannel); +#endif + float4 inputValues[NUM_OUTPUTS]; + for (short i = 0; i < NUM_OUTPUTS; ++i) { + inputValues[i] = read_imagef(input, smp, inputLocation); + inputLocation.x += strideWithChannels; + } + for (short i = 0; i < NUM_OUTPUTS; ++i) { + float4 curOutputValues = outputValues[i]; + curOutputValues.x += inputValues[i].x * weightValues[0].x; + curOutputValues.x += inputValues[i].y * weightValues[0].y; + curOutputValues.x += inputValues[i].z * weightValues[0].z; + curOutputValues.x += inputValues[i].w * weightValues[0].w; + curOutputValues.y += inputValues[i].x * weightValues[1].x; + curOutputValues.y += inputValues[i].y * weightValues[1].y; + curOutputValues.y += inputValues[i].z * weightValues[1].z; + curOutputValues.y += inputValues[i].w * weightValues[1].w; + curOutputValues.z += inputValues[i].x * weightValues[2].x; + curOutputValues.z += inputValues[i].y * weightValues[2].y; + curOutputValues.z += inputValues[i].z * weightValues[2].z; + curOutputValues.z += inputValues[i].w * weightValues[2].w; + curOutputValues.w += inputValues[i].x * weightValues[3].x; + curOutputValues.w += inputValues[i].y * weightValues[3].y; + curOutputValues.w += inputValues[i].z * weightValues[3].z; + curOutputValues.w += inputValues[i].w * weightValues[3].w; + outputValues[i] = curOutputValues; + } + } + } +#ifdef SUPPORT_DILATION + inputLocation.y += dilationY; +#else + ++inputLocation.y; +#endif + } + packedOutputChannel += packedOuputChannelOffset; +#endif + + // bias + short outputChannel = mul24(packedOutputChannel, 4); + float4 biasValues = vload4(0, biases + outputChannel); + for (short i = 0; i < NUM_OUTPUTS; ++i) { + outputValues[i] += biasValues; + } + +#ifdef SUPPORT_ACCUMULATION + // accumulate + if (doAccumulate) { + int2 outputLocation; + short outputColumn = startOutputColumn; + outputLocation.y = outputRow; + for (short i = 0; i < NUM_OUTPUTS; ++i) { + outputLocation.x = mad24(outputColumn, totalNumPackedOutputChannels, packedOutputChannel); + if (outputColumn < numOutputColumns) { + outputValues[i] += read_imagef(accumulator, smp, outputLocation); + } + ++outputColumn; + } + } +#endif + + // activation + switch (neuron) { + case 1: + for (short i = 0; i < NUM_OUTPUTS; ++i) { + outputValues[i] = max(outputValues[i], 0.0f); + } + break; + case 2: + for (short i = 0; i < NUM_OUTPUTS; ++i) { + outputValues[i] = a * tanh(b * outputValues[i]); + } + break; + case 3: + for (short i = 0; i < NUM_OUTPUTS; ++i) { + outputValues[i] = native_recip(1.0f + native_exp(-a * outputValues[i] + b)); + } + break; + case 4: + for (short i = 0; i < NUM_OUTPUTS; ++i) { + outputValues[i] = max(outputValues[i], min_clamp); + outputValues[i] = min(outputValues[i], max_clamp); + } + break; + case 5: + for (short i = 0; i < NUM_OUTPUTS; ++i) { + outputValues[i] = max(outputValues[i], 0.0f) + a * (native_exp(min(outputValues[i], 0.0f)) - 1.0f); + } + break; + } + + // output + int2 outputLocation; + short outputColumn = startOutputColumn; + outputLocation.y = outputRow; + for (short i = 0; i < NUM_OUTPUTS; ++i) { + outputLocation.x = mad24(outputColumn, totalNumPackedOutputChannels, packedOutputChannel); + if (outputColumn < numOutputColumns) { + write_imagef(output, outputLocation, outputValues[i]); + } + ++outputColumn; + } +} diff --git a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads.cl b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads.cl new file mode 100644 index 0000000000..bc8add79aa --- /dev/null +++ b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads.cl @@ -0,0 +1,4 @@ +#define SUPPORT_DILATION + +__kernel void convolution_horizontal_reduced_reads( +#include "convolution_.cl" diff --git a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_1x1.cl b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_1x1.cl new file mode 100644 index 0000000000..75a090ca22 --- /dev/null +++ b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_1x1.cl @@ -0,0 +1,5 @@ +#define ONLY_1X1_CONV +#define SUPPORT_ACCUMULATION + +__kernel void convolution_horizontal_reduced_reads_1x1( +#include "convolution_.cl" diff --git a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_5_outputs.cl b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_5_outputs.cl new file mode 100644 index 0000000000..980e7d1f67 --- /dev/null +++ b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_5_outputs.cl @@ -0,0 +1,4 @@ +#define NUM_OUTPUTS 5 + +__kernel void convolution_horizontal_reduced_reads_5_outputs( +#include "convolution_.cl" diff --git a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise.cl b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise.cl new file mode 100644 index 0000000000..80be0da924 --- /dev/null +++ b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise.cl @@ -0,0 +1,5 @@ +#define DEPTHWISE +#define SUPPORT_DILATION + +__kernel void convolution_horizontal_reduced_reads_depthwise( +#include "convolution_.cl" diff --git a/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise_stride_1.cl b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise_stride_1.cl new file mode 100644 index 0000000000..3d651c229b --- /dev/null +++ b/selfdrive/modeld/thneed/kernels/convolution_horizontal_reduced_reads_depthwise_stride_1.cl @@ -0,0 +1,4 @@ +#define DEPTHWISE + +__kernel void convolution_horizontal_reduced_reads_depthwise_stride_1( +#include "convolution_.cl" diff --git a/selfdrive/modeld/thneed/optimizer.cc b/selfdrive/modeld/thneed/optimizer.cc new file mode 100644 index 0000000000..3c7c41873d --- /dev/null +++ b/selfdrive/modeld/thneed/optimizer.cc @@ -0,0 +1,266 @@ +#include +#include +#include +#include +#include "thneed.h" + +extern map g_program_source; + +static int is_same_size_image(cl_mem a, cl_mem b) { + size_t a_width, a_height, a_depth, a_array_size, a_row_pitch, a_slice_pitch; + clGetImageInfo(a, CL_IMAGE_WIDTH, sizeof(a_width), &a_width, NULL); + clGetImageInfo(a, CL_IMAGE_HEIGHT, sizeof(a_height), &a_height, NULL); + clGetImageInfo(a, CL_IMAGE_DEPTH, sizeof(a_depth), &a_depth, NULL); + clGetImageInfo(a, CL_IMAGE_ARRAY_SIZE, sizeof(a_array_size), &a_array_size, NULL); + clGetImageInfo(a, CL_IMAGE_ROW_PITCH, sizeof(a_row_pitch), &a_row_pitch, NULL); + clGetImageInfo(a, CL_IMAGE_SLICE_PITCH, sizeof(a_slice_pitch), &a_slice_pitch, NULL); + + size_t b_width, b_height, b_depth, b_array_size, b_row_pitch, b_slice_pitch; + clGetImageInfo(b, CL_IMAGE_WIDTH, sizeof(b_width), &b_width, NULL); + clGetImageInfo(b, CL_IMAGE_HEIGHT, sizeof(b_height), &b_height, NULL); + clGetImageInfo(b, CL_IMAGE_DEPTH, sizeof(b_depth), &b_depth, NULL); + clGetImageInfo(b, CL_IMAGE_ARRAY_SIZE, sizeof(b_array_size), &b_array_size, NULL); + clGetImageInfo(b, CL_IMAGE_ROW_PITCH, sizeof(b_row_pitch), &b_row_pitch, NULL); + clGetImageInfo(b, CL_IMAGE_SLICE_PITCH, sizeof(b_slice_pitch), &b_slice_pitch, NULL); + + return (a_width == b_width) && (a_height == b_height) && + (a_depth == b_depth) && (a_array_size == b_array_size) && + (a_row_pitch == b_row_pitch) && (a_slice_pitch == b_slice_pitch); +} + +static cl_mem make_image_like(cl_context context, cl_mem val) { + cl_image_format format; + size_t width, height, row_pitch; + clGetImageInfo(val, CL_IMAGE_FORMAT, sizeof(format), &format, NULL); + assert(format.image_channel_order == CL_RGBA); + assert(format.image_channel_data_type == CL_HALF_FLOAT); + clGetImageInfo(val, CL_IMAGE_WIDTH, sizeof(width), &width, NULL); + clGetImageInfo(val, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL); + clGetImageInfo(val, CL_IMAGE_ROW_PITCH, sizeof(row_pitch), &row_pitch, NULL); + + cl_image_desc desc = {0}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = width; + desc.image_height = height; + desc.image_row_pitch = row_pitch; + + cl_mem buf = clCreateBuffer(context, CL_MEM_READ_WRITE, row_pitch*height, NULL, NULL); + assert(buf != NULL); + desc.buffer = buf; + + cl_int err; + cl_mem tmp = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); + //printf("got %d for image %zux%zu %zu\n", err, width, height, row_pitch); + assert(tmp != NULL); + + return tmp; +} + +// convolution_horizontal_reduced_reads_1x1 is 66% of the model runtime +// make that faster and the model gets faster + +// this cuts ~2 ms off the model runtime right now +int Thneed::optimize() { + const char *kernel_path = getenv("KERNEL_PATH"); + if (!kernel_path) { kernel_path = "/data/openpilot/selfdrive/modeld/thneed/kernels"; printf("no KERNEL_PATH set, defaulting to %s\n", kernel_path); } + // load custom kernels + map g_programs; + for (auto &k : kq) { + // replace program? + if (g_programs.find(k->name) == g_programs.end()) { + char fn[0x100]; + snprintf(fn, sizeof(fn), "%s/%s.cl", kernel_path, k->name.c_str()); + FILE *g = fopen(fn, "rb"); + if (g != NULL) { + char *src[0x10000]; + const char *srcs[1]; srcs[0] = (const char *)src; + memset(src, 0, sizeof(src)); + size_t length = fread(src, 1, sizeof(src), g); + fclose(g); + + printf("building kernel %s\n", k->name.c_str()); + k->program = clCreateProgramWithSource(context, 1, srcs, &length, NULL); + char options[0x100]; + snprintf(options, sizeof(options)-1, "-I %s", kernel_path); + int err = clBuildProgram(k->program, 1, &device_id, options, NULL, NULL); + + if (err != 0) { + printf("got err %d\n", err); + size_t err_length; + char buffer[2048]; + clGetProgramBuildInfo(k->program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &err_length); + buffer[err_length] = '\0'; + printf("%s\n", buffer); + } + assert(err == 0); + + // save in cache + g_programs[k->name] = k->program; + g_program_source[k->program] = string((char *)src, length); + } else { + g_programs[k->name] = NULL; + } + } else { + // cached replacement + if (g_programs[k->name] != NULL) { + k->program = g_programs[k->name]; + } + } + + // hack in accumulator to convolution_horizontal_reduced_reads_1x1 + if (k->name == "convolution_horizontal_reduced_reads_1x1") { + k->arg_names.push_back("doAccumulate"); + short doAccumulate = 0; + k->args.push_back(string((char *)&doAccumulate, sizeof(doAccumulate))); + k->args_size.push_back(2); + k->arg_names.push_back("accumulator"); + k->args.push_back(k->args[k->get_arg_num("output")]); + k->args_size.push_back(8); + k->num_args += 2; + } + + // assert that parameters + batchNormBiases are not used + // since they aren't supported in custom replacement kernels + if (k->name == "convolution_horizontal_reduced_reads_1x1" || + k->name == "convolution_horizontal_reduced_reads" || + k->name == "convolution_horizontal_reduced_reads_5_outputs") { + string p1 = k->args[k->get_arg_num("parameters")]; + string p2 = k->args[k->get_arg_num("batchNormBiases")]; + assert(p1.length() == 8 && *((uint64_t*)p1.data()) == 0); + assert(p2.length() == 8 && *((uint64_t*)p2.data()) == 0); + } + } + + // optimizer + size_t start_size; + do { + start_size = kq.size(); + + // get optimizations + map replacements; + for (int i = 0; i < kq.size(); i++) { + // fusing elementwise_sum + activate_image will save 3 enqueues + + // delete useless copy layers + // saves ~0.7 ms + if (kq[i]->name == "concatenation" || kq[i]->name == "flatten") { + string in = kq[i]->args[kq[i]->get_arg_num("input")]; + string out = kq[i]->args[kq[i]->get_arg_num("output")]; + if (is_same_size_image(*(cl_mem*)in.data(), *(cl_mem*)out.data())) { + cl_mem tmp = make_image_like(context, *(cl_mem *)in.data()); + replacements[in] = string((char *)&tmp, sizeof(tmp)); + replacements[out] = string((char *)&tmp, sizeof(tmp)); + + kq.erase(kq.begin()+i); --i; + } + } + + // NOTE: if activations/accumulation are done in the wrong order, this will be wrong + + // fuse activations into convs and fc_Wtx + // saves ~1.5 ms + // NOTE: this changes the outputs because of rounding, should be better now! + if (i != 0 && kq[i]->name == "activate_image") { + if (kq[i-1]->name == "convolution_horizontal_reduced_reads_1x1" || + kq[i-1]->name == "convolution_horizontal_reduced_reads_5_outputs" || + kq[i-1]->name == "convolution_horizontal_reduced_reads" || + kq[i-1]->name == "convolution_horizontal_reduced_reads_depthwise" || + kq[i-1]->name == "convolution_horizontal_reduced_reads_depthwise_stride_1" || + kq[i-1]->name == "fc_Wtx") { + string lastout = kq[i-1]->args[kq[i-1]->get_arg_num("output")]; + string in = kq[i]->args[kq[i]->get_arg_num("input")]; + string out = kq[i]->args[kq[i]->get_arg_num("output")]; + + if (lastout == in) { + short neuron = *(int*)kq[i]->args[kq[i]->get_arg_num("neuron")].data(); + assert(neuron <= 5); + + // ELU isn't supported in fc_Wtx + assert(!(kq[i-1]->name == "fc_Wtx" && neuron == 5)); + + kq[i-1]->args[kq[i-1]->get_arg_num("neuron")] = string((char *)&neuron, sizeof(neuron)); + + cl_mem tmp = make_image_like(context, *(cl_mem *)lastout.data()); + replacements[in] = string((char *)&tmp, sizeof(tmp)); + replacements[out] = string((char *)&tmp, sizeof(tmp)); + + kq.erase(kq.begin()+i); --i; + } + } + } + + // fuse accumulation into convs and fc_Wtx + if (i != 0 && kq[i]->name == "elementwise_sum") { + if (kq[i-1]->name == "convolution_horizontal_reduced_reads_1x1" || + kq[i-1]->name == "fc_Wtx") { + string lastout = kq[i-1]->args[kq[i-1]->get_arg_num("output")]; + string a = kq[i]->args[kq[i]->get_arg_num("a")]; + string b = kq[i]->args[kq[i]->get_arg_num("b")]; + string out = kq[i]->args[kq[i]->get_arg_num("output")]; + + if (lastout == a) { + kq[i-1]->args[kq[i-1]->get_arg_num("accumulator")] = b; + } else if (lastout == b) { + kq[i-1]->args[kq[i-1]->get_arg_num("accumulator")] = a; + } else { + continue; + } + + cl_mem tmp = make_image_like(context, *(cl_mem *)lastout.data()); + replacements[lastout] = string((char *)&tmp, sizeof(tmp)); + replacements[out] = string((char *)&tmp, sizeof(tmp)); + + short doAccumulate = 1; + kq[i-1]->args[kq[i-1]->get_arg_num("doAccumulate")] = string((char *)&doAccumulate, sizeof(doAccumulate)); + + kq.erase(kq.begin()+i); --i; + } + } + } + + // remap inputs and outputs, and clear the kernels + for (int i = 0; i < kq.size(); i++) { + kq[i]->kernel = NULL; + for (int j = 0; j < kq[i]->num_args; j++) { + if (replacements.find(kq[i]->args[j]) != replacements.end()) { + kq[i]->args[j] = replacements[kq[i]->args[j]]; + } + } + } + + printf("optimize %lu -> %lu\n", start_size, kq.size()); + } while (kq.size() != start_size); + + size_t work_group_size = 0; + clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(work_group_size), &work_group_size, NULL); + printf("max work group size %lu\n", work_group_size); + + // local work group optimizer + for (auto &k : kq) { + // only do it for convs, since others might share memory + if (k->name.rfind("convolution_", 0) == 0) { + int best = -1; + if (k->local_work_size[0] * k->local_work_size[1] * k->local_work_size[2] < work_group_size/2) { + uint64_t base_time = k->benchmark(); + uint64_t best_time = base_time; + for (int i = 0; i < 3; i++) { + k->local_work_size[i] *= 2; + uint64_t this_time = k->benchmark(); + if (this_time < best_time) { + best = i; + best_time = this_time; + } + k->local_work_size[i] /= 2; + } + if (best != -1) { + k->local_work_size[best] *= 2; + //printf("%s %.2f ms doubled %d to %.2f ms\n", k->name.c_str(), base_time/1e6, best, best_time/1e6); + } + } + + } + } + + return 0; +} + diff --git a/selfdrive/modeld/thneed/thneed.cc b/selfdrive/modeld/thneed/thneed.cc index a934094ecc..31bfa3e2f3 100644 --- a/selfdrive/modeld/thneed/thneed.cc +++ b/selfdrive/modeld/thneed/thneed.cc @@ -12,7 +12,7 @@ #include "selfdrive/common/clutil.h" #include "selfdrive/common/timing.h" //#define RUN_DISASSEMBLER -//#define RUN_OPTIMIZER +#define RUN_OPTIMIZER Thneed *g_thneed = NULL; int g_fd = -1; @@ -528,6 +528,23 @@ cl_int CLQueuedKernel::exec() { kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, NULL); } +uint64_t CLQueuedKernel::benchmark() { + uint64_t ret = 0; + int old_record = thneed->record; + thneed->record = 0; + clFinish(thneed->command_queue); + // TODO: benchmarking at a lower level will make this more accurate + for (int i = 0; i < 10; i++) { + uint64_t sb = nanos_since_boot(); + exec(); + clFinish(thneed->command_queue); + uint64_t et = nanos_since_boot() - sb; + if (ret == 0 || et < ret) ret = et; + } + thneed->record = old_record; + return ret; +} + void CLQueuedKernel::debug_print(bool verbose) { printf("%p %56s -- ", kernel, name.c_str()); for (int i = 0; i < work_dim; i++) { diff --git a/selfdrive/modeld/thneed/thneed.h b/selfdrive/modeld/thneed/thneed.h index 77d27e0435..1197e4c5ed 100644 --- a/selfdrive/modeld/thneed/thneed.h +++ b/selfdrive/modeld/thneed/thneed.h @@ -44,6 +44,7 @@ class CLQueuedKernel { const size_t *_global_work_size, const size_t *_local_work_size); cl_int exec(); + uint64_t benchmark(); void debug_print(bool verbose); int get_arg_num(const char *search_arg_name); cl_program program; diff --git a/selfdrive/test/longitudinal_maneuvers/plant.py b/selfdrive/test/longitudinal_maneuvers/plant.py index beb544b1b1..13025a9f03 100755 --- a/selfdrive/test/longitudinal_maneuvers/plant.py +++ b/selfdrive/test/longitudinal_maneuvers/plant.py @@ -86,11 +86,10 @@ class Plant(): radar.radarState.leadOne = lead radar.radarState.leadTwo = lead - control.controlsState.longControlState = LongCtrlState.pid control.controlsState.vCruise = float(v_cruise * 3.6) car_state.carState.vEgo = float(self.speed) - + car_state.carState.standstill = self.speed < 0.01 # ******** get controlsState messages for plotting *** sm = {'radarState': radar.radarState, diff --git a/selfdrive/test/openpilotci.py b/selfdrive/test/openpilotci.py index 5bf43fb10a..6483b6717f 100755 --- a/selfdrive/test/openpilotci.py +++ b/selfdrive/test/openpilotci.py @@ -8,7 +8,7 @@ BASE_URL = "https://commadataci.blob.core.windows.net/openpilotci/" TOKEN_PATH = "/data/azure_token" def get_url(route_name, segment_num, log_type="rlog"): - ext = "hevc" if log_type in ["fcamera", "dcamera"] else "bz2" + ext = "hevc" if log_type.endswith('camera') else "bz2" return BASE_URL + f"{route_name.replace('|', '/')}/{segment_num}/{log_type}.{ext}" def upload_file(path, name): diff --git a/selfdrive/test/process_replay/model_replay.py b/selfdrive/test/process_replay/model_replay.py index 6eb37dfa29..0028371e81 100755 --- a/selfdrive/test/process_replay/model_replay.py +++ b/selfdrive/test/process_replay/model_replay.py @@ -3,8 +3,8 @@ import os import sys import time from collections import defaultdict -from tqdm import tqdm from typing import Any +from itertools import zip_longest import cereal.messaging as messaging from cereal.visionipc.visionipc_pyx import VisionIpcServer, VisionStreamType # pylint: disable=no-name-in-module, import-error @@ -21,17 +21,21 @@ from selfdrive.version import get_commit from tools.lib.framereader import FrameReader from tools.lib.logreader import LogReader +TICI_TEST_ROUTE = "4cf7a6ad03080c90|2021-09-29--13-46-36" +EON_TEST_ROUTE = "303055c0002aefd1|2021-11-22--18-36-32" +SEGMENT = 0 if TICI: - TEST_ROUTE = "4cf7a6ad03080c90|2021-09-29--13-46-36" + TEST_ROUTE = TICI_TEST_ROUTE else: - TEST_ROUTE = "303055c0002aefd1|2021-11-22--18-36-32" -SEGMENT = 0 + TEST_ROUTE = EON_TEST_ROUTE SEND_EXTRA_INPUTS = bool(os.getenv("SEND_EXTRA_INPUTS", "0")) +VIPC_STREAM = {"roadCameraState": VisionStreamType.VISION_STREAM_ROAD, "driverCameraState": VisionStreamType.VISION_STREAM_DRIVER, + "wideRoadCameraState": VisionStreamType.VISION_STREAM_WIDE_ROAD} -def get_log_fn(ref_commit): - return f"{TEST_ROUTE}_{'model_tici' if TICI else 'model'}_{ref_commit}.bz2" +def get_log_fn(ref_commit, test_route, tici=True): + return f"{test_route}_{'model_tici' if tici else 'model'}_{ref_commit}.bz2" def replace_calib(msg, calib): @@ -48,19 +52,21 @@ def model_replay(lr, frs): vipc_server = VisionIpcServer("camerad") vipc_server.create_buffers(VisionStreamType.VISION_STREAM_ROAD, 40, False, *(tici_f_frame_size if TICI else eon_f_frame_size)) vipc_server.create_buffers(VisionStreamType.VISION_STREAM_DRIVER, 40, False, *(tici_d_frame_size if TICI else eon_d_frame_size)) + vipc_server.create_buffers(VisionStreamType.VISION_STREAM_WIDE_ROAD, 40, False, *(tici_f_frame_size)) vipc_server.start_listener() sm = messaging.SubMaster(['modelV2', 'driverState']) - pm = messaging.PubMaster(['roadCameraState', 'driverCameraState', 'liveCalibration', 'lateralPlan']) + pm = messaging.PubMaster(['roadCameraState', 'wideRoadCameraState', 'driverCameraState', 'liveCalibration', 'lateralPlan']) try: managed_processes['modeld'].start() managed_processes['dmonitoringmodeld'].start() - time.sleep(2) + time.sleep(5) sm.update(1000) log_msgs = [] last_desire = None + recv_cnt = defaultdict(lambda: 0) frame_idxs = defaultdict(lambda: 0) # init modeld with valid calibration @@ -69,38 +75,59 @@ def model_replay(lr, frs): pm.send(cal_msgs[0].which(), cal_msgs[0].as_builder()) time.sleep(0.1) - for msg in tqdm(lr): - if SEND_EXTRA_INPUTS: - if msg.which() == "liveCalibration": - last_calib = list(msg.liveCalibration.rpyCalib) - pm.send(msg.which(), replace_calib(msg, last_calib)) - elif msg.which() == "lateralPlan": - last_desire = msg.lateralPlan.desire - dat = messaging.new_message('lateralPlan') - dat.lateralPlan.desire = last_desire - pm.send('lateralPlan', dat) - - if msg.which() in ["roadCameraState", "driverCameraState"]: - camera_state = getattr(msg, msg.which()) - stream = VisionStreamType.VISION_STREAM_ROAD if msg.which() == "roadCameraState" else VisionStreamType.VISION_STREAM_DRIVER - img = frs[msg.which()].get(frame_idxs[msg.which()], pix_fmt="yuv420p")[0] - - # send camera state and frame - pm.send(msg.which(), msg.as_builder()) - vipc_server.send(stream, img.flatten().tobytes(), camera_state.frameId, - camera_state.timestampSof, camera_state.timestampEof) - - # wait for a response - with Timeout(seconds=15): - packet_from_camera = {"roadCameraState": "modelV2", "driverCameraState": "driverState"} - log_msgs.append(messaging.recv_one(sm.sock[packet_from_camera[msg.which()]])) - - frame_idxs[msg.which()] += 1 - if frame_idxs[msg.which()] >= frs[msg.which()].frame_count: - break - - spinner.update("replaying models: road %d/%d, driver %d/%d" % (frame_idxs['roadCameraState'], - frs['roadCameraState'].frame_count, frame_idxs['driverCameraState'], frs['driverCameraState'].frame_count)) + msgs = defaultdict(list) + for msg in lr: + msgs[msg.which()].append(msg) + + for cam_msgs in zip_longest(msgs['roadCameraState'], msgs['wideRoadCameraState'], msgs['driverCameraState']): + # need a pair of road/wide msgs + if TICI and None in (cam_msgs[0], cam_msgs[1]): + break + + for msg in cam_msgs: + if msg is None: + continue + + if SEND_EXTRA_INPUTS: + if msg.which() == "liveCalibration": + last_calib = list(msg.liveCalibration.rpyCalib) + pm.send(msg.which(), replace_calib(msg, last_calib)) + elif msg.which() == "lateralPlan": + last_desire = msg.lateralPlan.desire + dat = messaging.new_message('lateralPlan') + dat.lateralPlan.desire = last_desire + pm.send('lateralPlan', dat) + + if msg.which() in VIPC_STREAM: + msg = msg.as_builder() + camera_state = getattr(msg, msg.which()) + img = frs[msg.which()].get(frame_idxs[msg.which()], pix_fmt="yuv420p")[0] + frame_idxs[msg.which()] += 1 + + # send camera state and frame + camera_state.frameId = frame_idxs[msg.which()] + pm.send(msg.which(), msg) + vipc_server.send(VIPC_STREAM[msg.which()], img.flatten().tobytes(), camera_state.frameId, + camera_state.timestampSof, camera_state.timestampEof) + + recv = None + if msg.which() in ('roadCameraState', 'wideRoadCameraState'): + if not TICI or min(frame_idxs['roadCameraState'], frame_idxs['wideRoadCameraState']) > recv_cnt['modelV2']: + recv = "modelV2" + elif msg.which() == 'driverCameraState': + recv = "driverState" + + # wait for a response + with Timeout(15, f"timed out waiting for {recv}"): + if recv: + recv_cnt[recv] += 1 + log_msgs.append(messaging.recv_one(sm.sock[recv])) + + spinner.update("replaying models: road %d/%d, driver %d/%d" % (frame_idxs['roadCameraState'], + frs['roadCameraState'].frame_count, frame_idxs['driverCameraState'], frs['driverCameraState'].frame_count)) + + if any(frame_idxs[c] >= frs[c].frame_count for c in frame_idxs.keys()): + break finally: spinner.close() @@ -123,6 +150,8 @@ if __name__ == "__main__": 'roadCameraState': FrameReader(get_url(TEST_ROUTE, SEGMENT, log_type="fcamera")), 'driverCameraState': FrameReader(get_url(TEST_ROUTE, SEGMENT, log_type="dcamera")), } + if TICI: + frs['wideRoadCameraState'] = FrameReader(get_url(TEST_ROUTE, SEGMENT, log_type="ecamera")) # run replay log_msgs = model_replay(lr, frs) @@ -132,26 +161,30 @@ if __name__ == "__main__": if not update: with open(ref_commit_fn) as f: ref_commit = f.read().strip() - log_fn = get_log_fn(ref_commit) - cmp_log = LogReader(BASE_URL + log_fn) - - ignore = [ - 'logMonoTime', - 'modelV2.frameDropPerc', - 'modelV2.modelExecutionTime', - 'driverState.modelExecutionTime', - 'driverState.dspExecutionTime' - ] - tolerance = None if not PC else 1e-3 - results: Any = {TEST_ROUTE: {}} - results[TEST_ROUTE]["models"] = compare_logs(cmp_log, log_msgs, tolerance=tolerance, ignore_fields=ignore) - diff1, diff2, failed = format_diff(results, ref_commit) - - print(diff2) - print('-------------\n'*5) - print(diff1) - with open("model_diff.txt", "w") as f: - f.write(diff2) + log_fn = get_log_fn(ref_commit, TEST_ROUTE, tici=TICI) + try: + cmp_log = LogReader(BASE_URL + log_fn) + + ignore = [ + 'logMonoTime', + 'modelV2.frameDropPerc', + 'modelV2.modelExecutionTime', + 'driverState.modelExecutionTime', + 'driverState.dspExecutionTime' + ] + tolerance = None if not PC else 1e-3 + results: Any = {TEST_ROUTE: {}} + results[TEST_ROUTE]["models"] = compare_logs(cmp_log, log_msgs, tolerance=tolerance, ignore_fields=ignore) + diff1, diff2, failed = format_diff(results, ref_commit) + + print(diff2) + print('-------------\n'*5) + print(diff1) + with open("model_diff.txt", "w") as f: + f.write(diff2) + except Exception as e: + print(str(e)) + failed = True # upload new refs if update or failed: @@ -160,7 +193,7 @@ if __name__ == "__main__": print("Uploading new refs") new_commit = get_commit() - log_fn = get_log_fn(new_commit) + log_fn = get_log_fn(new_commit, TEST_ROUTE, tici=TICI) save_log(log_fn, log_msgs) try: upload_file(log_fn, os.path.basename(log_fn)) diff --git a/selfdrive/test/process_replay/model_replay_ref_commit b/selfdrive/test/process_replay/model_replay_ref_commit index c343275b6b..d210251c09 100644 --- a/selfdrive/test/process_replay/model_replay_ref_commit +++ b/selfdrive/test/process_replay/model_replay_ref_commit @@ -1 +1 @@ -7d3ad941bc4ba4c923af7a1d7b48544bfc0d3e13 +19720e79b1c5136a882efd689651d9044e2e2007 diff --git a/selfdrive/test/process_replay/process_replay.py b/selfdrive/test/process_replay/process_replay.py index 83596420c1..d62484c43c 100755 --- a/selfdrive/test/process_replay/process_replay.py +++ b/selfdrive/test/process_replay/process_replay.py @@ -389,7 +389,7 @@ def python_replay_process(cfg, lr, fingerprint=None): for msg in lr: if msg.which() == 'carParams': car_fingerprint = migration.get(msg.carParams.carFingerprint, msg.carParams.carFingerprint) - if len(msg.carParams.carFw) and (car_fingerprint in FW_VERSIONS): + if msg.carParams.fingerprintSource == "fw" and (car_fingerprint in FW_VERSIONS): Params().put("CarParamsCache", msg.carParams.as_builder().to_bytes()) else: os.environ['SKIP_FW_QUERY'] = "1" diff --git a/selfdrive/test/process_replay/ref_commit b/selfdrive/test/process_replay/ref_commit index 50a0c30f83..9982949b20 100644 --- a/selfdrive/test/process_replay/ref_commit +++ b/selfdrive/test/process_replay/ref_commit @@ -1 +1 @@ -0c4da879ace9c1517c2324b35da7ff05a4744dd9 \ No newline at end of file +67c8f283858998b75ac28879e1350a589a968e5d \ No newline at end of file diff --git a/selfdrive/test/test_models.py b/selfdrive/test/test_models.py index 6b9595b6c6..3be0f96b37 100755 --- a/selfdrive/test/test_models.py +++ b/selfdrive/test/test_models.py @@ -14,7 +14,7 @@ from selfdrive.boardd.boardd import can_capnp_to_can_list, can_list_to_can_capnp from selfdrive.car.fingerprints import all_known_cars from selfdrive.car.car_helpers import interfaces from selfdrive.car.gm.values import CAR as GM -from selfdrive.car.honda.values import CAR as HONDA +from selfdrive.car.honda.values import CAR as HONDA, HONDA_BOSCH from selfdrive.car.hyundai.values import CAR as HYUNDAI from selfdrive.car.toyota.values import CAR as TOYOTA from selfdrive.test.test_routes import routes, non_tested_cars @@ -232,6 +232,10 @@ class TestCarModel(unittest.TestCase): if self.CP.carFingerprint == TOYOTA.SIENNA and checks['brakePressed'] < 25: checks['brakePressed'] = 0 + # Honda Nidec uses button enable in panda, but pcm enable in openpilot + if self.CP.carName == "honda" and self.CP.carFingerprint not in HONDA_BOSCH and checks['controlsAllowed'] < 25: + checks['controlsAllowed'] = 0 + failed_checks = {k: v for k, v in checks.items() if v > 0} self.assertFalse(len(failed_checks), f"panda safety doesn't agree with openpilot: {failed_checks}") diff --git a/selfdrive/test/test_onroad.py b/selfdrive/test/test_onroad.py index d0a1ad956f..a49f93e37d 100755 --- a/selfdrive/test/test_onroad.py +++ b/selfdrive/test/test_onroad.py @@ -27,8 +27,8 @@ PROCS = { "./locationd": 9.1, "selfdrive.controls.plannerd": 22.6, "./_ui": 20.0, - "selfdrive.locationd.paramsd": 9.1, - "./camerad": 7.07, + "selfdrive.locationd.paramsd": 14.0, + "./camerad": 9.16, "./_sensord": 6.17, "selfdrive.controls.radard": 7.0, "./_modeld": 4.48, @@ -56,7 +56,7 @@ if TICI: PROCS.update({ "./loggerd": 70.0, "selfdrive.controls.controlsd": 31.0, - "./camerad": 31.0, + "./camerad": 36.8, "./_ui": 33.0, "selfdrive.controls.plannerd": 11.7, "./_dmonitoringmodeld": 10.0, @@ -227,7 +227,12 @@ class TestOnroad(unittest.TestCase): result += "----------------- Model Timing -----------------\n" result += "------------------------------------------------\n" # TODO: this went up when plannerd cpu usage increased, why? - cfgs = [("modelV2", 0.038, 0.036), ("driverState", 0.028, 0.026)] + cfgs = [("driverState", 0.028, 0.026)] + if EON: + cfgs += [("modelV2", 0.045, 0.04)] + else: + cfgs += [("modelV2", 0.038, 0.036), ("driverState", 0.028, 0.026)] + for (s, instant_max, avg_max) in cfgs: ts = [getattr(getattr(m, s), "modelExecutionTime") for m in self.lr if m.which() == s] self.assertLess(min(ts), instant_max, f"high '{s}' execution time: {min(ts)}") diff --git a/selfdrive/ui/qt/maps/map_helpers.cc b/selfdrive/ui/qt/maps/map_helpers.cc index f87e80403f..f0545924c0 100644 --- a/selfdrive/ui/qt/maps/map_helpers.cc +++ b/selfdrive/ui/qt/maps/map_helpers.cc @@ -160,6 +160,11 @@ static float dot(QGeoCoordinate v, QGeoCoordinate w) { } float minimum_distance(QGeoCoordinate a, QGeoCoordinate b, QGeoCoordinate p) { + // If a and b are the same coordinate the computation below doesn't work + if (a.distanceTo(b) < 0.01) { + return a.distanceTo(p); + } + const QGeoCoordinate ap = sub(p, a); const QGeoCoordinate ab = sub(b, a); const float t = std::clamp(dot(ap, ab) / dot(ab, ab), 0.0f, 1.0f); diff --git a/selfdrive/ui/qt/maps/map_settings.cc b/selfdrive/ui/qt/maps/map_settings.cc index e130a9a1ec..1e3a99e31a 100644 --- a/selfdrive/ui/qt/maps/map_settings.cc +++ b/selfdrive/ui/qt/maps/map_settings.cc @@ -115,11 +115,12 @@ MapPanel::MapPanel(QWidget* parent) : QWidget(parent) { stack->addWidget(main_widget); stack->addWidget(no_prime_widget); - stack->setCurrentIndex(1); + stack->setCurrentIndex(uiState()->prime_type ? 0 : 1); QVBoxLayout *wrapper = new QVBoxLayout(this); wrapper->addWidget(stack); + clear(); if (auto dongle_id = getDongleId()) { @@ -183,8 +184,9 @@ void MapPanel::updateCurrentRoute() { } void MapPanel::parseResponse(const QString &response, bool success) { + stack->setCurrentIndex((uiState()->prime_type || success) ? 0 : 1); + if (!success) { - stack->setCurrentIndex(1); return; } @@ -283,7 +285,6 @@ void MapPanel::parseResponse(const QString &response, bool success) { } recent_layout->addStretch(); - stack->setCurrentIndex(0); repaint(); } diff --git a/selfdrive/ui/qt/offroad/settings.cc b/selfdrive/ui/qt/offroad/settings.cc index 4058bd405f..00e371178e 100644 --- a/selfdrive/ui/qt/offroad/settings.cc +++ b/selfdrive/ui/qt/offroad/settings.cc @@ -202,10 +202,10 @@ void DevicePanel::updateCalibDescription() { } void DevicePanel::reboot() { - if (uiState()->status == UIStatus::STATUS_DISENGAGED) { + if (!uiState()->engaged()) { if (ConfirmationDialog::confirm("Are you sure you want to reboot?", this)) { // Check engaged again in case it changed while the dialog was open - if (uiState()->status == UIStatus::STATUS_DISENGAGED) { + if (!uiState()->engaged()) { Params().putBool("DoReboot", true); } } @@ -215,10 +215,10 @@ void DevicePanel::reboot() { } void DevicePanel::poweroff() { - if (uiState()->status == UIStatus::STATUS_DISENGAGED) { + if (!uiState()->engaged()) { if (ConfirmationDialog::confirm("Are you sure you want to power off?", this)) { // Check engaged again in case it changed while the dialog was open - if (uiState()->status == UIStatus::STATUS_DISENGAGED) { + if (!uiState()->engaged()) { Params().putBool("DoShutdown", true); } } diff --git a/selfdrive/ui/qt/onroad.cc b/selfdrive/ui/qt/onroad.cc index f7e2dc9bf8..44dde85817 100644 --- a/selfdrive/ui/qt/onroad.cc +++ b/selfdrive/ui/qt/onroad.cc @@ -80,11 +80,15 @@ void OnroadWindow::offroadTransition(bool offroad) { if (!offroad) { if (map == nullptr && (uiState()->prime_type || !MAPBOX_TOKEN.isEmpty())) { MapWindow * m = new MapWindow(get_mapbox_settings()); - m->setFixedWidth(topWidget(this)->width() / 2); - m->offroadTransition(offroad); + map = m; + QObject::connect(uiState(), &UIState::offroadTransition, m, &MapWindow::offroadTransition); + + m->setFixedWidth(topWidget(this)->width() / 2); split->addWidget(m, 0, Qt::AlignRight); - map = m; + + // Make map visible after adding to split + m->offroadTransition(offroad); } } #endif diff --git a/selfdrive/ui/replay/framereader.cc b/selfdrive/ui/replay/framereader.cc index ef90c28355..3186b4ff3a 100644 --- a/selfdrive/ui/replay/framereader.cc +++ b/selfdrive/ui/replay/framereader.cc @@ -6,6 +6,14 @@ #include "cereal/visionipc/visionbuf.h" +#ifdef __APPLE__ +#define HW_DEVICE_TYPE AV_HWDEVICE_TYPE_VIDEOTOOLBOX +#define HW_PIX_FMT AV_PIX_FMT_VIDEOTOOLBOX +#else +#define HW_DEVICE_TYPE AV_HWDEVICE_TYPE_CUDA +#define HW_PIX_FMT AV_PIX_FMT_CUDA +#endif + namespace { struct buffer_data { @@ -30,7 +38,7 @@ enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat * for (const enum AVPixelFormat *p = pix_fmts; *p != -1; p++) { if (*p == *hw_pix_fmt) return *p; } - rWarning("Please run replay with the --no-cuda flag!"); + rWarning("Please run replay with the --no-hw-decoder flag!"); // fallback to YUV420p *hw_pix_fmt = AV_PIX_FMT_NONE; return AV_PIX_FMT_YUV420P; @@ -57,15 +65,15 @@ FrameReader::~FrameReader() { } } -bool FrameReader::load(const std::string &url, bool no_cuda, std::atomic *abort, bool local_cache, int chunk_size, int retries) { +bool FrameReader::load(const std::string &url, bool no_hw_decoder, std::atomic *abort, bool local_cache, int chunk_size, int retries) { FileReader f(local_cache, chunk_size, retries); std::string data = f.read(url, abort); if (data.empty()) return false; - return load((std::byte *)data.data(), data.size(), no_cuda, abort); + return load((std::byte *)data.data(), data.size(), no_hw_decoder, abort); } -bool FrameReader::load(const std::byte *data, size_t size, bool no_cuda, std::atomic *abort) { +bool FrameReader::load(const std::byte *data, size_t size, bool no_hw_decoder, std::atomic *abort) { input_ctx = avformat_alloc_context(); if (!input_ctx) return false; @@ -95,7 +103,7 @@ bool FrameReader::load(const std::byte *data, size_t size, bool no_cuda, std::at } AVStream *video = input_ctx->streams[0]; - AVCodec *decoder = avcodec_find_decoder(video->codec->codec_id); + const AVCodec *decoder = avcodec_find_decoder(video->codecpar->codec_id); if (!decoder) return false; decoder_ctx = avcodec_alloc_context3(decoder); @@ -106,9 +114,9 @@ bool FrameReader::load(const std::byte *data, size_t size, bool no_cuda, std::at height = decoder_ctx->height; visionbuf_compute_aligned_width_and_height(width, height, &aligned_width, &aligned_height); - if (has_cuda_device && !no_cuda) { - if (!initHardwareDecoder(AV_HWDEVICE_TYPE_CUDA)) { - rWarning("No CUDA capable device was found. fallback to CPU decoding."); + if (has_hw_decoder && !no_hw_decoder) { + if (!initHardwareDecoder(HW_DEVICE_TYPE)) { + rWarning("No device with hardware decoder found. fallback to CPU decoding."); } else { nv12toyuv_buffer.resize(getYUVSize()); } @@ -151,7 +159,7 @@ bool FrameReader::initHardwareDecoder(AVHWDeviceType hw_device_type) { int ret = av_hwdevice_ctx_create(&hw_device_ctx, hw_device_type, nullptr, nullptr, 0); if (ret < 0) { hw_pix_fmt = AV_PIX_FMT_NONE; - has_cuda_device = false; + has_hw_decoder = false; rWarning("Failed to create specified HW device %d.", ret); return false; } @@ -219,7 +227,7 @@ AVFrame *FrameReader::decodeFrame(AVPacket *pkt) { } bool FrameReader::copyBuffers(AVFrame *f, uint8_t *rgb, uint8_t *yuv) { - if (hw_pix_fmt == AV_PIX_FMT_CUDA) { + if (hw_pix_fmt == HW_PIX_FMT) { uint8_t *y = yuv ? yuv : nv12toyuv_buffer.data(); uint8_t *u = y + width * height; uint8_t *v = u + (width / 2) * (height / 2); diff --git a/selfdrive/ui/replay/framereader.h b/selfdrive/ui/replay/framereader.h index 8de5c1f93e..b58aca5f74 100644 --- a/selfdrive/ui/replay/framereader.h +++ b/selfdrive/ui/replay/framereader.h @@ -19,8 +19,8 @@ class FrameReader { public: FrameReader(); ~FrameReader(); - bool load(const std::string &url, bool no_cuda = false, std::atomic *abort = nullptr, bool local_cache = false, int chunk_size = -1, int retries = 0); - bool load(const std::byte *data, size_t size, bool no_cuda = false, std::atomic *abort = nullptr); + bool load(const std::string &url, bool no_hw_decoder = false, std::atomic *abort = nullptr, bool local_cache = false, int chunk_size = -1, int retries = 0); + bool load(const std::byte *data, size_t size, bool no_hw_decoder = false, std::atomic *abort = nullptr); bool get(int idx, uint8_t *rgb, uint8_t *yuv); int getRGBSize() const { return aligned_width * aligned_height * 3; } int getYUVSize() const { return width * height * 3 / 2; } @@ -48,5 +48,5 @@ private: AVBufferRef *hw_device_ctx = nullptr; std::vector nv12toyuv_buffer; int prev_idx = -1; - inline static std::atomic has_cuda_device = true; + inline static std::atomic has_hw_decoder = true; }; diff --git a/selfdrive/ui/replay/main.cc b/selfdrive/ui/replay/main.cc index a7d2f54042..5027423ebc 100644 --- a/selfdrive/ui/replay/main.cc +++ b/selfdrive/ui/replay/main.cc @@ -7,7 +7,7 @@ const QString DEMO_ROUTE = "4cf7a6ad03080c90|2021-09-29--13-46-36"; int main(int argc, char *argv[]) { - QApplication app(argc, argv); + QCoreApplication app(argc, argv); const std::tuple flags[] = { {"dcam", REPLAY_FLAG_DCAM, "load driver camera"}, @@ -16,7 +16,7 @@ int main(int argc, char *argv[]) { {"no-cache", REPLAY_FLAG_NO_FILE_CACHE, "turn off local cache"}, {"qcam", REPLAY_FLAG_QCAMERA, "load qcamera"}, {"yuv", REPLAY_FLAG_SEND_YUV, "send yuv frame"}, - {"no-cuda", REPLAY_FLAG_NO_CUDA, "disable CUDA"}, + {"no-hw-decoder", REPLAY_FLAG_NO_HW_DECODER, "disable HW video decoding"}, {"no-vipc", REPLAY_FLAG_NO_VIPC, "do not output video"}, }; diff --git a/selfdrive/ui/replay/replay.h b/selfdrive/ui/replay/replay.h index d20d5bb920..4b33c56267 100644 --- a/selfdrive/ui/replay/replay.h +++ b/selfdrive/ui/replay/replay.h @@ -18,7 +18,7 @@ enum REPLAY_FLAGS { REPLAY_FLAG_NO_FILE_CACHE = 0x0020, REPLAY_FLAG_QCAMERA = 0x0040, REPLAY_FLAG_SEND_YUV = 0x0080, - REPLAY_FLAG_NO_CUDA = 0x0100, + REPLAY_FLAG_NO_HW_DECODER = 0x0100, REPLAY_FLAG_FULL_SPEED = 0x0200, REPLAY_FLAG_NO_VIPC = 0x0400, }; diff --git a/selfdrive/ui/replay/route.cc b/selfdrive/ui/replay/route.cc index e6a6177149..509be15383 100644 --- a/selfdrive/ui/replay/route.cc +++ b/selfdrive/ui/replay/route.cc @@ -117,7 +117,7 @@ void Segment::loadFile(int id, const std::string file) { bool success = false; if (id < MAX_CAMERAS) { frames[id] = std::make_unique(); - success = frames[id]->load(file, flags & REPLAY_FLAG_NO_CUDA, &abort_, local_cache, 20 * 1024 * 1024, 3); + success = frames[id]->load(file, flags & REPLAY_FLAG_NO_HW_DECODER, &abort_, local_cache, 20 * 1024 * 1024, 3); } else { log = std::make_unique(); success = log->load(file, &abort_, local_cache, 0, 3); diff --git a/selfdrive/ui/ui.cc b/selfdrive/ui/ui.cc index 007dc36d07..bbdd6d4f30 100644 --- a/selfdrive/ui/ui.cc +++ b/selfdrive/ui/ui.cc @@ -208,7 +208,7 @@ void UIState::updateStatus() { } // Handle onroad/offroad transition - if (scene.started != started_prev) { + if (scene.started != started_prev || sm->frame == 1) { if (scene.started) { status = STATUS_DISENGAGED; scene.started_frame = sm->frame; @@ -217,8 +217,6 @@ void UIState::updateStatus() { } started_prev = scene.started; emit offroadTransition(!scene.started); - } else if (sm->frame == 1) { - emit offroadTransition(!scene.started); } } diff --git a/selfdrive/ui/ui.h b/selfdrive/ui/ui.h index cd7ea446ec..382a7df5ed 100644 --- a/selfdrive/ui/ui.h +++ b/selfdrive/ui/ui.h @@ -118,6 +118,9 @@ public: inline bool worldObjectsVisible() const { return sm->rcv_frame("liveCalibration") > scene.started_frame; }; + inline bool engaged() const { + return scene.started && (*sm)["controlsState"].getControlsState().getEnabled(); + }; int fb_w = 0, fb_h = 0; @@ -141,7 +144,7 @@ private slots: private: QTimer *timer; - bool started_prev = true; + bool started_prev = false; }; UIState *uiState(); diff --git a/selfdrive/ui/watch3.cc b/selfdrive/ui/watch3.cc index 73c8e11318..aeabfe110e 100644 --- a/selfdrive/ui/watch3.cc +++ b/selfdrive/ui/watch3.cc @@ -6,9 +6,7 @@ #include "selfdrive/ui/qt/widgets/cameraview.h" int main(int argc, char *argv[]) { - QSurfaceFormat fmt; - fmt.setRenderableType(QSurfaceFormat::OpenGLES); - QSurfaceFormat::setDefaultFormat(fmt); + setQtSurfaceFormat(); QApplication a(argc, argv); QWidget w; diff --git a/selfdrive/version.py b/selfdrive/version.py index f2570cd305..9b18edc432 100644 --- a/selfdrive/version.py +++ b/selfdrive/version.py @@ -55,11 +55,15 @@ def get_origin(default: Optional[str] = None) -> Optional[str]: @cache def get_normalized_origin(default: Optional[str] = None) -> Optional[str]: - return get_origin()\ - .replace("git@", "", 1)\ - .replace(".git", "", 1)\ - .replace("https://", "", 1)\ - .replace(":", "/", 1) + origin = get_origin() + + if origin is None: + return default + + return origin.replace("git@", "", 1) \ + .replace(".git", "", 1) \ + .replace("https://", "", 1) \ + .replace(":", "/", 1) @cache diff --git a/tools/mac_setup.sh b/tools/mac_setup.sh index fa1a23dd47..9140cc4335 100755 --- a/tools/mac_setup.sh +++ b/tools/mac_setup.sh @@ -4,12 +4,28 @@ set -e DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" ROOT="$(cd $DIR/../ && pwd)" +ARCH=$(uname -m) + +if [[ $SHELL == "/bin/zsh" ]]; then + RC_FILE="$HOME/.zshrc" +elif [[ $SHELL == "/bin/bash" ]]; then + RC_FILE="$HOME/.bashrc" +fi # Install brew if required if [[ $(command -v brew) == "" ]]; then echo "Installing Hombrew" /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)" echo "[ ] installed brew t=$SECONDS" + + # make brew available now + if [[ $ARCH == "x86_64" ]]; then + echo 'eval "$(/usr/local/homebrew/bin/brew shellenv)"' >> $RC_FILE + eval "$(/usr/local/homebrew/bin/brew shellenv)" + else + echo 'eval "$(/opt/homebrew/bin/brew shellenv)"' >> $RC_FILE + eval "$(/opt/homebrew/bin/brew shellenv)" + fi fi # TODO: remove protobuf,protobuf-c,swig when casadi can be pip installed @@ -40,20 +56,18 @@ EOS echo "[ ] finished brew install t=$SECONDS" -if [[ $SHELL == "/bin/zsh" ]]; then - RC_FILE="$HOME/.zshrc" -elif [[ $SHELL == "/bin/bash" ]]; then - RC_FILE="$HOME/.bash_profile" -fi +BREW_PREFIX=$(brew --prefix) + +# archive backend tools for pip dependencies +export LDFLAGS="$LDFLAGS -L${BREW_PREFIX}/opt/zlib/lib" +export LDFLAGS="$LDFLAGS -L${BREW_PREFIX}/opt/bzip2/lib" +export CPPFLAGS="$CPPFLAGS -I${BREW_PREFIX}/opt/zlib/include" +export CPPFLAGS="$CPPFLAGS -I${BREW_PREFIX}/opt/bzip2/include" -export LDFLAGS="$LDFLAGS -L/usr/local/opt/zlib/lib" -export LDFLAGS="$LDFLAGS -L/usr/local/opt/bzip2/lib" -export LDFLAGS="$LDFLAGS -L/usr/local/opt/openssl@1.1/lib" -export CPPFLAGS="$CPPFLAGS -I/usr/local/opt/zlib/include" -export CPPFLAGS="$CPPFLAGS -I/usr/local/opt/bzip2/include" -export CPPFLAGS="$CPPFLAGS -I/usr/local/opt/openssl@1.1/include" -export PATH="$PATH:/usr/local/opt/openssl@1.1/bin" -export PATH="$PATH:/usr/local/bin" +# pycurl curl/openssl backend dependencies +export LDFLAGS="$LDFLAGS -L${BREW_PREFIX}/opt/openssl@3/lib" +export CPPFLAGS="$CPPFLAGS -I${BREW_PREFIX}/opt/openssl@3/include" +export PYCURL_SSL_LIBRARY=openssl # openpilot environment if [ -z "$OPENPILOT_ENV" ] && [ -n "$RC_FILE" ] && [ -z "$CI" ]; then diff --git a/tools/replay/lib/ui_helpers.py b/tools/replay/lib/ui_helpers.py index 7410c107c4..33a1e77e05 100644 --- a/tools/replay/lib/ui_helpers.py +++ b/tools/replay/lib/ui_helpers.py @@ -185,12 +185,12 @@ def plot_model(m, img, calibration, top_down): if calibration is None or top_down is None: return - for lead in m.leads: + for lead in m.leadsV3: if lead.prob < 0.5: continue - x, y, _, _ = lead.xyva - x_std, _, _, _ = lead.xyvaStd + x, y = lead.x[0], lead.y[0] + x_std = lead.xStd[0] x -= RADAR_TO_CAMERA _, py_top = to_topdown_pt(x + x_std, y)