From c10abce1d5b9071694caba65af6d9965e96a1824 Mon Sep 17 00:00:00 2001 From: Adeeb Shihadeh Date: Thu, 27 Feb 2025 18:26:31 -0800 Subject: [PATCH] camerad: make robust to introduced failures (#34723) * camerad: make robust to introduced failures * more test * start here --------- Co-authored-by: Comma Device --- system/camerad/cameras/spectra.cc | 30 ++++++++++++++--------------- system/camerad/cameras/spectra.h | 2 +- system/camerad/test/test_camerad.py | 29 +++++++++++++++++++++++++--- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/system/camerad/cameras/spectra.cc b/system/camerad/cameras/spectra.cc index f5757dcded..7b2b385a1f 100644 --- a/system/camerad/cameras/spectra.cc +++ b/system/camerad/cameras/spectra.cc @@ -249,13 +249,6 @@ SpectraCamera::~SpectraCamera() { } int SpectraCamera::clear_req_queue() { - struct cam_req_mgr_flush_info req_mgr_flush_request = {0}; - req_mgr_flush_request.session_hdl = session_handle; - req_mgr_flush_request.link_hdl = link_handle; - req_mgr_flush_request.flush_type = CAM_REQ_MGR_FLUSH_TYPE_ALL; - int ret = do_cam_control(m->video0_fd, CAM_REQ_MGR_FLUSH_REQ, &req_mgr_flush_request, sizeof(req_mgr_flush_request)); - LOGD("flushed all req: %d", ret); - if (icp_dev_handle > 0) { struct cam_flush_dev_cmd cmd = { .session_handle = session_handle, @@ -264,8 +257,16 @@ int SpectraCamera::clear_req_queue() { }; int err = do_cam_control(m->icp_fd, CAM_FLUSH_REQ, &cmd, sizeof(cmd)); assert(err == 0); + LOGD("flushed bps: %d", err); } + struct cam_req_mgr_flush_info req_mgr_flush_request = {0}; + req_mgr_flush_request.session_hdl = session_handle; + req_mgr_flush_request.link_hdl = link_handle; + req_mgr_flush_request.flush_type = CAM_REQ_MGR_FLUSH_TYPE_ALL; + int ret = do_cam_control(m->video0_fd, CAM_REQ_MGR_FLUSH_REQ, &req_mgr_flush_request, sizeof(req_mgr_flush_request)); + LOGD("flushed all req: %d", ret); + for (int i = 0; i < MAX_IFE_BUFS; ++i) { destroySyncObjectAt(i); } @@ -938,16 +939,15 @@ bool 
SpectraCamera::enqueue_buffer(int i, uint64_t request_id) { } } - // all good, hand off frame if (ret == 0) { + // all good, hand off frame frame_ready = true; - } - - if (ret != 0) { + destroySyncObjectAt(i); + } else { + // need to start over on sync failures, + // otherwise future frames will tear clear_req_queue(); } - - destroySyncObjectAt(i); } // create output fences @@ -1376,14 +1376,14 @@ bool SpectraCamera::handle_camera_event(const cam_req_mgr_message *event_data) { return false; } - if (!enabled) return false; - // ID from the qcom camera request manager uint64_t request_id = event_data->u.frame_msg.request_id; // raw as opposed to our re-indexed frame ID uint64_t frame_id_raw = event_data->u.frame_msg.frame_id; + //LOGD("handle cam %d, request id %lu -> %lu, frame id raw %lu", cc.camera_num, request_id_last, request_id, frame_id_raw); + if (request_id != 0) { // next ready // check for skipped_last frames if (frame_id_raw > frame_id_raw_last + 1 && !skipped_last) { diff --git a/system/camerad/cameras/spectra.h b/system/camerad/cameras/spectra.h index d9abf884c1..197cae0a3e 100644 --- a/system/camerad/cameras/spectra.h +++ b/system/camerad/cameras/spectra.h @@ -208,7 +208,7 @@ private: inline static bool first_frame_synced = false; // a mode for stressing edge cases: realignment, sync failures, etc. 
- inline bool stress_test(const char* log, float prob=0.01) { + inline bool stress_test(const char* log, float prob=0.03) { static bool enable = getenv("SPECTRA_STRESS_TEST") != nullptr; bool triggered = enable && ((static_cast<double>(rand()) / RAND_MAX) < prob); if (triggered) { diff --git a/system/camerad/test/test_camerad.py b/system/camerad/test/test_camerad.py index e88a7bf4bf..cdacd389df 100644 --- a/system/camerad/test/test_camerad.py +++ b/system/camerad/test/test_camerad.py @@ -64,7 +64,30 @@ class TestCamerad: laggy_frames = {k: v for k, v in diffs.items() if v > 1.1} assert len(laggy_frames) == 0, f"Frames not synced properly: {laggy_frames=}" - @pytest.mark.skip("TODO: enable this") - def test_stress_test(self, logs): + def test_stress_test(self): os.environ['SPECTRA_STRESS_TEST'] = '1' - run_and_log(["camerad", ], CAMERAS, 5) + logs = run_and_log(["camerad", ], CAMERAS, 12) + ts = msgs_to_time_series(logs) + + for c in CAMERAS: + assert c in ts + assert len(ts[c]['t']) > 20 + + # not a valid request id + assert 0 not in ts[c]['requestId'] + + # we should see jumps + assert np.max(np.diff(ts[c]['frameId'])) > 1 + assert np.max(np.diff(ts[c]['requestId'])) > 1 + + # should monotonically increase + assert np.all(np.diff(ts[c]['frameId']) >= 1) + assert np.all(np.diff(ts[c]['requestId']) >= 1) + + # EOF > SOF + assert np.all((ts[c]['timestampEof'] - ts[c]['timestampSof']) > 0) + + # logMonoTime > SOF + assert np.all((ts[c]['t'] - ts[c]['timestampSof']/1e9) > 0.01) + # TODO: make this work + #assert np.all((ts[c]['t'] - ts[c]['timestampSof']/1e9) < 0.25)