From e4a37620ee9cb6416e51a3e8346e5008aeeab027 Mon Sep 17 00:00:00 2001 From: Adeeb Shihadeh Date: Fri, 19 May 2023 23:24:52 -0700 Subject: [PATCH] boardd: handle nack on tx buffer full (#28241) * boardd: handle nack on tx buffer full * print pandaStates * rx buffer too small * connect timeout --------- Co-authored-by: Comma Device --- panda | 2 +- selfdrive/boardd/panda_comms.h | 4 +-- selfdrive/boardd/spi.cc | 34 ++++++++----------- .../boardd/tests/test_boardd_loopback.py | 8 ++++- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/panda b/panda index 2a53833753..52f96bac68 160000 --- a/panda +++ b/panda @@ -1 +1 @@ -Subproject commit 2a538337537e6830f02565c2c36af4f886c963e4 +Subproject commit 52f96bac685b129c5b90e274fe39386e76da512e diff --git a/selfdrive/boardd/panda_comms.h b/selfdrive/boardd/panda_comms.h index f93d292170..723a4ab79c 100644 --- a/selfdrive/boardd/panda_comms.h +++ b/selfdrive/boardd/panda_comms.h @@ -75,8 +75,8 @@ private: inline static std::recursive_mutex hw_lock; int wait_for_ack(spi_ioc_transfer &transfer, uint8_t ack); - int bulk_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t rx_len); + int bulk_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t rx_len, unsigned int timeout); int spi_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len); - int spi_transfer_retry(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len); + int spi_transfer_retry(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len, unsigned int timeout); }; #endif diff --git a/selfdrive/boardd/spi.cc b/selfdrive/boardd/spi.cc index 82a95f7759..54638b1b8d 100644 --- a/selfdrive/boardd/spi.cc +++ b/selfdrive/boardd/spi.cc @@ -29,8 +29,7 @@ struct __attribute__((packed)) spi_header { uint16_t max_rx_len; }; -const int SPI_MAX_RETRIES = 5; -const int SPI_ACK_TIMEOUT = 50; // milliseconds +const int SPI_ACK_TIMEOUT = 500; // milliseconds const std::string SPI_DEVICE = "/dev/spidev0.0"; class LockEx { @@ -89,7 +88,7 @@ PandaSpiHandle::PandaSpiHandle(std::string serial) : PandaCommsHandle(serial) { } // get hw UID/serial - ret = control_read(0xc3, 0, 0, uid, uid_len); + ret = control_read(0xc3, 0, 0, uid, uid_len, 1000); if (ret == uid_len) { std::stringstream stream; for (int i = 0; i < uid_len; i++) { @@ -127,37 +126,33 @@ void PandaSpiHandle::cleanup() { int PandaSpiHandle::control_write(uint8_t request, uint16_t param1, uint16_t param2, unsigned int timeout) { - LockEx lock(spi_fd, hw_lock); ControlPacket_t packet = { .request = request, .param1 = param1, .param2 = param2, .length = 0 }; - return spi_transfer_retry(0, (uint8_t *) &packet, sizeof(packet), NULL, 0); + return spi_transfer_retry(0, (uint8_t *) &packet, sizeof(packet), NULL, 0, timeout); } int PandaSpiHandle::control_read(uint8_t request, uint16_t param1, uint16_t param2, unsigned char *data, uint16_t length, unsigned int timeout) { - LockEx lock(spi_fd, hw_lock); ControlPacket_t packet = { .request = request, .param1 = param1, .param2 = param2, .length = length }; - return spi_transfer_retry(0, (uint8_t *) &packet, sizeof(packet), data, length); + return spi_transfer_retry(0, (uint8_t *) &packet, sizeof(packet), data, length, timeout); } int PandaSpiHandle::bulk_write(unsigned char endpoint, unsigned char* data, int length, unsigned int timeout) { - LockEx lock(spi_fd, hw_lock); - return bulk_transfer(endpoint, data, length, NULL, 0); + return bulk_transfer(endpoint, data, length, NULL, 0, timeout); } int PandaSpiHandle::bulk_read(unsigned char endpoint, unsigned char* data, int length, unsigned int timeout) { - LockEx lock(spi_fd, hw_lock); - return bulk_transfer(endpoint, NULL, 0, data, length); + return bulk_transfer(endpoint, NULL, 0, data, length, timeout); } -int PandaSpiHandle::bulk_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t rx_len) { +int PandaSpiHandle::bulk_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t rx_len, unsigned int timeout) { const int xfer_size = 0x40 * 15; int ret = 0; @@ -166,10 +161,10 @@ int PandaSpiHandle::bulk_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t t int d; if (tx_data != NULL) { int len = std::min(xfer_size, tx_len - (xfer_size * i)); - d = spi_transfer_retry(endpoint, tx_data + (xfer_size * i), len, NULL, 0); + d = spi_transfer_retry(endpoint, tx_data + (xfer_size * i), len, NULL, 0, timeout); } else { uint16_t to_read = std::min(xfer_size, rx_len - ret); - d = spi_transfer_retry(endpoint, NULL, 0, rx_data + (xfer_size * i), to_read); + d = spi_transfer_retry(endpoint, NULL, 0, rx_data + (xfer_size * i), to_read, timeout); } if (d < 0) { @@ -213,15 +208,13 @@ bool check_checksum(uint8_t *data, int data_len) { } -int PandaSpiHandle::spi_transfer_retry(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len) { +int PandaSpiHandle::spi_transfer_retry(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len, unsigned int timeout) { int ret; - - int count = SPI_MAX_RETRIES; + double start_time = millis_since_boot(); do { // TODO: handle error ret = spi_transfer(endpoint, tx_data, tx_len, rx_data, max_rx_len); - count--; - } while (ret < 0 && connected && count > 0); + } while (ret < 0 && connected && ((timeout == 0) || (millis_since_boot() - start_time) < timeout)); return ret; } @@ -238,7 +231,7 @@ int PandaSpiHandle::wait_for_ack(spi_ioc_transfer &transfer, uint8_t ack) { if (rx_buf[0] == ack) { break; } else if (rx_buf[0] == SPI_NACK) { - LOGW("SPI: got NACK"); + LOGD("SPI: got NACK"); return -1; } @@ -255,6 +248,7 @@ int PandaSpiHandle::wait_for_ack(spi_ioc_transfer &transfer, uint8_t ack) { int PandaSpiHandle::spi_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len) { int ret; uint16_t rx_data_len; + LockEx lock(spi_fd, hw_lock); // needs to be less, since we need to have space for the checksum assert(tx_len < SPI_BUF_SIZE); diff --git a/selfdrive/boardd/tests/test_boardd_loopback.py b/selfdrive/boardd/tests/test_boardd_loopback.py index f3642e1668..d0504d6bf2 100755 --- a/selfdrive/boardd/tests/test_boardd_loopback.py +++ b/selfdrive/boardd/tests/test_boardd_loopback.py @@ -61,6 +61,7 @@ class TestBoardd(unittest.TestCase): sendcan = messaging.pub_sock('sendcan') can = messaging.sub_sock('can', conflate=False, timeout=100) + sm = messaging.SubMaster(['pandaStates']) time.sleep(0.5) n = 200 @@ -71,7 +72,7 @@ class TestBoardd(unittest.TestCase): sent_msgs = defaultdict(set) for _ in range(random.randrange(20, 100)): to_send = [] - for __ in range(random.randrange(50)): + for __ in range(random.randrange(20)): bus = random.choice([b for b in range(3*num_pandas) if b % 4 != 3]) addr = random.randrange(1, 1<<29) dat = bytes(random.getrandbits(8) for _ in range(random.randrange(1, 9))) @@ -83,6 +84,7 @@ class TestBoardd(unittest.TestCase): sent_loopback.update({k+128: copy.deepcopy(v) for k, v in sent_msgs.items()}) sent_total = {k: len(v) for k, v in sent_loopback.items()} for _ in range(100 * 5): + sm.update(0) recvd = messaging.drain_sock(can, wait_for_one=True) for msg in recvd: for m in msg.can: @@ -96,8 +98,12 @@ class TestBoardd(unittest.TestCase): # if a set isn't empty, messages got dropped pprint(sent_msgs) pprint(sent_loopback) + print({k: len(x) for k, x in sent_loopback.items()}) + print(sum([len(x) for x in sent_loopback.values()])) + pprint(sm['pandaStates']) # may drop messages due to RX buffer overflow for bus in sent_loopback.keys(): assert not len(sent_loopback[bus]), f"loop {i}: bus {bus} missing {len(sent_loopback[bus])} out of {sent_total[bus]} messages" + if __name__ == "__main__": unittest.main()