boardd: handle nack on tx buffer full (#28241)

* boardd: handle nack on tx buffer full

* print pandaStates

* rx buffer too small

* connect timeout

---------

Co-authored-by: Comma Device <device@comma.ai>
pull/28243/head
Adeeb Shihadeh 2 years ago committed by GitHub
parent 7a684da47f
commit e4a37620ee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      panda
  2. 4
      selfdrive/boardd/panda_comms.h
  3. 34
      selfdrive/boardd/spi.cc
  4. 8
      selfdrive/boardd/tests/test_boardd_loopback.py

@ -1 +1 @@
Subproject commit 2a538337537e6830f02565c2c36af4f886c963e4 Subproject commit 52f96bac685b129c5b90e274fe39386e76da512e

@ -75,8 +75,8 @@ private:
inline static std::recursive_mutex hw_lock; inline static std::recursive_mutex hw_lock;
int wait_for_ack(spi_ioc_transfer &transfer, uint8_t ack); int wait_for_ack(spi_ioc_transfer &transfer, uint8_t ack);
int bulk_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t rx_len); int bulk_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t rx_len, unsigned int timeout);
int spi_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len); int spi_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len);
int spi_transfer_retry(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len); int spi_transfer_retry(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len, unsigned int timeout);
}; };
#endif #endif

@ -29,8 +29,7 @@ struct __attribute__((packed)) spi_header {
uint16_t max_rx_len; uint16_t max_rx_len;
}; };
const int SPI_MAX_RETRIES = 5; const int SPI_ACK_TIMEOUT = 500; // milliseconds
const int SPI_ACK_TIMEOUT = 50; // milliseconds
const std::string SPI_DEVICE = "/dev/spidev0.0"; const std::string SPI_DEVICE = "/dev/spidev0.0";
class LockEx { class LockEx {
@ -89,7 +88,7 @@ PandaSpiHandle::PandaSpiHandle(std::string serial) : PandaCommsHandle(serial) {
} }
// get hw UID/serial // get hw UID/serial
ret = control_read(0xc3, 0, 0, uid, uid_len); ret = control_read(0xc3, 0, 0, uid, uid_len, 1000);
if (ret == uid_len) { if (ret == uid_len) {
std::stringstream stream; std::stringstream stream;
for (int i = 0; i < uid_len; i++) { for (int i = 0; i < uid_len; i++) {
@ -127,37 +126,33 @@ void PandaSpiHandle::cleanup() {
int PandaSpiHandle::control_write(uint8_t request, uint16_t param1, uint16_t param2, unsigned int timeout) { int PandaSpiHandle::control_write(uint8_t request, uint16_t param1, uint16_t param2, unsigned int timeout) {
LockEx lock(spi_fd, hw_lock);
ControlPacket_t packet = { ControlPacket_t packet = {
.request = request, .request = request,
.param1 = param1, .param1 = param1,
.param2 = param2, .param2 = param2,
.length = 0 .length = 0
}; };
return spi_transfer_retry(0, (uint8_t *) &packet, sizeof(packet), NULL, 0); return spi_transfer_retry(0, (uint8_t *) &packet, sizeof(packet), NULL, 0, timeout);
} }
int PandaSpiHandle::control_read(uint8_t request, uint16_t param1, uint16_t param2, unsigned char *data, uint16_t length, unsigned int timeout) { int PandaSpiHandle::control_read(uint8_t request, uint16_t param1, uint16_t param2, unsigned char *data, uint16_t length, unsigned int timeout) {
LockEx lock(spi_fd, hw_lock);
ControlPacket_t packet = { ControlPacket_t packet = {
.request = request, .request = request,
.param1 = param1, .param1 = param1,
.param2 = param2, .param2 = param2,
.length = length .length = length
}; };
return spi_transfer_retry(0, (uint8_t *) &packet, sizeof(packet), data, length); return spi_transfer_retry(0, (uint8_t *) &packet, sizeof(packet), data, length, timeout);
} }
int PandaSpiHandle::bulk_write(unsigned char endpoint, unsigned char* data, int length, unsigned int timeout) { int PandaSpiHandle::bulk_write(unsigned char endpoint, unsigned char* data, int length, unsigned int timeout) {
LockEx lock(spi_fd, hw_lock); return bulk_transfer(endpoint, data, length, NULL, 0, timeout);
return bulk_transfer(endpoint, data, length, NULL, 0);
} }
int PandaSpiHandle::bulk_read(unsigned char endpoint, unsigned char* data, int length, unsigned int timeout) { int PandaSpiHandle::bulk_read(unsigned char endpoint, unsigned char* data, int length, unsigned int timeout) {
LockEx lock(spi_fd, hw_lock); return bulk_transfer(endpoint, NULL, 0, data, length, timeout);
return bulk_transfer(endpoint, NULL, 0, data, length);
} }
int PandaSpiHandle::bulk_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t rx_len) { int PandaSpiHandle::bulk_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t rx_len, unsigned int timeout) {
const int xfer_size = 0x40 * 15; const int xfer_size = 0x40 * 15;
int ret = 0; int ret = 0;
@ -166,10 +161,10 @@ int PandaSpiHandle::bulk_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t t
int d; int d;
if (tx_data != NULL) { if (tx_data != NULL) {
int len = std::min(xfer_size, tx_len - (xfer_size * i)); int len = std::min(xfer_size, tx_len - (xfer_size * i));
d = spi_transfer_retry(endpoint, tx_data + (xfer_size * i), len, NULL, 0); d = spi_transfer_retry(endpoint, tx_data + (xfer_size * i), len, NULL, 0, timeout);
} else { } else {
uint16_t to_read = std::min(xfer_size, rx_len - ret); uint16_t to_read = std::min(xfer_size, rx_len - ret);
d = spi_transfer_retry(endpoint, NULL, 0, rx_data + (xfer_size * i), to_read); d = spi_transfer_retry(endpoint, NULL, 0, rx_data + (xfer_size * i), to_read, timeout);
} }
if (d < 0) { if (d < 0) {
@ -213,15 +208,13 @@ bool check_checksum(uint8_t *data, int data_len) {
} }
int PandaSpiHandle::spi_transfer_retry(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len) { int PandaSpiHandle::spi_transfer_retry(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len, unsigned int timeout) {
int ret; int ret;
double start_time = millis_since_boot();
int count = SPI_MAX_RETRIES;
do { do {
// TODO: handle error // TODO: handle error
ret = spi_transfer(endpoint, tx_data, tx_len, rx_data, max_rx_len); ret = spi_transfer(endpoint, tx_data, tx_len, rx_data, max_rx_len);
count--; } while (ret < 0 && connected && ((timeout == 0) || (millis_since_boot() - start_time) < timeout));
} while (ret < 0 && connected && count > 0);
return ret; return ret;
} }
@ -238,7 +231,7 @@ int PandaSpiHandle::wait_for_ack(spi_ioc_transfer &transfer, uint8_t ack) {
if (rx_buf[0] == ack) { if (rx_buf[0] == ack) {
break; break;
} else if (rx_buf[0] == SPI_NACK) { } else if (rx_buf[0] == SPI_NACK) {
LOGW("SPI: got NACK"); LOGD("SPI: got NACK");
return -1; return -1;
} }
@ -255,6 +248,7 @@ int PandaSpiHandle::wait_for_ack(spi_ioc_transfer &transfer, uint8_t ack) {
int PandaSpiHandle::spi_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len) { int PandaSpiHandle::spi_transfer(uint8_t endpoint, uint8_t *tx_data, uint16_t tx_len, uint8_t *rx_data, uint16_t max_rx_len) {
int ret; int ret;
uint16_t rx_data_len; uint16_t rx_data_len;
LockEx lock(spi_fd, hw_lock);
// needs to be less, since we need to have space for the checksum // needs to be less, since we need to have space for the checksum
assert(tx_len < SPI_BUF_SIZE); assert(tx_len < SPI_BUF_SIZE);

@ -61,6 +61,7 @@ class TestBoardd(unittest.TestCase):
sendcan = messaging.pub_sock('sendcan') sendcan = messaging.pub_sock('sendcan')
can = messaging.sub_sock('can', conflate=False, timeout=100) can = messaging.sub_sock('can', conflate=False, timeout=100)
sm = messaging.SubMaster(['pandaStates'])
time.sleep(0.5) time.sleep(0.5)
n = 200 n = 200
@ -71,7 +72,7 @@ class TestBoardd(unittest.TestCase):
sent_msgs = defaultdict(set) sent_msgs = defaultdict(set)
for _ in range(random.randrange(20, 100)): for _ in range(random.randrange(20, 100)):
to_send = [] to_send = []
for __ in range(random.randrange(50)): for __ in range(random.randrange(20)):
bus = random.choice([b for b in range(3*num_pandas) if b % 4 != 3]) bus = random.choice([b for b in range(3*num_pandas) if b % 4 != 3])
addr = random.randrange(1, 1<<29) addr = random.randrange(1, 1<<29)
dat = bytes(random.getrandbits(8) for _ in range(random.randrange(1, 9))) dat = bytes(random.getrandbits(8) for _ in range(random.randrange(1, 9)))
@ -83,6 +84,7 @@ class TestBoardd(unittest.TestCase):
sent_loopback.update({k+128: copy.deepcopy(v) for k, v in sent_msgs.items()}) sent_loopback.update({k+128: copy.deepcopy(v) for k, v in sent_msgs.items()})
sent_total = {k: len(v) for k, v in sent_loopback.items()} sent_total = {k: len(v) for k, v in sent_loopback.items()}
for _ in range(100 * 5): for _ in range(100 * 5):
sm.update(0)
recvd = messaging.drain_sock(can, wait_for_one=True) recvd = messaging.drain_sock(can, wait_for_one=True)
for msg in recvd: for msg in recvd:
for m in msg.can: for m in msg.can:
@ -96,8 +98,12 @@ class TestBoardd(unittest.TestCase):
# if a set isn't empty, messages got dropped # if a set isn't empty, messages got dropped
pprint(sent_msgs) pprint(sent_msgs)
pprint(sent_loopback) pprint(sent_loopback)
print({k: len(x) for k, x in sent_loopback.items()})
print(sum([len(x) for x in sent_loopback.values()]))
pprint(sm['pandaStates']) # may drop messages due to RX buffer overflow
for bus in sent_loopback.keys(): for bus in sent_loopback.keys():
assert not len(sent_loopback[bus]), f"loop {i}: bus {bus} missing {len(sent_loopback[bus])} out of {sent_total[bus]} messages" assert not len(sent_loopback[bus]), f"loop {i}: bus {bus} missing {len(sent_loopback[bus])} out of {sent_total[bus]} messages"
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

Loading…
Cancel
Save