process replay: fix socket-ful replay flakiness (#27721)

* process replay: fix socket-ful replay flakiness

* bump up laikad timeout

* bump

* bump again

* revert that
old-commit-hash: 85908f7e2b
beeps
Adeeb Shihadeh 2 years ago committed by GitHub
parent 6433154d5d
commit 6f52cc09f4
  1. 42
      selfdrive/test/process_replay/process_replay.py

@ -5,7 +5,8 @@ import sys
import threading import threading
import time import time
import signal import signal
from collections import namedtuple from dataclasses import dataclass, field
from typing import Dict, List, Optional, Callable
import capnp import capnp
@ -28,7 +29,20 @@ TIMEOUT = 15
PROC_REPLAY_DIR = os.path.dirname(os.path.abspath(__file__)) PROC_REPLAY_DIR = os.path.dirname(os.path.abspath(__file__))
FAKEDATA = os.path.join(PROC_REPLAY_DIR, "fakedata/") FAKEDATA = os.path.join(PROC_REPLAY_DIR, "fakedata/")
ProcessConfig = namedtuple('ProcessConfig', ['proc_name', 'pub_sub', 'ignore', 'init_callback', 'should_recv_callback', 'tolerance', 'fake_pubsubmaster', 'submaster_config', 'environ', 'subtest_name', "field_tolerances"], defaults=({}, {}, "", {})) @dataclass
class ProcessConfig:
proc_name: str
pub_sub: Dict[str, List[str]]
ignore: List[str]
init_callback: Optional[Callable]
should_recv_callback: Optional[Callable]
tolerance: Optional[float]
fake_pubsubmaster: bool
submaster_config: Dict[str, List[str]] = field(default_factory=dict)
environ: Dict[str, str] = field(default_factory=dict)
subtest_name: str = ""
field_tolerances: Dict[str, float] = field(default_factory=dict)
timeout: int = 30
def wait_for_event(evt): def wait_for_event(evt):
@ -365,6 +379,7 @@ CONFIGS = [
should_recv_callback=None, should_recv_callback=None,
tolerance=NUMPY_TOLERANCE, tolerance=NUMPY_TOLERANCE,
fake_pubsubmaster=False, fake_pubsubmaster=False,
timeout=60*10, # first messages are blocked on internet assistance
), ),
ProcessConfig( ProcessConfig(
proc_name="torqued", proc_name="torqued",
@ -532,8 +547,9 @@ def python_replay_process(cfg, lr, fingerprint=None):
def replay_process_with_sockets(cfg, lr, fingerprint=None): def replay_process_with_sockets(cfg, lr, fingerprint=None):
sub_sockets = [s for _, sub in cfg.pub_sub.items() for s in sub]
pm = messaging.PubMaster(cfg.pub_sub.keys()) pm = messaging.PubMaster(cfg.pub_sub.keys())
sub_sockets = [s for _, sub in cfg.pub_sub.items() for s in sub]
sockets = {s: messaging.sub_sock(s, timeout=100) for s in sub_sockets}
all_msgs = sorted(lr, key=lambda msg: msg.logMonoTime) all_msgs = sorted(lr, key=lambda msg: msg.logMonoTime)
pub_msgs = [msg for msg in all_msgs if msg.which() in list(cfg.pub_sub.keys())] pub_msgs = [msg for msg in all_msgs if msg.which() in list(cfg.pub_sub.keys())]
@ -556,22 +572,26 @@ def replay_process_with_sockets(cfg, lr, fingerprint=None):
while not any(pm.all_readers_updated(s) for s in cfg.pub_sub.keys()): while not any(pm.all_readers_updated(s) for s in cfg.pub_sub.keys()):
time.sleep(0) time.sleep(0)
# Make sure all subscribers are connected for s in sockets.values():
sockets = {s: messaging.sub_sock(s, timeout=2000) for s in sub_sockets} messaging.recv_one_or_none(s)
for s in sub_sockets:
messaging.recv_one_or_none(sockets[s])
# Do the replay # Do the replay
cnt = 0 cnt = 0
for msg in pub_msgs: for msg in pub_msgs:
with Timeout(TIMEOUT, error_msg=f"timed out testing process {repr(cfg.proc_name)}, {cnt}/{len(pub_msgs)} msgs done"): with Timeout(cfg.timeout, error_msg=f"timed out testing process {repr(cfg.proc_name)}, {cnt}/{len(pub_msgs)} msgs done"):
resp_sockets = cfg.pub_sub[msg.which()]
if cfg.should_recv_callback is not None:
resp_sockets, _ = cfg.should_recv_callback(msg, None, None, None)
# Make sure all subscribers are connected
if len(log_msgs) == 0 and len(resp_sockets) > 0:
for s in sockets.values():
messaging.recv_one_or_none(s)
pm.send(msg.which(), msg.as_builder()) pm.send(msg.which(), msg.as_builder())
while not pm.all_readers_updated(msg.which()): while not pm.all_readers_updated(msg.which()):
time.sleep(0) time.sleep(0)
resp_sockets = cfg.pub_sub[msg.which()]
if cfg.should_recv_callback is not None:
resp_sockets, _ = cfg.should_recv_callback(msg, None, None, None)
for s in resp_sockets: for s in resp_sockets:
m = messaging.recv_one_retry(sockets[s]) m = messaging.recv_one_retry(sockets[s])
m = m.as_builder() m = m.as_builder()

Loading…
Cancel
Save