store mic audio with toggle (#35595)

* store/send mic audio with toggle

* script to extract audio from logs

* change description and add translation placeholders

* microphone icon

* apply toggle in loggerd

* add length and counter

* startFrameIdx counter

* Revert "change description and add translation placeholders"

This reverts commit 7baa1f6de9.

* send mic data first and then calc

* restore changed description/icon after revert

* adjust fft samples to keep old time window

* remove extract_audio.py since audio is now stored in qcam instead of rlog

* qt microphone recording icon

* Revert "remove extract_audio.py since audio is now stored in qcam instead of rlog"

This reverts commit 7a3a75bd8d.

* move extract_audio script and output file by default

* remove length field

* recording indicator swaps sides based on lhd/rhd

* use record icon from comma body

* Update toggle description

Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>

* update raylib toggle description to match the earlier change

* microphone --> soundPressure, audioData --> rawAudioData

* cleanup unused var

* update README

* sidebar mic indicator instead of annotated camera

* improve logic readability

* remove startFrameIdx and sequenceNum

* use Q_PROPERTY/setProperty so that update() is actually called on value change

* specify old id for SoundPressure

* fix typo

---------

Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
pull/35621/head
Jimmy 2 weeks ago committed by GitHub
parent 082f4c0aee
commit dcd56ae09a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 2
      README.md
  2. 10
      cereal/log.capnp
  3. 3
      cereal/services.py
  4. 1
      common/params_keys.h
  5. 3
      selfdrive/assets/icons/microphone.png
  6. 7
      selfdrive/ui/layouts/settings/toggles.py
  7. 7
      selfdrive/ui/qt/offroad/settings.cc
  8. 22
      selfdrive/ui/qt/sidebar.cc
  9. 6
      selfdrive/ui/qt/sidebar.h
  10. 6
      selfdrive/ui/soundd.py
  11. 3
      selfdrive/ui/ui.cc
  12. 2
      selfdrive/ui/ui.h
  13. 28
      system/loggerd/loggerd.cc
  14. 24
      system/micd.py
  15. 77
      tools/scripts/extract_audio.py

@ -103,7 +103,7 @@ By default, openpilot uploads the driving data to our servers. You can also acce
openpilot is open source software: the user is free to disable data collection if they wish to do so.
openpilot logs the road-facing cameras, CAN, GPS, IMU, magnetometer, thermal sensors, crashes, and operating system logs.
The driver-facing camera is only logged if you explicitly opt-in in settings. The microphone is not recorded.
The driver-facing camera and microphone are only logged if you explicitly opt-in in settings.
By using openpilot, you agree to [our Privacy Policy](https://comma.ai/privacy). You understand that use of this software or its related services will generate certain types of user data, which may be logged and stored at the sole discretion of comma. By accepting this agreement, you grant an irrevocable, perpetual, worldwide right to comma for the use of this data.
</details>

@ -2470,7 +2470,7 @@ struct DebugAlert {
struct UserFlag {
}
struct Microphone {
struct SoundPressure @0xdc24138990726023 {
soundPressure @0 :Float32;
# uncalibrated, A-weighted
@ -2480,6 +2480,11 @@ struct Microphone {
filteredSoundPressureWeightedDbDEPRECATED @2 :Float32;
}
struct AudioData {
data @0 :Data;
sampleRate @1 :UInt32;
}
struct Touch {
sec @0 :Int64;
usec @1 :Int64;
@ -2557,7 +2562,8 @@ struct Event {
livestreamDriverEncodeIdx @119 :EncodeIndex;
# microphone data
microphone @103 :Microphone;
soundPressure @103 :SoundPressure;
rawAudioData @147 :AudioData;
# systems stuff
androidLog @20 :AndroidLogEntry;

@ -73,7 +73,8 @@ _services: dict[str, tuple] = {
"navThumbnail": (True, 0.),
"qRoadEncodeIdx": (False, 20.),
"userFlag": (True, 0., 1),
"microphone": (True, 10., 10),
"soundPressure": (True, 10., 10),
"rawAudioData": (False, 20.),
# debug
"uiDebug": (True, 0., 1),

@ -99,6 +99,7 @@ inline static std::unordered_map<std::string, uint32_t> keys = {
{"PandaSomResetTriggered", CLEAR_ON_MANAGER_START | CLEAR_ON_OFFROAD_TRANSITION},
{"PandaSignatures", CLEAR_ON_MANAGER_START},
{"PrimeType", PERSISTENT},
{"RecordAudio", PERSISTENT},
{"RecordFront", PERSISTENT},
{"RecordFrontLock", PERSISTENT}, // for the internal fleet
{"SecOCKey", PERSISTENT | DONT_LOG},

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9fc1f7f31d41f26ea7d6f52b3096f7a91844a3b897bc233a8489253c46f0403b
size 6324

@ -22,6 +22,7 @@ DESCRIPTIONS = {
"AlwaysOnDM": "Enable driver monitoring even when openpilot is not engaged.",
'RecordFront': "Upload data from the driver facing camera and help improve the driver monitoring algorithm.",
"IsMetric": "Display speed in km/h instead of mph.",
"RecordAudio": "Record and store microphone audio while driving. The audio will be included in the dashcam video in comma connect.",
}
@ -77,6 +78,12 @@ class TogglesLayout(Widget):
toggle_item(
"Use Metric System", DESCRIPTIONS["IsMetric"], self._params.get_bool("IsMetric"), icon="monitoring.png"
),
toggle_item(
"Record Microphone Audio",
DESCRIPTIONS["RecordAudio"],
self._params.get_bool("RecordAudio"),
icon="microphone.png",
),
]
self._scroller = Scroller(items, line_separator=True, spacing=0)

@ -68,6 +68,13 @@ TogglesPanel::TogglesPanel(SettingsWindow *parent) : ListWidget(parent) {
"../assets/icons/metric.png",
false,
},
{
"RecordAudio",
tr("Record Microphone Audio"),
tr("Record and store microphone audio while driving. The audio will be included in the dashcam video in comma connect."),
"../assets/icons/microphone.png",
true,
},
};

@ -24,10 +24,11 @@ void Sidebar::drawMetric(QPainter &p, const QPair<QString, QString> &label, QCol
p.drawText(rect.adjusted(22, 0, 0, 0), Qt::AlignCenter, label.first + "\n" + label.second);
}
Sidebar::Sidebar(QWidget *parent) : QFrame(parent), onroad(false), flag_pressed(false), settings_pressed(false) {
Sidebar::Sidebar(QWidget *parent) : QFrame(parent), onroad(false), flag_pressed(false), settings_pressed(false), mic_indicator_pressed(false) {
home_img = loadPixmap("../assets/images/button_home.png", home_btn.size());
flag_img = loadPixmap("../assets/images/button_flag.png", home_btn.size());
settings_img = loadPixmap("../assets/images/button_settings.png", settings_btn.size(), Qt::IgnoreAspectRatio);
mic_img = loadPixmap("../assets/icons/microphone.png", QSize(30, 30));
connect(this, &Sidebar::valueChanged, [=] { update(); });
@ -47,12 +48,15 @@ void Sidebar::mousePressEvent(QMouseEvent *event) {
} else if (settings_btn.contains(event->pos())) {
settings_pressed = true;
update();
} else if (recording_audio && mic_indicator_btn.contains(event->pos())) {
mic_indicator_pressed = true;
update();
}
}
void Sidebar::mouseReleaseEvent(QMouseEvent *event) {
if (flag_pressed || settings_pressed) {
flag_pressed = settings_pressed = false;
if (flag_pressed || settings_pressed || mic_indicator_pressed) {
flag_pressed = settings_pressed = mic_indicator_pressed = false;
update();
}
if (onroad && home_btn.contains(event->pos())) {
@ -61,6 +65,8 @@ void Sidebar::mouseReleaseEvent(QMouseEvent *event) {
pm->send("userFlag", msg);
} else if (settings_btn.contains(event->pos())) {
emit openSettings();
} else if (recording_audio && mic_indicator_btn.contains(event->pos())) {
emit openSettings(2, "RecordAudio");
}
}
@ -106,6 +112,8 @@ void Sidebar::updateState(const UIState &s) {
pandaStatus = {{tr("NO"), tr("PANDA")}, danger_color};
}
setProperty("pandaStatus", QVariant::fromValue(pandaStatus));
setProperty("recordingAudio", s.scene.recording_audio);
}
void Sidebar::paintEvent(QPaintEvent *event) {
@ -120,6 +128,14 @@ void Sidebar::paintEvent(QPaintEvent *event) {
p.drawPixmap(settings_btn.x(), settings_btn.y(), settings_img);
p.setOpacity(onroad && flag_pressed ? 0.65 : 1.0);
p.drawPixmap(home_btn.x(), home_btn.y(), onroad ? flag_img : home_img);
if (recording_audio) {
p.setBrush(danger_color);
p.setOpacity(mic_indicator_pressed ? 0.65 : 1.0);
p.drawRoundedRect(mic_indicator_btn, mic_indicator_btn.height() / 2, mic_indicator_btn.height() / 2);
int icon_x = mic_indicator_btn.x() + (mic_indicator_btn.width() - mic_img.width()) / 2;
int icon_y = mic_indicator_btn.y() + (mic_indicator_btn.height() - mic_img.height()) / 2;
p.drawPixmap(icon_x, icon_y, mic_img);
}
p.setOpacity(1.0);
// network

@ -18,6 +18,7 @@ class Sidebar : public QFrame {
Q_PROPERTY(ItemStatus tempStatus MEMBER temp_status NOTIFY valueChanged);
Q_PROPERTY(QString netType MEMBER net_type NOTIFY valueChanged);
Q_PROPERTY(int netStrength MEMBER net_strength NOTIFY valueChanged);
Q_PROPERTY(bool recordingAudio MEMBER recording_audio NOTIFY valueChanged);
public:
explicit Sidebar(QWidget* parent = 0);
@ -36,8 +37,8 @@ protected:
void mouseReleaseEvent(QMouseEvent *event) override;
void drawMetric(QPainter &p, const QPair<QString, QString> &label, QColor c, int y);
QPixmap home_img, flag_img, settings_img;
bool onroad, flag_pressed, settings_pressed;
QPixmap home_img, flag_img, settings_img, mic_img;
bool onroad, recording_audio, flag_pressed, settings_pressed, mic_indicator_pressed;
const QMap<cereal::DeviceState::NetworkType, QString> network_type = {
{cereal::DeviceState::NetworkType::NONE, tr("--")},
{cereal::DeviceState::NetworkType::WIFI, tr("Wi-Fi")},
@ -50,6 +51,7 @@ protected:
const QRect home_btn = QRect(60, 860, 180, 180);
const QRect settings_btn = QRect(50, 35, 200, 117);
const QRect mic_indicator_btn = QRect(158, 252, 75, 40);
const QColor good_color = QColor(255, 255, 255);
const QColor warning_color = QColor(218, 202, 37);
const QColor danger_color = QColor(201, 34, 49);

@ -135,7 +135,7 @@ class Soundd:
# sounddevice must be imported after forking processes
import sounddevice as sd
sm = messaging.SubMaster(['selfdriveState', 'microphone'])
sm = messaging.SubMaster(['selfdriveState', 'soundPressure'])
with self.get_stream(sd) as stream:
rk = Ratekeeper(20)
@ -144,8 +144,8 @@ class Soundd:
while True:
sm.update(0)
if sm.updated['microphone'] and self.current_alert == AudibleAlert.none: # only update volume filter when not playing alert
self.spl_filter_weighted.update(sm["microphone"].soundPressureWeightedDb)
if sm.updated['soundPressure'] and self.current_alert == AudibleAlert.none: # only update volume filter when not playing alert
self.spl_filter_weighted.update(sm["soundPressure"].soundPressureWeightedDb)
self.current_volume = self.calculate_volume(float(self.spl_filter_weighted.x))
self.get_audible_alert(sm)

@ -60,6 +60,9 @@ static void update_state(UIState *s) {
scene.light_sensor = -1;
}
scene.started = sm["deviceState"].getDeviceState().getStarted() && scene.ignition;
auto params = Params();
scene.recording_audio = params.getBool("RecordAudio") && scene.started;
}
void ui_update_params(UIState *s) {

@ -58,7 +58,7 @@ typedef struct UIScene {
cereal::LongitudinalPersonality personality;
float light_sensor = -1;
bool started, ignition, is_metric;
bool started, ignition, is_metric, recording_audio;
uint64_t started_frame;
} UIScene;

@ -226,19 +226,21 @@ void loggerd_thread() {
for (const auto& [_, it] : services) {
const bool encoder = util::ends_with(it.name, "EncodeData");
const bool livestream_encoder = util::starts_with(it.name, "livestream");
if (!it.should_log && (!encoder || livestream_encoder)) continue;
LOGD("logging %s", it.name.c_str());
SubSocket * sock = SubSocket::create(ctx.get(), it.name);
assert(sock != NULL);
poller->registerSocket(sock);
service_state[sock] = {
.name = it.name,
.counter = 0,
.freq = it.decimation,
.encoder = encoder,
.user_flag = it.name == "userFlag",
};
const bool record_audio = (it.name == "rawAudioData") && Params().getBool("RecordAudio");
if (it.should_log || (encoder && !livestream_encoder) || record_audio) {
LOGD("logging %s", it.name.c_str());
SubSocket * sock = SubSocket::create(ctx.get(), it.name);
assert(sock != NULL);
poller->registerSocket(sock);
service_state[sock] = {
.name = it.name,
.counter = 0,
.freq = it.decimation,
.encoder = encoder,
.user_flag = it.name == "userFlag",
};
}
}
LoggerdState s;

@ -9,10 +9,10 @@ from openpilot.common.retry import retry
from openpilot.common.swaglog import cloudlog
RATE = 10
FFT_SAMPLES = 4096
FFT_SAMPLES = 1600 # 100ms
REFERENCE_SPL = 2e-5 # newtons/m^2
SAMPLE_RATE = 44100
SAMPLE_BUFFER = 4096 # approx 100ms
SAMPLE_RATE = 16000
SAMPLE_BUFFER = 800 # 50ms
@cache
@ -45,7 +45,7 @@ def apply_a_weighting(measurements: np.ndarray) -> np.ndarray:
class Mic:
def __init__(self):
self.rk = Ratekeeper(RATE)
self.pm = messaging.PubMaster(['microphone'])
self.pm = messaging.PubMaster(['soundPressure', 'rawAudioData'])
self.measurements = np.empty(0)
@ -61,12 +61,12 @@ class Mic:
sound_pressure_weighted = self.sound_pressure_weighted
sound_pressure_level_weighted = self.sound_pressure_level_weighted
msg = messaging.new_message('microphone', valid=True)
msg.microphone.soundPressure = float(sound_pressure)
msg.microphone.soundPressureWeighted = float(sound_pressure_weighted)
msg.microphone.soundPressureWeightedDb = float(sound_pressure_level_weighted)
msg = messaging.new_message('soundPressure', valid=True)
msg.soundPressure.soundPressure = float(sound_pressure)
msg.soundPressure.soundPressureWeighted = float(sound_pressure_weighted)
msg.soundPressure.soundPressureWeightedDb = float(sound_pressure_level_weighted)
self.pm.send('microphone', msg)
self.pm.send('soundPressure', msg)
self.rk.keep_time()
def callback(self, indata, frames, time, status):
@ -76,6 +76,12 @@ class Mic:
Logged A-weighted equivalents are rough approximations of the human-perceived loudness.
"""
msg = messaging.new_message('rawAudioData', valid=True)
audio_data_int_16 = (indata[:, 0] * 32767).astype(np.int16)
msg.rawAudioData.data = audio_data_int_16.tobytes()
msg.rawAudioData.sampleRate = SAMPLE_RATE
self.pm.send('rawAudioData', msg)
with self.lock:
self.measurements = np.concatenate((self.measurements, indata[:, 0]))

@ -0,0 +1,77 @@
#!/usr/bin/env python3
import os
import sys
import wave
import argparse
import numpy as np
from openpilot.tools.lib.logreader import LogReader, ReadMode
def extract_audio(route_or_segment_name, output_file=None, play=False):
  """Pull the raw microphone audio out of a route's logs.

  Concatenates every rawAudioData message in order, then optionally writes
  the result to a WAV file and/or plays it back.
  """
  lr = LogReader(route_or_segment_name, default_mode=ReadMode.AUTO_INTERACTIVE)
  audio_messages = list(lr.filter("rawAudioData"))
  if not audio_messages:
    print("No rawAudioData messages found in logs")
    return

  # all messages share one sample rate; take it from the first
  sample_rate = audio_messages[0].sampleRate
  audio_chunks = [np.frombuffer(m.data, dtype=np.int16) for m in audio_messages]
  total_frames = sum(len(chunk) for chunk in audio_chunks)
  full_audio = np.concatenate(audio_chunks)

  print(f"Found {total_frames} frames from {len(audio_messages)} audio messages at {sample_rate} Hz")

  if output_file:
    if write_wav_file(output_file, full_audio, sample_rate):
      print(f"Audio written to {output_file}")
    else:
      print("Audio extraction canceled.")

  if play:
    play_audio(full_audio, sample_rate)
def write_wav_file(filename, audio_data, sample_rate):
  """Write mono 16-bit PCM audio to a WAV file.

  Prompts before overwriting an existing file.
  Returns True if the file was written, False if the user declined.
  """
  if os.path.exists(filename):
    # fix: the prompt previously printed a literal placeholder instead of the filename
    if input(f"File '{filename}' exists. Overwrite? (y/N): ").lower() not in ['y', 'yes']:
      return False

  with wave.open(filename, 'wb') as wav_file:
    wav_file.setnchannels(1)  # Mono
    wav_file.setsampwidth(2)  # 16-bit
    wav_file.setframerate(sample_rate)
    wav_file.writeframes(audio_data.tobytes())
  return True
def play_audio(audio_data, sample_rate):
  """Play the given samples on the default audio device; Ctrl+C stops early."""
  try:
    # imported lazily so writing a WAV file works without sounddevice installed
    import sounddevice as sd

    print("Playing audio... Press Ctrl+C to stop")
    sd.play(audio_data, sample_rate)
    sd.wait()  # block until playback finishes
  except KeyboardInterrupt:
    print("\nPlayback stopped")
if __name__ == "__main__":
  parser = argparse.ArgumentParser(description="Extract audio data from openpilot logs")
  parser.add_argument("-o", "--output", help="Output WAV file path")
  parser.add_argument("--play", action="store_true", help="Play audio with sounddevice")
  parser.add_argument("route_or_segment_name", nargs='?', help="The route or segment name")

  if len(sys.argv) == 1:
    parser.print_help()
    sys.exit()

  args = parser.parse_args()

  # fix: the positional arg is optional (nargs='?'), so e.g. `--play` alone
  # previously crashed with AttributeError on None.strip(); fail cleanly instead
  if not args.route_or_segment_name:
    parser.error("route_or_segment_name is required")

  # default to writing a file when neither an output path nor --play is given
  output_file = args.output
  if not args.output and not args.play:
    output_file = "extracted_audio.wav"

  extract_audio(args.route_or_segment_name.strip(), output_file, args.play)
Loading…
Cancel
Save