wip

2024-04-27 03:27:27 -05:00
parent 886a019ad5
commit c22b4866eb
55 changed files with 49557 additions and 116523 deletions
--- a/selfdrive/modeld/tests/init.py
+++ b/selfdrive/modeld/tests/init.py
--- a/selfdrive/modeld/tests/dmon_lag/repro.cc
+++ b/selfdrive/modeld/tests/dmon_lag/repro.cc
@@ -0,0 +1,101 @@
+// clang++ -O2 repro.cc && ./a.out
+
+#include <sched.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
+
+// Current CLOCK_BOOTTIME reading converted to milliseconds.
+static inline double millis_since_boot() {
+  struct timespec now;
+  clock_gettime(CLOCK_BOOTTIME, &now);
+  const double ms_from_seconds = now.tv_sec * 1000.0;
+  const double ms_from_nanos = now.tv_nsec * 1e-6;
+  return ms_from_seconds + ms_from_nanos;
+}
+
+// Dimensions of the resized frame that inner() reads from.
+#define MODEL_WIDTH 320
+#define MODEL_HEIGHT 640
+
+// Per-pixel transform applied before storing. It is deliberately the identity:
+// a null function still breaks it.
+#define input_lambda(x) x
+
+// this is copied from models/dmonitoring.cc, and is the code that triggers the issue
+//
+// Converts the uint8 frame in resized_buf into six float planes in net_input_buf,
+// each plane holding (MODEL_WIDTH/2)*(MODEL_HEIGHT/2) values. Within a plane the
+// element for (r, c) is stored at c*(MODEL_HEIGHT/2) + r.
+// Plane order by offset: 0 = Y_ul, 1 = Y_dl, 2 = Y_ur, 3 = Y_dr, 4 = U, 5 = V.
+void inner(uint8_t *resized_buf, float *net_input_buf) {
+  int resized_width = MODEL_WIDTH;
+  int resized_height = MODEL_HEIGHT;
+
+  // one shot conversion, O(n) anyway
+  // yuvframe2tensor, normalize
+  for (int r = 0; r < MODEL_HEIGHT/2; r++) {
+    for (int c = 0; c < MODEL_WIDTH/2; c++) {
+      // Y_ul
+      net_input_buf[(c*MODEL_HEIGHT/2) + r] = input_lambda(resized_buf[(2*r*resized_width) + (2*c)]);
+      // Y_ur
+      net_input_buf[(c*MODEL_HEIGHT/2) + r + (2*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width) + (2*c+1)]);
+      // Y_dl
+      // NOTE(review): the +1 is added to the byte offset, not to the row index, so this
+      // reads the same byte as Y_ur (and Y_dr reads one byte further) -- confirm this
+      // matches the upstream code being reproduced before "fixing" it here.
+      net_input_buf[(c*MODEL_HEIGHT/2) + r + ((MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c)]);
+      // Y_dr
+      net_input_buf[(c*MODEL_HEIGHT/2) + r + (3*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c+1)]);
+      // U: read from the region that starts right after the width*height Y bytes
+      net_input_buf[(c*MODEL_HEIGHT/2) + r + (4*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + (r*resized_width/2) + c]);
+      // V: read from the region that follows the (width/2)*(height/2) U bytes
+      net_input_buf[(c*MODEL_HEIGHT/2) + r + (5*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + ((resized_width/2)*(resized_height/2)) + (r*resized_width/2) + c]);
+    }
+  }
+}
+
+// Allocates a fresh pair of buffers, times inner() on them 20 times and returns
+// the mean time per call in milliseconds.
+// If that mean exceeds 10 ms the buffers are reported as bad, 200 further
+// timings are printed, and the process exits with status 0.
+// The buffers are intentionally never freed so each call gets different addresses.
+float trial() {
+  const int resized_width = MODEL_WIDTH;
+  const int resized_height = MODEL_HEIGHT;
+  const int resized_len = resized_width*resized_height*3/2;
+
+  const int yuv_buf_len = (MODEL_WIDTH/2) * (MODEL_HEIGHT/2) * 6; // Y|u|v -> y|y|y|y|u|v
+
+  // allocate the buffers
+  uint8_t *resized_buf = (uint8_t*)malloc(resized_len);
+  float *net_input_buf = (float*)malloc(yuv_buf_len*sizeof(float));
+
+  // main() calls this forever without freeing, so allocation can eventually
+  // fail; stop cleanly instead of handing a NULL buffer to inner().
+  if (resized_buf == NULL || net_input_buf == NULL) {
+    fprintf(stderr, "malloc failed before a bad buffer was found\n");
+    exit(1);
+  }
+
+  // %p takes a void*, and the size_t product needs %zx rather than %lx
+  printf("allocate -- %p 0x%x -- %p 0x%zx\n", (void*)resized_buf, resized_len, (void*)net_input_buf, yuv_buf_len*sizeof(float));
+
+  // test for bad buffers
+  const int CNT = 20;
+  float avg = 0.0;
+  for (int i = 0; i < CNT; i++) {
+    double s4 = millis_since_boot();
+    inner(resized_buf, net_input_buf);
+    double s5 = millis_since_boot();
+    avg += s5-s4;
+  }
+  avg /= CNT;
+
+  // once it's bad, it's reliably bad
+  if (avg > 10) {
+    printf("HIT %f\n", avg);
+    printf("BAD\n");
+
+    for (int i = 0; i < 200; i++) {
+      double s4 = millis_since_boot();
+      inner(resized_buf, net_input_buf);
+      double s5 = millis_since_boot();
+      printf("%.2f   ", s5-s4);
+    }
+    printf("\n");
+
+    exit(0);
+  }
+
+  // don't free so we get a different buffer each time
+  //free(resized_buf);
+  //free(net_input_buf);
+
+  return avg;
+}
+
+// Runs trials forever; trial() ends the process itself once it finds a slow buffer.
+int main() {
+  for (;;) {
+    const float mean_ms = trial();
+    printf("got %f\n", mean_ms);
+  }
+}
+
--- a/selfdrive/modeld/tests/snpe_benchmark/.gitignore
+++ b/selfdrive/modeld/tests/snpe_benchmark/.gitignore
@@ -0,0 +1 @@
+benchmark
--- a/selfdrive/modeld/tests/snpe_benchmark/benchmark.cc
+++ b/selfdrive/modeld/tests/snpe_benchmark/benchmark.cc
@@ -0,0 +1,192 @@
+#include <SNPE/SNPE.hpp>
+#include <SNPE/SNPEBuilder.hpp>
+#include <SNPE/SNPEFactory.hpp>
+#include <DlContainer/IDlContainer.hpp>
+#include <DlSystem/DlError.hpp>
+#include <DlSystem/ITensor.hpp>
+#include <DlSystem/ITensorFactory.hpp>
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <ctime>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+// Returns (*timeA_p - *timeB_p) in nanoseconds.
+// Both fields are widened to int64_t before scaling so the multiplication by
+// 1e9 cannot overflow on platforms where time_t is only 32 bits wide.
+int64_t timespecDiff(struct timespec *timeA_p, struct timespec *timeB_p) {
+  const int64_t sec_diff = static_cast<int64_t>(timeA_p->tv_sec) - static_cast<int64_t>(timeB_p->tv_sec);
+  const int64_t nsec_diff = static_cast<int64_t>(timeA_p->tv_nsec) - static_cast<int64_t>(timeB_p->tv_nsec);
+  return sec_diff * 1000000000LL + nsec_diff;
+}
+
+void PrintErrorStringAndExit() {
+  cout << "ERROR!" << endl;
+  const char* const errStr = zdl::DlSystem::getLastErrorString();
+  std::cerr << errStr << std::endl;
+  std::exit(EXIT_FAILURE);
+}
+
+
+// Prints the SNPE library version and returns the runtime to use, preferring
+// DSP, then GPU, and falling back to CPU. The chosen runtime is also printed.
+zdl::DlSystem::Runtime_t checkRuntime() {
+  using zdl::DlSystem::Runtime_t;
+  using zdl::SNPE::SNPEFactory;
+
+  // queried on the first call only
+  static zdl::DlSystem::Version_t Version = SNPEFactory::getLibraryVersion();
+  std::cout << "SNPE Version: " << Version.asString().c_str() << std::endl; //Print Version number
+
+  if (SNPEFactory::isRuntimeAvailable(Runtime_t::DSP)) {
+    std::cout << "Using DSP runtime" << std::endl;
+    return Runtime_t::DSP;
+  }
+  if (SNPEFactory::isRuntimeAvailable(Runtime_t::GPU)) {
+    std::cout << "Using GPU runtime" << std::endl;
+    return Runtime_t::GPU;
+  }
+  std::cout << "Using cpu runtime" << std::endl;
+  return Runtime_t::CPU;
+}
+
+// Loads the DLC container at `filename`, builds an SNPE network on the runtime
+// picked by checkRuntime(), creates one tensor per network input, and prints
+// the wall-clock time of 50 consecutive execute() calls.
+// Exits the process if the container cannot be opened, the network cannot be
+// built, or an execute() call fails; throws std::runtime_error if the input
+// names or dimensions cannot be obtained.
+void test(char *filename) {
+  static zdl::DlSystem::Runtime_t runtime = checkRuntime();
+  std::unique_ptr<zdl::DlContainer::IDlContainer> container;
+  container = zdl::DlContainer::IDlContainer::open(filename);
+
+  if (!container) { PrintErrorStringAndExit(); }
+  cout << "start build" << endl;
+  std::unique_ptr<zdl::SNPE::SNPE> snpe;
+  {
+    zdl::SNPE::SNPEBuilder snpeBuilder(container.get());
+    snpe = snpeBuilder.setOutputLayers({})
+      .setRuntimeProcessor(runtime)
+      .setUseUserSuppliedBuffers(false)
+      //.setDebugMode(true)
+      .build();
+    // a failed build must stop here; continuing would dereference a null snpe below
+    if (!snpe) { PrintErrorStringAndExit(); }
+    cout << "ran snpeBuilder" << endl;
+  }
+
+  const auto &strList_opt = snpe->getInputTensorNames();
+  if (!strList_opt) throw std::runtime_error("Error obtaining input tensor names");
+
+  cout << "get input tensor names done" << endl;
+  const auto &strList = *strList_opt;
+
+  // Plain locals rather than statics: inputTensorMap stores raw pointers to the
+  // tensors owned by `inputs`, which are destroyed when this function returns,
+  // and main() calls test() repeatedly.
+  zdl::DlSystem::TensorMap inputTensorMap;
+  zdl::DlSystem::TensorMap outputTensorMap;
+  vector<std::unique_ptr<zdl::DlSystem::ITensor> > inputs;
+  for (size_t i = 0; i < strList.size(); i++) {
+    cout << "input name: " << strList.at(i) << endl;
+
+    const auto &inputDims_opt = snpe->getInputDimensions(strList.at(i));
+    if (!inputDims_opt) throw std::runtime_error("Error obtaining input dimensions");
+    const auto &inputShape = *inputDims_opt;
+    inputs.push_back(zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(inputShape));
+    inputTensorMap.add(strList.at(i), inputs.back().get());
+  }
+
+  struct timespec start, end;
+  cout << "**** starting benchmark ****" << endl;
+  for (int i = 0; i < 50; i++) {
+    clock_gettime(CLOCK_MONOTONIC, &start);
+    const bool ok = snpe->execute(inputTensorMap, outputTensorMap);
+    clock_gettime(CLOCK_MONOTONIC, &end);
+    // checked explicitly so a failure is not ignored when NDEBUG disables assert
+    if (!ok) { PrintErrorStringAndExit(); }
+    const int64_t timeElapsed = timespecDiff(&end, &start);
+    printf("time: %f ms\n", timeElapsed*1.0/1e6);
+  }
+}
+
+// Reads sample number `index` from /data/ipt/quantize_samples/ and copies its
+// 1*6*160*320 float values into `input`.
+// Exits the process with EXIT_FAILURE if the file cannot be opened or holds
+// fewer values than expected.
+void get_testframe(int index, std::unique_ptr<zdl::DlSystem::ITensor> &input) {
+  const std::string filepath = "/data/ipt/quantize_samples/sample_input_" + std::to_string(index);
+  const size_t num_floats = 1*6*160*320;
+
+  FILE *pFile = fopen(filepath.c_str(), "rb");
+  if (pFile == NULL) {
+    std::cerr << "Failed to open input file for reading: " << filepath << "\n";
+    std::exit(EXIT_FAILURE);
+  }
+
+  // A vector replaces the old new[] buffer, which was never deleted and leaked
+  // the whole frame on every call.
+  std::vector<float> frame_buffer(num_floats);
+  const size_t num_read = fread(frame_buffer.data(), sizeof(float), num_floats, pFile);
+  fclose(pFile);
+  if (num_read != num_floats) {
+    std::cerr << "Failed to read data from: " << filepath << "\n";
+    std::exit(EXIT_FAILURE);
+  }
+
+  std::copy(frame_buffer.begin(), frame_buffer.end(), input->begin());
+}
+
+// Writes every element of `tensor` to the file at `path` as raw floats.
+// Exits the process with EXIT_FAILURE if the file cannot be opened or a write fails.
+void SaveITensor(const std::string& path, const zdl::DlSystem::ITensor* tensor) {
+  std::ofstream out(path, std::ofstream::binary);
+  if (!out) {
+    std::cerr << "Failed to open output file for writing: " << path << "\n";
+    std::exit(EXIT_FAILURE);
+  }
+
+  auto it = tensor->cbegin();
+  while (it != tensor->cend()) {
+    float value = *it;
+    out.write(reinterpret_cast<char*>(&value), sizeof(float));
+    if (!out) {
+      std::cerr << "Failed to write data to: " << path << "\n";
+      std::exit(EXIT_FAILURE);
+    }
+    ++it;
+  }
+}
+
+// Loads the DLC container at `modelfile`, builds an SNPE network with a single
+// input, then for sample indices 0..9999 loads the sample with get_testframe(),
+// executes the network and saves the outputs under /data/opt/.
+// Exits the process if the container cannot be opened, the network cannot be
+// built, or an execute() call fails; throws std::runtime_error if the input
+// names or dimensions cannot be obtained.
+void testrun(char* modelfile) {
+  static zdl::DlSystem::Runtime_t runtime = checkRuntime();
+  std::unique_ptr<zdl::DlContainer::IDlContainer> container;
+  container = zdl::DlContainer::IDlContainer::open(modelfile);
+
+  if (!container) { PrintErrorStringAndExit(); }
+  cout << "start build" << endl;
+  std::unique_ptr<zdl::SNPE::SNPE> snpe;
+  {
+    zdl::SNPE::SNPEBuilder snpeBuilder(container.get());
+    snpe = snpeBuilder.setOutputLayers({})
+      .setRuntimeProcessor(runtime)
+      .setUseUserSuppliedBuffers(false)
+      //.setDebugMode(true)
+      .build();
+    // a failed build must stop here; continuing would dereference a null snpe below
+    if (!snpe) { PrintErrorStringAndExit(); }
+    cout << "ran snpeBuilder" << endl;
+  }
+
+  const auto &strList_opt = snpe->getInputTensorNames();
+  if (!strList_opt) throw std::runtime_error("Error obtaining input tensor names");
+  cout << "get input tensor names done" << endl;
+
+  const auto &strList = *strList_opt;
+  assert(strList.size() == 1);
+  const auto &inputDims_opt = snpe->getInputDimensions(strList.at(0));
+  if (!inputDims_opt) throw std::runtime_error("Error obtaining input dimensions");
+  const auto &inputShape = *inputDims_opt;
+  std::cout << "winkwink" << std::endl;
+
+  // local rather than static so no tensor pointers outlive `snpe`
+  zdl::DlSystem::TensorMap outputTensorMap;
+
+  for (int i=0; i<10000; i++) {
+    std::unique_ptr<zdl::DlSystem::ITensor> input;
+    input = zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(inputShape);
+    get_testframe(i, input);
+    if (!snpe->execute(input.get(), outputTensorMap)) { PrintErrorStringAndExit(); }
+
+    // NOTE(review): the path depends only on i, so when the network has several
+    // outputs each one overwrites the previous file -- confirm this is intended.
+    zdl::DlSystem::StringList tensorNames = outputTensorMap.getTensorNames();
+    std::for_each(tensorNames.begin(), tensorNames.end(), [&](const char* name) {
+      std::ostringstream path;
+      path << "/data/opt/Result_" << std::to_string(i) << ".raw";
+      auto tensorPtr = outputTensorMap.getTensor(name);
+      SaveITensor(path.str(), tensorPtr);
+    });
+  }
+}
+
+// One argument: benchmark the model in an endless loop.
+// Two arguments: run the model over the sample inputs once (the second argument
+// only selects this mode; its value is not read).
+// Any other argument count beyond that does nothing and returns 0.
+int main(int argc, char* argv[]) {
+  if (argc < 2) {
+    printf("usage: %s <filename>\n", argv[0]);
+    return -1;
+  }
+
+  switch (argc) {
+    case 2:
+      for (;;) {
+        test(argv[1]);
+      }
+    case 3:
+      testrun(argv[1]);
+      break;
+    default:
+      break;
+  }
+  return 0;
+}
+
--- a/selfdrive/modeld/tests/snpe_benchmark/benchmark.sh
+++ b/selfdrive/modeld/tests/snpe_benchmark/benchmark.sh
@@ -0,0 +1,4 @@
+#!/bin/sh -e
+clang++ -I /data/openpilot/third_party/snpe/include/ -L/data/pythonpath/third_party/snpe/aarch64 -lSNPE benchmark.cc -o benchmark
+export LD_LIBRARY_PATH="/data/pythonpath/third_party/snpe/aarch64/:$HOME/openpilot/third_party/snpe/x86_64/:$LD_LIBRARY_PATH"
+exec ./benchmark $1
--- a/selfdrive/modeld/tests/test_modeld.py
+++ b/selfdrive/modeld/tests/test_modeld.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+import unittest
+import numpy as np
+import random
+
+import cereal.messaging as messaging
+from cereal.visionipc import VisionIpcServer, VisionStreamType
+from openpilot.common.transformations.camera import DEVICE_CAMERAS
+from openpilot.common.realtime import DT_MDL
+from openpilot.selfdrive.car.car_helpers import write_car_param
+from openpilot.selfdrive.manager.process_config import managed_processes
+from openpilot.selfdrive.test.process_replay.vision_meta import meta_from_camera_state
+
+CAM = DEVICE_CAMERAS[("tici", "ar0231")].fcam
+IMG = np.zeros(int(CAM.width*CAM.height*(3/2)), dtype=np.uint8)
+IMG_BYTES = IMG.flatten().tobytes()
+
+
+class TestModeld(unittest.TestCase):
+
+  def setUp(self):
+    self.vipc_server = VisionIpcServer("camerad")
+    self.vipc_server.create_buffers(VisionStreamType.VISION_STREAM_ROAD, 40, False, CAM.width, CAM.height)
+    self.vipc_server.create_buffers(VisionStreamType.VISION_STREAM_DRIVER, 40, False, CAM.width, CAM.height)
+    self.vipc_server.create_buffers(VisionStreamType.VISION_STREAM_WIDE_ROAD, 40, False, CAM.width, CAM.height)
+    self.vipc_server.start_listener()
+    write_car_param()
+
+    self.sm = messaging.SubMaster(['modelV2', 'cameraOdometry'])
+    self.pm = messaging.PubMaster(['roadCameraState', 'wideRoadCameraState', 'liveCalibration'])
+
+    managed_processes['modeld'].start()
+    self.pm.wait_for_readers_to_update("roadCameraState", 10)
+
+  def tearDown(self):
+    managed_processes['modeld'].stop()
+    del self.vipc_server
+
+  def _send_frames(self, frame_id, cams=None):
+    if cams is None:
+      cams = ('roadCameraState', 'wideRoadCameraState')
+
+    cs = None
+    for cam in cams:
+      msg = messaging.new_message(cam)
+      cs = getattr(msg, cam)
+      cs.frameId = frame_id
+      cs.timestampSof = int((frame_id * DT_MDL) * 1e9)
+      cs.timestampEof = int(cs.timestampSof + (DT_MDL * 1e9))
+      cam_meta = meta_from_camera_state(cam)
+
+      self.pm.send(msg.which(), msg)
+      self.vipc_server.send(cam_meta.stream, IMG_BYTES, cs.frameId,
+                            cs.timestampSof, cs.timestampEof)
+    return cs
+
+  def _wait(self):
+    self.sm.update(5000)
+    if self.sm['modelV2'].frameId != self.sm['cameraOdometry'].frameId:
+      self.sm.update(1000)
+
+  def test_modeld(self):
+    for n in range(1, 500):
+      cs = self._send_frames(n)
+      self._wait()
+
+      mdl = self.sm['modelV2']
+      self.assertEqual(mdl.frameId, n)
+      self.assertEqual(mdl.frameIdExtra, n)
+      self.assertEqual(mdl.timestampEof, cs.timestampEof)
+      self.assertEqual(mdl.frameAge, 0)
+      self.assertEqual(mdl.frameDropPerc, 0)
+
+      odo = self.sm['cameraOdometry']
+      self.assertEqual(odo.frameId, n)
+      self.assertEqual(odo.timestampEof, cs.timestampEof)
+
+  def test_dropped_frames(self):
+    """
+      modeld should only run on consecutive road frames
+    """
+    frame_id = -1
+    road_frames = list()
+    for n in range(1, 50):
+      if (random.random() < 0.1) and n > 3:
+        cams = random.choice([(), ('wideRoadCameraState', )])
+        self._send_frames(n, cams)
+      else:
+        self._send_frames(n)
+        road_frames.append(n)
+      self._wait()
+
+      if len(road_frames) < 3 or road_frames[-1] - road_frames[-2] == 1:
+        frame_id = road_frames[-1]
+
+      mdl = self.sm['modelV2']
+      odo = self.sm['cameraOdometry']
+      self.assertEqual(mdl.frameId, frame_id)
+      self.assertEqual(mdl.frameIdExtra, frame_id)
+      self.assertEqual(odo.frameId, frame_id)
+      if n != frame_id:
+        self.assertFalse(self.sm.updated['modelV2'])
+        self.assertFalse(self.sm.updated['cameraOdometry'])
+
+
+if __name__ == "__main__":
+  unittest.main()
--- a/selfdrive/modeld/tests/tf_test/build.sh
+++ b/selfdrive/modeld/tests/tf_test/build.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+# Builds main.cc against the TensorFlow C library found under TF_DIR.
+TF_DIR=/home/batman/one/external/tensorflow
+clang++ -I "$TF_DIR/include/" -L "$TF_DIR/lib" -Wl,-rpath="$TF_DIR/lib" main.cc -ltensorflow
--- a/selfdrive/modeld/tests/tf_test/main.cc
+++ b/selfdrive/modeld/tests/tf_test/main.cc
@@ -0,0 +1,69 @@
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include "tensorflow/c/c_api.h"
+
+// Reads the whole file at `path` into a heap buffer that the caller must free().
+// On success returns the buffer and, when `out_len` is non-NULL, stores the
+// number of bytes there.
+// Returns NULL if the file cannot be opened, its size cannot be determined, it
+// is empty, the buffer cannot be allocated, or the read comes up short.
+void* read_file(const char* path, size_t* out_len) {
+  // binary mode: the contents are returned byte for byte
+  FILE* f = fopen(path, "rb");
+  if (!f) {
+    return NULL;
+  }
+
+  long f_len = -1;
+  if (fseek(f, 0, SEEK_END) == 0) {
+    f_len = ftell(f);  // -1 on failure
+  }
+  if (f_len <= 0) {
+    fclose(f);
+    return NULL;
+  }
+  rewind(f);
+
+  // checked explicitly: an assert would vanish under NDEBUG and would abort
+  // with the file still open otherwise
+  char* buf = (char*)calloc(f_len, 1);
+  if (!buf) {
+    fclose(f);
+    return NULL;
+  }
+
+  size_t num_read = fread(buf, f_len, 1, f);
+  fclose(f);
+
+  if (num_read != 1) {
+    free(buf);
+    return NULL;
+  }
+
+  if (out_len) {
+    *out_len = f_len;
+  }
+
+  return buf;
+}
+
+// Deallocator callback that releases `data` with free(); the length argument is ignored.
+static void DeallocateBuffer(void* data, size_t /*length*/) {
+  free(data);
+}
+
+// Usage: <program> <model path without the ".pb" suffix>
+// Loads <path>.pb with read_file() and tries to import it as a TensorFlow
+// graph, printing SUCCESS or FAIL with the status message.
+// Returns 0 when the import succeeds and 1 on a missing argument, an unreadable
+// model file, or a failed import.
+int main(int argc, char* argv[]) {
+  if (argc < 2) {
+    printf("usage: %s <model path without .pb>\n", argv[0]);
+    return 1;
+  }
+  const char *path = argv[1];
+
+  // load model
+  char tmp[1024];
+  snprintf(tmp, sizeof(tmp), "%s.pb", path);
+  printf("loading model %s\n", tmp);
+
+  size_t model_size = 0;
+  uint8_t *model_data = (uint8_t *)read_file(tmp, &model_size);
+  if (model_data == NULL) {
+    printf("FAIL: could not read %s\n", tmp);
+    return 1;
+  }
+
+  // buf takes ownership of model_data; TF_DeleteBuffer releases it through the deallocator
+  TF_Buffer* buf = TF_NewBuffer();
+  buf->data = model_data;
+  buf->length = model_size;
+  buf->data_deallocator = DeallocateBuffer;
+  printf("loaded model of size %zu\n", model_size);
+
+  // import graph
+  TF_Status* status = TF_NewStatus();
+  TF_Graph* graph = TF_NewGraph();
+  TF_ImportGraphDefOptions *opts = TF_NewImportGraphDefOptions();
+  TF_GraphImportGraphDef(graph, buf, opts, status);
+  TF_DeleteImportGraphDefOptions(opts);
+  TF_DeleteBuffer(buf);
+
+  const bool ok = TF_GetCode(status) == TF_OK;
+  if (ok) {
+    printf("SUCCESS\n");
+  } else {
+    printf("FAIL: %s\n", TF_Message(status));
+  }
+
+  // release what was created above before exiting
+  TF_DeleteGraph(graph);
+  TF_DeleteStatus(status);
+  return ok ? 0 : 1;
+}
--- a/selfdrive/modeld/tests/tf_test/pb_loader.py
+++ b/selfdrive/modeld/tests/tf_test/pb_loader.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+import sys
+import tensorflow as tf
+
+with open(sys.argv[1], "rb") as f:
+  graph_def = tf.compat.v1.GraphDef()
+  graph_def.ParseFromString(f.read())
+  #tf.io.write_graph(graph_def, '', sys.argv[1]+".try")
--- a/selfdrive/modeld/tests/timing/benchmark.py
+++ b/selfdrive/modeld/tests/timing/benchmark.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+# type: ignore
+
+import os
+import time
+import numpy as np
+
+import cereal.messaging as messaging
+from openpilot.selfdrive.manager.process_config import managed_processes
+
+
+N = int(os.getenv("N", "5"))
+TIME = int(os.getenv("TIME", "30"))
+
+if __name__ == "__main__":
+  sock = messaging.sub_sock('modelV2', conflate=False, timeout=1000)
+
+  execution_times = []
+
+  for _ in range(N):
+    os.environ['LOGPRINT'] = 'debug'
+    managed_processes['modeld'].start()
+    time.sleep(5)
+
+    t = []
+    start = time.monotonic()
+    while time.monotonic() - start < TIME:
+      msgs = messaging.drain_sock(sock, wait_for_one=True)
+      for m in msgs:
+        t.append(m.modelV2.modelExecutionTime)
+
+    execution_times.append(np.array(t[10:]) * 1000)
+    managed_processes['modeld'].stop()
+
+  print("\n\n")
+  print(f"ran modeld {N} times for {TIME}s each")
+  for _, t in enumerate(execution_times):
+    print(f"\tavg: {sum(t)/len(t):0.2f}ms, min: {min(t):0.2f}ms, max: {max(t):0.2f}ms")
+  print("\n\n")