This commit is contained in:
Your Name
2024-04-27 03:27:27 -05:00
parent 886a019ad5
commit c22b4866eb
55 changed files with 49557 additions and 116523 deletions

View File

View File

@@ -0,0 +1,101 @@
// clang++ -O2 repro.cc && ./a.out
#include <sched.h>
#include <sys/types.h>
#include <unistd.h>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
static inline double millis_since_boot() {
struct timespec t;
clock_gettime(CLOCK_BOOTTIME, &t);
return t.tv_sec * 1000.0 + t.tv_nsec * 1e-6;
}
#define MODEL_WIDTH 320
#define MODEL_HEIGHT 640
// null function still breaks it
#define input_lambda(x) x
// this is copied from models/dmonitoring.cc, and is the code that triggers the issue
void inner(uint8_t *resized_buf, float *net_input_buf) {
int resized_width = MODEL_WIDTH;
int resized_height = MODEL_HEIGHT;
// one shot conversion, O(n) anyway
// yuvframe2tensor, normalize
for (int r = 0; r < MODEL_HEIGHT/2; r++) {
for (int c = 0; c < MODEL_WIDTH/2; c++) {
// Y_ul
net_input_buf[(c*MODEL_HEIGHT/2) + r] = input_lambda(resized_buf[(2*r*resized_width) + (2*c)]);
// Y_ur
net_input_buf[(c*MODEL_HEIGHT/2) + r + (2*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width) + (2*c+1)]);
// Y_dl
net_input_buf[(c*MODEL_HEIGHT/2) + r + ((MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c)]);
// Y_dr
net_input_buf[(c*MODEL_HEIGHT/2) + r + (3*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(2*r*resized_width+1) + (2*c+1)]);
// U
net_input_buf[(c*MODEL_HEIGHT/2) + r + (4*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + (r*resized_width/2) + c]);
// V
net_input_buf[(c*MODEL_HEIGHT/2) + r + (5*(MODEL_WIDTH/2)*(MODEL_HEIGHT/2))] = input_lambda(resized_buf[(resized_width*resized_height) + ((resized_width/2)*(resized_height/2)) + (r*resized_width/2) + c]);
}
}
}
float trial() {
int resized_width = MODEL_WIDTH;
int resized_height = MODEL_HEIGHT;
int yuv_buf_len = (MODEL_WIDTH/2) * (MODEL_HEIGHT/2) * 6; // Y|u|v -> y|y|y|y|u|v
// allocate the buffers
uint8_t *resized_buf = (uint8_t*)malloc(resized_width*resized_height*3/2);
float *net_input_buf = (float*)malloc(yuv_buf_len*sizeof(float));
printf("allocate -- %p 0x%x -- %p 0x%lx\n", resized_buf, resized_width*resized_height*3/2, net_input_buf, yuv_buf_len*sizeof(float));
// test for bad buffers
static int CNT = 20;
float avg = 0.0;
for (int i = 0; i < CNT; i++) {
double s4 = millis_since_boot();
inner(resized_buf, net_input_buf);
double s5 = millis_since_boot();
avg += s5-s4;
}
avg /= CNT;
// once it's bad, it's reliably bad
if (avg > 10) {
printf("HIT %f\n", avg);
printf("BAD\n");
for (int i = 0; i < 200; i++) {
double s4 = millis_since_boot();
inner(resized_buf, net_input_buf);
double s5 = millis_since_boot();
printf("%.2f ", s5-s4);
}
printf("\n");
exit(0);
}
// don't free so we get a different buffer each time
//free(resized_buf);
//free(net_input_buf);
return avg;
}
int main() {
while (true) {
float ret = trial();
printf("got %f\n", ret);
}
}

View File

@@ -0,0 +1 @@
benchmark

View File

@@ -0,0 +1,192 @@
#include <SNPE/SNPE.hpp>
#include <SNPE/SNPEBuilder.hpp>
#include <SNPE/SNPEFactory.hpp>
#include <DlContainer/IDlContainer.hpp>
#include <DlSystem/DlError.hpp>
#include <DlSystem/ITensor.hpp>
#include <DlSystem/ITensorFactory.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
using namespace std;
int64_t timespecDiff(struct timespec *timeA_p, struct timespec *timeB_p) {
return ((timeA_p->tv_sec * 1000000000) + timeA_p->tv_nsec) - ((timeB_p->tv_sec * 1000000000) + timeB_p->tv_nsec);
}
void PrintErrorStringAndExit() {
cout << "ERROR!" << endl;
const char* const errStr = zdl::DlSystem::getLastErrorString();
std::cerr << errStr << std::endl;
std::exit(EXIT_FAILURE);
}
zdl::DlSystem::Runtime_t checkRuntime() {
static zdl::DlSystem::Version_t Version = zdl::SNPE::SNPEFactory::getLibraryVersion();
static zdl::DlSystem::Runtime_t Runtime;
std::cout << "SNPE Version: " << Version.asString().c_str() << std::endl; //Print Version number
if (zdl::SNPE::SNPEFactory::isRuntimeAvailable(zdl::DlSystem::Runtime_t::DSP)) {
std::cout << "Using DSP runtime" << std::endl;
Runtime = zdl::DlSystem::Runtime_t::DSP;
} else if (zdl::SNPE::SNPEFactory::isRuntimeAvailable(zdl::DlSystem::Runtime_t::GPU)) {
std::cout << "Using GPU runtime" << std::endl;
Runtime = zdl::DlSystem::Runtime_t::GPU;
} else {
std::cout << "Using cpu runtime" << std::endl;
Runtime = zdl::DlSystem::Runtime_t::CPU;
}
return Runtime;
}
void test(char *filename) {
static zdl::DlSystem::Runtime_t runtime = checkRuntime();
std::unique_ptr<zdl::DlContainer::IDlContainer> container;
container = zdl::DlContainer::IDlContainer::open(filename);
if (!container) { PrintErrorStringAndExit(); }
cout << "start build" << endl;
std::unique_ptr<zdl::SNPE::SNPE> snpe;
{
snpe = NULL;
zdl::SNPE::SNPEBuilder snpeBuilder(container.get());
snpe = snpeBuilder.setOutputLayers({})
.setRuntimeProcessor(runtime)
.setUseUserSuppliedBuffers(false)
//.setDebugMode(true)
.build();
if (!snpe) {
cout << "ERROR!" << endl;
const char* const errStr = zdl::DlSystem::getLastErrorString();
std::cerr << errStr << std::endl;
}
cout << "ran snpeBuilder" << endl;
}
const auto &strList_opt = snpe->getInputTensorNames();
if (!strList_opt) throw std::runtime_error("Error obtaining input tensor names");
cout << "get input tensor names done" << endl;
const auto &strList = *strList_opt;
static zdl::DlSystem::TensorMap inputTensorMap;
static zdl::DlSystem::TensorMap outputTensorMap;
vector<std::unique_ptr<zdl::DlSystem::ITensor> > inputs;
for (int i = 0; i < strList.size(); i++) {
cout << "input name: " << strList.at(i) << endl;
const auto &inputDims_opt = snpe->getInputDimensions(strList.at(i));
const auto &inputShape = *inputDims_opt;
inputs.push_back(zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(inputShape));
inputTensorMap.add(strList.at(i), inputs[i].get());
}
struct timespec start, end;
cout << "**** starting benchmark ****" << endl;
for (int i = 0; i < 50; i++) {
clock_gettime(CLOCK_MONOTONIC, &start);
int err = snpe->execute(inputTensorMap, outputTensorMap);
assert(err == true);
clock_gettime(CLOCK_MONOTONIC, &end);
uint64_t timeElapsed = timespecDiff(&end, &start);
printf("time: %f ms\n", timeElapsed*1.0/1e6);
}
}
void get_testframe(int index, std::unique_ptr<zdl::DlSystem::ITensor> &input) {
FILE * pFile;
string filepath="/data/ipt/quantize_samples/sample_input_"+std::to_string(index);
pFile = fopen(filepath.c_str(), "rb");
int length = 1*6*160*320*4;
float * frame_buffer = new float[length/4]; // 32/8
fread(frame_buffer, length, 1, pFile);
// std::cout << *(frame_buffer+length/4-1) << std::endl;
std::copy(frame_buffer, frame_buffer+(length/4), input->begin());
fclose(pFile);
}
void SaveITensor(const std::string& path, const zdl::DlSystem::ITensor* tensor)
{
std::ofstream os(path, std::ofstream::binary);
if (!os)
{
std::cerr << "Failed to open output file for writing: " << path << "\n";
std::exit(EXIT_FAILURE);
}
for ( auto it = tensor->cbegin(); it != tensor->cend(); ++it )
{
float f = *it;
if (!os.write(reinterpret_cast<char*>(&f), sizeof(float)))
{
std::cerr << "Failed to write data to: " << path << "\n";
std::exit(EXIT_FAILURE);
}
}
}
void testrun(char* modelfile) {
static zdl::DlSystem::Runtime_t runtime = checkRuntime();
std::unique_ptr<zdl::DlContainer::IDlContainer> container;
container = zdl::DlContainer::IDlContainer::open(modelfile);
if (!container) { PrintErrorStringAndExit(); }
cout << "start build" << endl;
std::unique_ptr<zdl::SNPE::SNPE> snpe;
{
snpe = NULL;
zdl::SNPE::SNPEBuilder snpeBuilder(container.get());
snpe = snpeBuilder.setOutputLayers({})
.setRuntimeProcessor(runtime)
.setUseUserSuppliedBuffers(false)
//.setDebugMode(true)
.build();
if (!snpe) {
cout << "ERROR!" << endl;
const char* const errStr = zdl::DlSystem::getLastErrorString();
std::cerr << errStr << std::endl;
}
cout << "ran snpeBuilder" << endl;
}
const auto &strList_opt = snpe->getInputTensorNames();
if (!strList_opt) throw std::runtime_error("Error obtaining input tensor names");
cout << "get input tensor names done" << endl;
const auto &strList = *strList_opt;
static zdl::DlSystem::TensorMap inputTensorMap;
static zdl::DlSystem::TensorMap outputTensorMap;
assert(strList.size() == 1);
const auto &inputDims_opt = snpe->getInputDimensions(strList.at(0));
const auto &inputShape = *inputDims_opt;
std::cout << "winkwink" << std::endl;
for (int i=0; i<10000; i++) {
std::unique_ptr<zdl::DlSystem::ITensor> input;
input = zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(inputShape);
get_testframe(i, input);
snpe->execute(input.get(), outputTensorMap);
zdl::DlSystem::StringList tensorNames = outputTensorMap.getTensorNames();
std::for_each(tensorNames.begin(), tensorNames.end(), [&](const char* name) {
std::ostringstream path;
path << "/data/opt/Result_" << std::to_string(i) << ".raw";
auto tensorPtr = outputTensorMap.getTensor(name);
SaveITensor(path.str(), tensorPtr);
});
}
}
int main(int argc, char* argv[]) {
if (argc < 2) {
printf("usage: %s <filename>\n", argv[0]);
return -1;
}
if (argc == 2) {
while (true) test(argv[1]);
} else if (argc == 3) {
testrun(argv[1]);
}
return 0;
}

View File

@@ -0,0 +1,4 @@
#!/bin/sh -e
clang++ -I /data/openpilot/third_party/snpe/include/ -L/data/pythonpath/third_party/snpe/aarch64 -lSNPE benchmark.cc -o benchmark
export LD_LIBRARY_PATH="/data/pythonpath/third_party/snpe/aarch64/:$HOME/openpilot/third_party/snpe/x86_64/:$LD_LIBRARY_PATH"
exec ./benchmark $1

View File

@@ -0,0 +1,107 @@
#!/usr/bin/env python3
import unittest
import numpy as np
import random
import cereal.messaging as messaging
from cereal.visionipc import VisionIpcServer, VisionStreamType
from openpilot.common.transformations.camera import DEVICE_CAMERAS
from openpilot.common.realtime import DT_MDL
from openpilot.selfdrive.car.car_helpers import write_car_param
from openpilot.selfdrive.manager.process_config import managed_processes
from openpilot.selfdrive.test.process_replay.vision_meta import meta_from_camera_state
CAM = DEVICE_CAMERAS[("tici", "ar0231")].fcam
IMG = np.zeros(int(CAM.width*CAM.height*(3/2)), dtype=np.uint8)
IMG_BYTES = IMG.flatten().tobytes()
class TestModeld(unittest.TestCase):
def setUp(self):
self.vipc_server = VisionIpcServer("camerad")
self.vipc_server.create_buffers(VisionStreamType.VISION_STREAM_ROAD, 40, False, CAM.width, CAM.height)
self.vipc_server.create_buffers(VisionStreamType.VISION_STREAM_DRIVER, 40, False, CAM.width, CAM.height)
self.vipc_server.create_buffers(VisionStreamType.VISION_STREAM_WIDE_ROAD, 40, False, CAM.width, CAM.height)
self.vipc_server.start_listener()
write_car_param()
self.sm = messaging.SubMaster(['modelV2', 'cameraOdometry'])
self.pm = messaging.PubMaster(['roadCameraState', 'wideRoadCameraState', 'liveCalibration'])
managed_processes['modeld'].start()
self.pm.wait_for_readers_to_update("roadCameraState", 10)
def tearDown(self):
managed_processes['modeld'].stop()
del self.vipc_server
def _send_frames(self, frame_id, cams=None):
if cams is None:
cams = ('roadCameraState', 'wideRoadCameraState')
cs = None
for cam in cams:
msg = messaging.new_message(cam)
cs = getattr(msg, cam)
cs.frameId = frame_id
cs.timestampSof = int((frame_id * DT_MDL) * 1e9)
cs.timestampEof = int(cs.timestampSof + (DT_MDL * 1e9))
cam_meta = meta_from_camera_state(cam)
self.pm.send(msg.which(), msg)
self.vipc_server.send(cam_meta.stream, IMG_BYTES, cs.frameId,
cs.timestampSof, cs.timestampEof)
return cs
def _wait(self):
self.sm.update(5000)
if self.sm['modelV2'].frameId != self.sm['cameraOdometry'].frameId:
self.sm.update(1000)
def test_modeld(self):
for n in range(1, 500):
cs = self._send_frames(n)
self._wait()
mdl = self.sm['modelV2']
self.assertEqual(mdl.frameId, n)
self.assertEqual(mdl.frameIdExtra, n)
self.assertEqual(mdl.timestampEof, cs.timestampEof)
self.assertEqual(mdl.frameAge, 0)
self.assertEqual(mdl.frameDropPerc, 0)
odo = self.sm['cameraOdometry']
self.assertEqual(odo.frameId, n)
self.assertEqual(odo.timestampEof, cs.timestampEof)
def test_dropped_frames(self):
"""
modeld should only run on consecutive road frames
"""
frame_id = -1
road_frames = list()
for n in range(1, 50):
if (random.random() < 0.1) and n > 3:
cams = random.choice([(), ('wideRoadCameraState', )])
self._send_frames(n, cams)
else:
self._send_frames(n)
road_frames.append(n)
self._wait()
if len(road_frames) < 3 or road_frames[-1] - road_frames[-2] == 1:
frame_id = road_frames[-1]
mdl = self.sm['modelV2']
odo = self.sm['cameraOdometry']
self.assertEqual(mdl.frameId, frame_id)
self.assertEqual(mdl.frameIdExtra, frame_id)
self.assertEqual(odo.frameId, frame_id)
if n != frame_id:
self.assertFalse(self.sm.updated['modelV2'])
self.assertFalse(self.sm.updated['cameraOdometry'])
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,2 @@
#!/bin/bash
clang++ -I /home/batman/one/external/tensorflow/include/ -L /home/batman/one/external/tensorflow/lib -Wl,-rpath=/home/batman/one/external/tensorflow/lib main.cc -ltensorflow

View File

@@ -0,0 +1,69 @@
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include "tensorflow/c/c_api.h"
void* read_file(const char* path, size_t* out_len) {
FILE* f = fopen(path, "r");
if (!f) {
return NULL;
}
fseek(f, 0, SEEK_END);
long f_len = ftell(f);
rewind(f);
char* buf = (char*)calloc(f_len, 1);
assert(buf);
size_t num_read = fread(buf, f_len, 1, f);
fclose(f);
if (num_read != 1) {
free(buf);
return NULL;
}
if (out_len) {
*out_len = f_len;
}
return buf;
}
static void DeallocateBuffer(void* data, size_t) {
free(data);
}
int main(int argc, char* argv[]) {
TF_Buffer* buf;
TF_Graph* graph;
TF_Status* status;
char *path = argv[1];
// load model
{
size_t model_size;
char tmp[1024];
snprintf(tmp, sizeof(tmp), "%s.pb", path);
printf("loading model %s\n", tmp);
uint8_t *model_data = (uint8_t *)read_file(tmp, &model_size);
buf = TF_NewBuffer();
buf->data = model_data;
buf->length = model_size;
buf->data_deallocator = DeallocateBuffer;
printf("loaded model of size %d\n", model_size);
}
// import graph
status = TF_NewStatus();
graph = TF_NewGraph();
TF_ImportGraphDefOptions *opts = TF_NewImportGraphDefOptions();
TF_GraphImportGraphDef(graph, buf, opts, status);
TF_DeleteImportGraphDefOptions(opts);
TF_DeleteBuffer(buf);
if (TF_GetCode(status) != TF_OK) {
printf("FAIL: %s\n", TF_Message(status));
} else {
printf("SUCCESS\n");
}
}

View File

@@ -0,0 +1,8 @@
#!/usr/bin/env python3
import sys
import tensorflow as tf
with open(sys.argv[1], "rb") as f:
graph_def = tf.compat.v1.GraphDef()
graph_def.ParseFromString(f.read())
#tf.io.write_graph(graph_def, '', sys.argv[1]+".try")

View File

@@ -0,0 +1,39 @@
#!/usr/bin/env python3
# type: ignore
import os
import time
import numpy as np
import cereal.messaging as messaging
from openpilot.selfdrive.manager.process_config import managed_processes
N = int(os.getenv("N", "5"))
TIME = int(os.getenv("TIME", "30"))
if __name__ == "__main__":
sock = messaging.sub_sock('modelV2', conflate=False, timeout=1000)
execution_times = []
for _ in range(N):
os.environ['LOGPRINT'] = 'debug'
managed_processes['modeld'].start()
time.sleep(5)
t = []
start = time.monotonic()
while time.monotonic() - start < TIME:
msgs = messaging.drain_sock(sock, wait_for_one=True)
for m in msgs:
t.append(m.modelV2.modelExecutionTime)
execution_times.append(np.array(t[10:]) * 1000)
managed_processes['modeld'].stop()
print("\n\n")
print(f"ran modeld {N} times for {TIME}s each")
for _, t in enumerate(execution_times):
print(f"\tavg: {sum(t)/len(t):0.2f}ms, min: {min(t):0.2f}ms, max: {max(t):0.2f}ms")
print("\n\n")