openpilot v0.9.6 release

date: 2024-01-12T10:13:37
master commit: ba792d576a49a0899b88a753fa1c52956bedf9e6
commit 08e9fb1edc by FrogAi, 2024-01-12 22:39:28 -07:00
1881 changed files with 653708 additions and 0 deletions

selfdrive/modeld/runners/__init__.py

@@ -0,0 +1,27 @@
import os

from openpilot.system.hardware import TICI
from openpilot.selfdrive.modeld.runners.runmodel_pyx import RunModel, Runtime
assert Runtime

USE_THNEED = int(os.getenv('USE_THNEED', str(int(TICI))))
USE_SNPE = int(os.getenv('USE_SNPE', str(int(TICI))))

class ModelRunner(RunModel):
  THNEED = 'THNEED'
  SNPE = 'SNPE'
  ONNX = 'ONNX'

  def __new__(cls, paths, *args, **kwargs):
    if ModelRunner.THNEED in paths and USE_THNEED:
      from openpilot.selfdrive.modeld.runners.thneedmodel_pyx import ThneedModel as Runner
      runner_type = ModelRunner.THNEED
    elif ModelRunner.SNPE in paths and USE_SNPE:
      from openpilot.selfdrive.modeld.runners.snpemodel_pyx import SNPEModel as Runner
      runner_type = ModelRunner.SNPE
    elif ModelRunner.ONNX in paths:
      from openpilot.selfdrive.modeld.runners.onnxmodel import ONNXModel as Runner
      runner_type = ModelRunner.ONNX
    else:
      raise Exception("Couldn't select a model runner, make sure to pass at least one valid model path")
    return Runner(str(paths[runner_type]), *args, **kwargs)
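For reference, a caller hands ModelRunner a dict of candidate model paths keyed by runner type and gets back whichever backend is enabled; on PC (non-TICI) both USE_THNEED and USE_SNPE default to 0, so the ONNX path is taken. A minimal sketch assuming the ONNX fallback — the paths and output size below are hypothetical, and None is fine for the CL context since the ONNX runner ignores it:

import numpy as np
from openpilot.selfdrive.modeld.runners import ModelRunner, Runtime

MODEL_PATHS = {ModelRunner.THNEED: '/path/to/supercombo.thneed',  # hypothetical paths
               ModelRunner.ONNX: '/path/to/supercombo.onnx'}
OUTPUT_SIZE = 6504  # hypothetical; the real size comes from the model definition

output = np.zeros(OUTPUT_SIZE, dtype=np.float32)
model = ModelRunner(MODEL_PATHS, output, Runtime.CPU, False, None)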

selfdrive/modeld/runners/onnxmodel.py

@@ -0,0 +1,93 @@
import onnx
import itertools
import os
import sys
import numpy as np
from typing import Tuple, Dict, Union, Any

from openpilot.selfdrive.modeld.runners.runmodel_pyx import RunModel

ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8}

def attributeproto_fp16_to_fp32(attr):
  float32_list = np.frombuffer(attr.raw_data, dtype=np.float16)
  attr.data_type = 1
  attr.raw_data = float32_list.astype(np.float32).tobytes()

def convert_fp16_to_fp32(path):
  model = onnx.load(path)
  for i in model.graph.initializer:
    if i.data_type == 10:
      attributeproto_fp16_to_fp32(i)
  for i in itertools.chain(model.graph.input, model.graph.output):
    if i.type.tensor_type.elem_type == 10:
      i.type.tensor_type.elem_type = 1
  for i in model.graph.node:
    for a in i.attribute:
      if hasattr(a, 't'):
        if a.t.data_type == 10:
          attributeproto_fp16_to_fp32(a.t)
  return model.SerializeToString()

def create_ort_session(path, fp16_to_fp32):
  os.environ["OMP_NUM_THREADS"] = "4"
  os.environ["OMP_WAIT_POLICY"] = "PASSIVE"

  import onnxruntime as ort
  print("Onnx available providers: ", ort.get_available_providers(), file=sys.stderr)
  options = ort.SessionOptions()
  options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL

  provider: Union[str, Tuple[str, Dict[Any, Any]]]
  if 'OpenVINOExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
    provider = 'OpenVINOExecutionProvider'
  elif 'CUDAExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
    options.intra_op_num_threads = 2
    provider = ('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'DEFAULT'})
  else:
    options.intra_op_num_threads = 2
    options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    provider = 'CPUExecutionProvider'

  model_data = convert_fp16_to_fp32(path) if fp16_to_fp32 else path
  print("Onnx selected provider: ", [provider], file=sys.stderr)
  ort_session = ort.InferenceSession(model_data, options, providers=[provider])
  print("Onnx using ", ort_session.get_providers(), file=sys.stderr)
  return ort_session

class ONNXModel(RunModel):
  def __init__(self, path, output, runtime, use_tf8, cl_context):
    self.inputs = {}
    self.output = output
    self.use_tf8 = use_tf8

    self.session = create_ort_session(path, fp16_to_fp32=True)
    self.input_names = [x.name for x in self.session.get_inputs()]
    self.input_shapes = {x.name: [1, *x.shape[1:]] for x in self.session.get_inputs()}
    self.input_dtypes = {x.name: ORT_TYPES_TO_NP_TYPES[x.type] for x in self.session.get_inputs()}

    # run once to initialize CUDA provider
    if "CUDAExecutionProvider" in self.session.get_providers():
      self.session.run(None, {k: np.zeros(self.input_shapes[k], dtype=self.input_dtypes[k]) for k in self.input_names})
    print("ready to run onnx model", self.input_shapes, file=sys.stderr)

  def addInput(self, name, buffer):
    assert name in self.input_names
    self.inputs[name] = buffer

  def setInputBuffer(self, name, buffer):
    assert name in self.inputs
    self.inputs[name] = buffer

  def getCLBuffer(self, name):
    return None

  def execute(self):
    inputs = {k: (v.view(np.uint8) / 255. if self.use_tf8 and k == 'input_img' else v) for k, v in self.inputs.items()}
    inputs = {k: v.reshape(self.input_shapes[k]).astype(self.input_dtypes[k]) for k, v in inputs.items()}
    outputs = self.session.run(None, inputs)
    assert len(outputs) == 1, "Only single model outputs are supported"
    self.output[:] = outputs[0]
    return self.output
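One detail worth calling out: when use_tf8 is set, execute() reinterprets the raw bytes of the 'input_img' float buffer as uint8 and rescales them to [0, 1] before inference. A standalone illustration of that view-and-scale trick:

import numpy as np

buf = np.zeros(2, dtype=np.float32)                     # 8 bytes of storage
buf.view(np.uint8)[:] = [0, 64, 128, 255, 1, 2, 3, 4]   # the bytes hold uint8 pixels
scaled = buf.view(np.uint8) / 255.                      # what execute() feeds 'input_img'
print(scaled)  # approx [0. 0.251 0.502 1. 0.0039 0.0078 0.0118 0.0157]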

selfdrive/modeld/runners/run.h

@@ -0,0 +1,4 @@
#pragma once

#include "selfdrive/modeld/runners/runmodel.h"
#include "selfdrive/modeld/runners/snpemodel.h"

selfdrive/modeld/runners/runmodel.h

@@ -0,0 +1,49 @@
#pragma once

#include <string>
#include <vector>
#include <memory>
#include <cassert>

#include "common/clutil.h"
#include "common/swaglog.h"

#define USE_CPU_RUNTIME 0
#define USE_GPU_RUNTIME 1
#define USE_DSP_RUNTIME 2

struct ModelInput {
  const std::string name;
  float *buffer;
  int size;

  ModelInput(const std::string _name, float *_buffer, int _size) : name(_name), buffer(_buffer), size(_size) {}
  virtual void setBuffer(float *_buffer, int _size) {
    assert(size == _size || size == 0);
    buffer = _buffer;
    size = _size;
  }
};

class RunModel {
public:
  std::vector<std::unique_ptr<ModelInput>> inputs;

  virtual ~RunModel() {}
  virtual void execute() {}
  virtual void* getCLBuffer(const std::string name) { return nullptr; }
  virtual void addInput(const std::string name, float *buffer, int size) {
    inputs.push_back(std::unique_ptr<ModelInput>(new ModelInput(name, buffer, size)));
  }
  virtual void setInputBuffer(const std::string name, float *buffer, int size) {
    for (auto &input : inputs) {
      if (name == input->name) {
        input->setBuffer(buffer, size);
        return;
      }
    }
    LOGE("Tried to update input `%s` but no input with this name exists", name.c_str());
    assert(false);
  }
};

selfdrive/modeld/runners/runmodel.pxd

@@ -0,0 +1,14 @@
# distutils: language = c++

from libcpp.string cimport string

cdef extern from "selfdrive/modeld/runners/runmodel.h":
  cdef int USE_CPU_RUNTIME
  cdef int USE_GPU_RUNTIME
  cdef int USE_DSP_RUNTIME

  cdef cppclass RunModel:
    void addInput(string, float*, int)
    void setInputBuffer(string, float*, int)
    void * getCLBuffer(string)
    void execute()

selfdrive/modeld/runners/runmodel_pyx.pxd

@@ -0,0 +1,6 @@
# distutils: language = c++

from .runmodel cimport RunModel as cppRunModel

cdef class RunModel:
  cdef cppRunModel * model

selfdrive/modeld/runners/runmodel_pyx.pyx

@@ -0,0 +1,38 @@
# distutils: language = c++
# cython: c_string_encoding=ascii

from libcpp.string cimport string
from libc.string cimport memcpy

from .runmodel cimport USE_CPU_RUNTIME, USE_GPU_RUNTIME, USE_DSP_RUNTIME
from selfdrive.modeld.models.commonmodel_pyx cimport CLMem

class Runtime:
  CPU = USE_CPU_RUNTIME
  GPU = USE_GPU_RUNTIME
  DSP = USE_DSP_RUNTIME

cdef class RunModel:
  def __dealloc__(self):
    del self.model

  def addInput(self, string name, float[:] buffer):
    if buffer is not None:
      self.model.addInput(name, &buffer[0], len(buffer))
    else:
      self.model.addInput(name, NULL, 0)

  def setInputBuffer(self, string name, float[:] buffer):
    if buffer is not None:
      self.model.setInputBuffer(name, &buffer[0], len(buffer))
    else:
      self.model.setInputBuffer(name, NULL, 0)

  def getCLBuffer(self, string name):
    cdef void * cl_buf = self.model.getCLBuffer(name)
    if not cl_buf:
      return None
    return CLMem.create(cl_buf)

  def execute(self):
    self.model.execute()
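Taken together, every backend is driven through this same surface: register each input once at startup, update or repoint buffers per frame, then execute. A hedged sketch continuing the ModelRunner example above — the input name and size are made up for illustration:

import numpy as np

desire = np.zeros(8, dtype=np.float32)    # hypothetical input name and size
model.addInput("desire", desire)          # register once at startup

for _ in range(20):                       # per-frame loop
  desire[:] = 0.0                         # mutate in place, or...
  model.setInputBuffer("desire", desire)  # ...repoint to a fresh buffer
  model.execute()                         # results land in `output` from the earlier sketch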

selfdrive/modeld/runners/snpemodel.cc

@@ -0,0 +1,116 @@
#pragma clang diagnostic ignored "-Wexceptions"

#include "selfdrive/modeld/runners/snpemodel.h"

#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "common/util.h"
#include "common/timing.h"

void PrintErrorStringAndExit() {
  std::cerr << zdl::DlSystem::getLastErrorString() << std::endl;
  std::exit(EXIT_FAILURE);
}

SNPEModel::SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool _use_tf8, cl_context context) {
  output = _output;
  output_size = _output_size;
  use_tf8 = _use_tf8;

#ifdef QCOM2
  if (runtime == USE_GPU_RUNTIME) {
    snpe_runtime = zdl::DlSystem::Runtime_t::GPU;
  } else if (runtime == USE_DSP_RUNTIME) {
    snpe_runtime = zdl::DlSystem::Runtime_t::DSP;
  } else {
    snpe_runtime = zdl::DlSystem::Runtime_t::CPU;
  }
  assert(zdl::SNPE::SNPEFactory::isRuntimeAvailable(snpe_runtime));
#endif

  model_data = util::read_file(path);
  assert(model_data.size() > 0);

  // load model
  std::unique_ptr<zdl::DlContainer::IDlContainer> container = zdl::DlContainer::IDlContainer::open((uint8_t*)model_data.data(), model_data.size());
  if (!container) { PrintErrorStringAndExit(); }
  LOGW("loaded model with size: %lu", model_data.size());

  // create model runner
  zdl::SNPE::SNPEBuilder snpe_builder(container.get());
  while (!snpe) {
#ifdef QCOM2
    snpe = snpe_builder.setOutputLayers({})
                       .setRuntimeProcessor(snpe_runtime)
                       .setUseUserSuppliedBuffers(true)
                       .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
                       .build();
#else
    snpe = snpe_builder.setOutputLayers({})
                       .setUseUserSuppliedBuffers(true)
                       .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
                       .build();
#endif
    if (!snpe) std::cerr << zdl::DlSystem::getLastErrorString() << std::endl;
  }

  // create output buffer
  zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float;
  zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory();

  const auto &output_tensor_names_opt = snpe->getOutputTensorNames();
  if (!output_tensor_names_opt) throw std::runtime_error("Error obtaining output tensor names");
  const auto &output_tensor_names = *output_tensor_names_opt;
  assert(output_tensor_names.size() == 1);
  const char *output_tensor_name = output_tensor_names.at(0);

  const zdl::DlSystem::TensorShape &buffer_shape = snpe->getInputOutputBufferAttributes(output_tensor_name)->getDims();
  if (output_size != 0) {
    assert(output_size == buffer_shape[1]);
  } else {
    output_size = buffer_shape[1];
  }
  std::vector<size_t> output_strides = {output_size * sizeof(float), sizeof(float)};
  output_buffer = ub_factory.createUserBuffer(output, output_size * sizeof(float), output_strides, &ub_encoding_float);
  output_map.add(output_tensor_name, output_buffer.get());
}

void SNPEModel::addInput(const std::string name, float *buffer, int size) {
  const int idx = inputs.size();

  const auto &input_tensor_names_opt = snpe->getInputTensorNames();
  if (!input_tensor_names_opt) throw std::runtime_error("Error obtaining input tensor names");
  const auto &input_tensor_names = *input_tensor_names_opt;
  const char *input_tensor_name = input_tensor_names.at(idx);
  const bool input_tf8 = use_tf8 && strcmp(input_tensor_name, "input_img") == 0;  // TODO: This is a terrible hack, get rid of this name check both here and in onnx_runner.py
  LOGW("adding index %d: %s", idx, input_tensor_name);

  zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float;
  zdl::DlSystem::UserBufferEncodingTf8 ub_encoding_tf8(0, 1./255);  // network takes 0-1
  zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
  zdl::DlSystem::UserBufferEncoding *input_encoding = input_tf8 ? (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_tf8 : (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_float;

  const auto &buffer_shape_opt = snpe->getInputDimensions(input_tensor_name);
  const zdl::DlSystem::TensorShape &buffer_shape = *buffer_shape_opt;
  size_t size_of_input = input_tf8 ? sizeof(uint8_t) : sizeof(float);
  std::vector<size_t> strides(buffer_shape.rank());
  strides[strides.size() - 1] = size_of_input;
  size_t product = 1;
  for (size_t i = 0; i < buffer_shape.rank(); i++) product *= buffer_shape[i];
  size_t stride = strides[strides.size() - 1];
  for (size_t i = buffer_shape.rank() - 1; i > 0; i--) {
    stride *= buffer_shape[i];
    strides[i-1] = stride;
  }

  auto input_buffer = ub_factory.createUserBuffer(buffer, product*size_of_input, strides, input_encoding);
  input_map.add(input_tensor_name, input_buffer.get());
  inputs.push_back(std::unique_ptr<SNPEModelInput>(new SNPEModelInput(name, buffer, size, std::move(input_buffer))));
}

void SNPEModel::execute() {
  if (!snpe->execute(input_map, output_map)) {
    PrintErrorStringAndExit();
  }
}
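The stride setup in addInput() is the standard row-major computation: the innermost stride is the element size, each outer stride is the product of all inner dimensions, and the total buffer size is the product of every dimension times the element size. The same computation in a few lines of Python (the shape below is hypothetical):

def row_major_strides(shape, itemsize):
  # walk back-to-front, exactly like the loop in SNPEModel::addInput
  strides = [itemsize] * len(shape)
  for i in range(len(shape) - 1, 0, -1):
    strides[i - 1] = strides[i] * shape[i]
  return strides

print(row_major_strides([1, 128, 256, 3], 4))  # [393216, 3072, 12, 4]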

selfdrive/modeld/runners/snpemodel.h

@@ -0,0 +1,52 @@
#pragma once

#pragma clang diagnostic ignored "-Wdeprecated-declarations"

#include <memory>
#include <string>
#include <utility>

#include <DlContainer/IDlContainer.hpp>
#include <DlSystem/DlError.hpp>
#include <DlSystem/ITensor.hpp>
#include <DlSystem/ITensorFactory.hpp>
#include <DlSystem/IUserBuffer.hpp>
#include <DlSystem/IUserBufferFactory.hpp>
#include <SNPE/SNPE.hpp>
#include <SNPE/SNPEBuilder.hpp>
#include <SNPE/SNPEFactory.hpp>

#include "selfdrive/modeld/runners/runmodel.h"

struct SNPEModelInput : public ModelInput {
  std::unique_ptr<zdl::DlSystem::IUserBuffer> snpe_buffer;

  SNPEModelInput(const std::string _name, float *_buffer, int _size, std::unique_ptr<zdl::DlSystem::IUserBuffer> _snpe_buffer) : ModelInput(_name, _buffer, _size), snpe_buffer(std::move(_snpe_buffer)) {}
  void setBuffer(float *_buffer, int _size) {
    ModelInput::setBuffer(_buffer, _size);
    assert(snpe_buffer->setBufferAddress(_buffer) == true);
  }
};

class SNPEModel : public RunModel {
public:
  SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL);
  void addInput(const std::string name, float *buffer, int size);
  void execute();

private:
  std::string model_data;

#ifdef QCOM2
  zdl::DlSystem::Runtime_t snpe_runtime;
#endif

  // snpe model stuff
  std::unique_ptr<zdl::SNPE::SNPE> snpe;
  zdl::DlSystem::UserBufferMap input_map;
  zdl::DlSystem::UserBufferMap output_map;
  std::unique_ptr<zdl::DlSystem::IUserBuffer> output_buffer;

  bool use_tf8;
  float *output;
  size_t output_size;
};

selfdrive/modeld/runners/snpemodel.pxd

@@ -0,0 +1,9 @@
# distutils: language = c++

from libcpp.string cimport string
from cereal.visionipc.visionipc cimport cl_context

cdef extern from "selfdrive/modeld/runners/snpemodel.h":
  cdef cppclass SNPEModel:
    SNPEModel(string, float*, size_t, int, bool, cl_context)

selfdrive/modeld/runners/snpemodel_pyx.pyx

@@ -0,0 +1,17 @@
# distutils: language = c++
# cython: c_string_encoding=ascii

import os
from libcpp cimport bool
from libcpp.string cimport string

from .snpemodel cimport SNPEModel as cppSNPEModel
from selfdrive.modeld.models.commonmodel_pyx cimport CLContext
from selfdrive.modeld.runners.runmodel_pyx cimport RunModel
from selfdrive.modeld.runners.runmodel cimport RunModel as cppRunModel

os.environ['ADSP_LIBRARY_PATH'] = "/data/pythonpath/third_party/snpe/dsp/"

cdef class SNPEModel(RunModel):
  def __cinit__(self, string path, float[:] output, int runtime, bool use_tf8, CLContext context):
    self.model = <cppRunModel *> new cppSNPEModel(path, &output[0], len(output), runtime, use_tf8, context.context)

selfdrive/modeld/runners/thneedmodel.cc

@@ -0,0 +1,58 @@
#include "selfdrive/modeld/runners/thneedmodel.h"

#include <string>

#include "common/swaglog.h"

ThneedModel::ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool luse_tf8, cl_context context) {
  thneed = new Thneed(true, context);
  thneed->load(path.c_str());
  thneed->clexec();

  recorded = false;
  output = _output;
}

void* ThneedModel::getCLBuffer(const std::string name) {
  int index = -1;
  for (int i = 0; i < inputs.size(); i++) {
    if (name == inputs[i]->name) {
      index = i;
      break;
    }
  }

  if (index == -1) {
    LOGE("Tried to get CL buffer for input `%s` but no input with this name exists", name.c_str());
    assert(false);
  }

  if (thneed->input_clmem.size() >= inputs.size()) {
    return &thneed->input_clmem[inputs.size() - index - 1];
  } else {
    return nullptr;
  }
}

void ThneedModel::execute() {
  if (!recorded) {
    thneed->record = true;
    float *input_buffers[inputs.size()];
    for (int i = 0; i < inputs.size(); i++) {
      input_buffers[inputs.size() - i - 1] = inputs[i]->buffer;
    }

    thneed->copy_inputs(input_buffers);
    thneed->clexec();
    thneed->copy_output(output);
    thneed->stop();

    recorded = true;
  } else {
    float *input_buffers[inputs.size()];
    for (int i = 0; i < inputs.size(); i++) {
      input_buffers[inputs.size() - i - 1] = inputs[i]->buffer;
    }

    thneed->execute(input_buffers, output);
  }
}
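Two behaviors here are easy to miss: the first execute() records the OpenCL command queue and every later call replays it, and thneed consumes input buffers in reverse registration order (input_buffers[inputs.size() - i - 1] = inputs[i]->buffer). A small sketch of that reversal, with hypothetical input names:

registered = ["input_imgs", "big_input_imgs", "desire", "traffic_convention"]  # hypothetical order
# input_buffers[inputs.size() - i - 1] = inputs[i]->buffer  ==  reversed registration order
thneed_order = list(reversed(registered))
print(thneed_order)  # ['traffic_convention', 'desire', 'big_input_imgs', 'input_imgs']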

selfdrive/modeld/runners/thneedmodel.h

@@ -0,0 +1,17 @@
#pragma once

#include <string>

#include "selfdrive/modeld/runners/runmodel.h"
#include "selfdrive/modeld/thneed/thneed.h"

class ThneedModel : public RunModel {
public:
  ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL);
  void *getCLBuffer(const std::string name);
  void execute();

private:
  Thneed *thneed = NULL;
  bool recorded;
  float *output;
};

selfdrive/modeld/runners/thneedmodel.pxd

@@ -0,0 +1,9 @@
# distutils: language = c++

from libcpp.string cimport string
from cereal.visionipc.visionipc cimport cl_context

cdef extern from "selfdrive/modeld/runners/thneedmodel.h":
  cdef cppclass ThneedModel:
    ThneedModel(string, float*, size_t, int, bool, cl_context)

selfdrive/modeld/runners/thneedmodel_pyx.pyx

@@ -0,0 +1,14 @@
# distutils: language = c++
# cython: c_string_encoding=ascii

from libcpp cimport bool
from libcpp.string cimport string

from .thneedmodel cimport ThneedModel as cppThneedModel
from selfdrive.modeld.models.commonmodel_pyx cimport CLContext
from selfdrive.modeld.runners.runmodel_pyx cimport RunModel
from selfdrive.modeld.runners.runmodel cimport RunModel as cppRunModel

cdef class ThneedModel(RunModel):
  def __cinit__(self, string path, float[:] output, int runtime, bool use_tf8, CLContext context):
    self.model = <cppRunModel *> new cppThneedModel(path, &output[0], len(output), runtime, use_tf8, context.context)