openpilot v0.9.6 release
date: 2024-01-12T10:13:37 master commit: ba792d576a49a0899b88a753fa1c52956bedf9e6

selfdrive/modeld/runners/__init__.py (new file, 27 lines)
@@ -0,0 +1,27 @@
import os
from openpilot.system.hardware import TICI
from openpilot.selfdrive.modeld.runners.runmodel_pyx import RunModel, Runtime
assert Runtime

USE_THNEED = int(os.getenv('USE_THNEED', str(int(TICI))))
USE_SNPE = int(os.getenv('USE_SNPE', str(int(TICI))))

class ModelRunner(RunModel):
  THNEED = 'THNEED'
  SNPE = 'SNPE'
  ONNX = 'ONNX'

  def __new__(cls, paths, *args, **kwargs):
    if ModelRunner.THNEED in paths and USE_THNEED:
      from openpilot.selfdrive.modeld.runners.thneedmodel_pyx import ThneedModel as Runner
      runner_type = ModelRunner.THNEED
    elif ModelRunner.SNPE in paths and USE_SNPE:
      from openpilot.selfdrive.modeld.runners.snpemodel_pyx import SNPEModel as Runner
      runner_type = ModelRunner.SNPE
    elif ModelRunner.ONNX in paths:
      from openpilot.selfdrive.modeld.runners.onnxmodel import ONNXModel as Runner
      runner_type = ModelRunner.ONNX
    else:
      raise Exception("Couldn't select a model runner, make sure to pass at least one valid model path")

    return Runner(str(paths[runner_type]), *args, **kwargs)
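
Taken together, ModelRunner acts as a factory: callers pass a dict mapping the runner-type constants to model paths, and __new__ returns whichever backend is enabled via the USE_THNEED/USE_SNPE environment toggles (both default to on only on TICI hardware). A minimal usage sketch, with hypothetical paths and output size; on a PC without thneed or SNPE the ONNX branch is selected, and the trailing cl_context is only needed by the thneed/SNPE backends:

    import numpy as np
    from openpilot.selfdrive.modeld.runners import ModelRunner, Runtime

    MODEL_PATHS = {
      ModelRunner.THNEED: '/path/to/model.thneed',  # hypothetical paths
      ModelRunner.ONNX: '/path/to/model.onnx',
    }
    output = np.zeros(1024, dtype=np.float32)  # size must match the model's single output
    model = ModelRunner(MODEL_PATHS, output, Runtime.GPU, False, None)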

selfdrive/modeld/runners/onnxmodel.py (new file, 93 lines)
@@ -0,0 +1,93 @@
import onnx
import itertools
import os
import sys
import numpy as np
from typing import Tuple, Dict, Union, Any

from openpilot.selfdrive.modeld.runners.runmodel_pyx import RunModel

ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8}

def attributeproto_fp16_to_fp32(attr):
  float32_list = np.frombuffer(attr.raw_data, dtype=np.float16)
  attr.data_type = 1
  attr.raw_data = float32_list.astype(np.float32).tobytes()

def convert_fp16_to_fp32(path):
  model = onnx.load(path)
  for i in model.graph.initializer:
    if i.data_type == 10:
      attributeproto_fp16_to_fp32(i)
  for i in itertools.chain(model.graph.input, model.graph.output):
    if i.type.tensor_type.elem_type == 10:
      i.type.tensor_type.elem_type = 1
  for i in model.graph.node:
    for a in i.attribute:
      if hasattr(a, 't'):
        if a.t.data_type == 10:
          attributeproto_fp16_to_fp32(a.t)
  return model.SerializeToString()

def create_ort_session(path, fp16_to_fp32):
  os.environ["OMP_NUM_THREADS"] = "4"
  os.environ["OMP_WAIT_POLICY"] = "PASSIVE"

  import onnxruntime as ort
  print("Onnx available providers: ", ort.get_available_providers(), file=sys.stderr)
  options = ort.SessionOptions()
  options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL

  provider: Union[str, Tuple[str, Dict[Any, Any]]]
  if 'OpenVINOExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
    provider = 'OpenVINOExecutionProvider'
  elif 'CUDAExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
    options.intra_op_num_threads = 2
    provider = ('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'DEFAULT'})
  else:
    options.intra_op_num_threads = 2
    options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    provider = 'CPUExecutionProvider'

  model_data = convert_fp16_to_fp32(path) if fp16_to_fp32 else path
  print("Onnx selected provider: ", [provider], file=sys.stderr)
  ort_session = ort.InferenceSession(model_data, options, providers=[provider])
  print("Onnx using ", ort_session.get_providers(), file=sys.stderr)
  return ort_session


class ONNXModel(RunModel):
  def __init__(self, path, output, runtime, use_tf8, cl_context):
    self.inputs = {}
    self.output = output
    self.use_tf8 = use_tf8

    self.session = create_ort_session(path, fp16_to_fp32=True)
    self.input_names = [x.name for x in self.session.get_inputs()]
    self.input_shapes = {x.name: [1, *x.shape[1:]] for x in self.session.get_inputs()}
    self.input_dtypes = {x.name: ORT_TYPES_TO_NP_TYPES[x.type] for x in self.session.get_inputs()}

    # run once to initialize CUDA provider
    if "CUDAExecutionProvider" in self.session.get_providers():
      self.session.run(None, {k: np.zeros(self.input_shapes[k], dtype=self.input_dtypes[k]) for k in self.input_names})
    print("ready to run onnx model", self.input_shapes, file=sys.stderr)

  def addInput(self, name, buffer):
    assert name in self.input_names
    self.inputs[name] = buffer

  def setInputBuffer(self, name, buffer):
    assert name in self.inputs
    self.inputs[name] = buffer

  def getCLBuffer(self, name):
    return None

  def execute(self):
    inputs = {k: (v.view(np.uint8) / 255. if self.use_tf8 and k == 'input_img' else v) for k,v in self.inputs.items()}
    inputs = {k: v.reshape(self.input_shapes[k]).astype(self.input_dtypes[k]) for k,v in inputs.items()}
    outputs = self.session.run(None, inputs)
    assert len(outputs) == 1, "Only single model outputs are supported"
    self.output[:] = outputs[0]
    return self.output
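
Provider selection falls through OpenVINO, then CUDA, then plain CPU; exporting ONNXCPU (the code only checks for the key's presence) skips both accelerated branches, and fp16 weights are widened to fp32 (ONNX data_type 10 becomes 1) because not every provider ships fp16 kernels. A short sketch of forcing the CPU path, with a hypothetical model path:

    import os
    os.environ['ONNXCPU'] = '1'  # presence of the key is all that matters
    from openpilot.selfdrive.modeld.runners.onnxmodel import create_ort_session

    session = create_ort_session('/path/to/model.onnx', fp16_to_fp32=True)
    print([inp.name for inp in session.get_inputs()])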

selfdrive/modeld/runners/run.h (new file, 4 lines)
@@ -0,0 +1,4 @@
#pragma once

#include "selfdrive/modeld/runners/runmodel.h"
#include "selfdrive/modeld/runners/snpemodel.h"

selfdrive/modeld/runners/runmodel.h (new file, 49 lines)
@@ -0,0 +1,49 @@
#pragma once

#include <string>
#include <vector>
#include <memory>
#include <cassert>

#include "common/clutil.h"
#include "common/swaglog.h"

#define USE_CPU_RUNTIME 0
#define USE_GPU_RUNTIME 1
#define USE_DSP_RUNTIME 2

struct ModelInput {
  const std::string name;
  float *buffer;
  int size;

  ModelInput(const std::string _name, float *_buffer, int _size) : name(_name), buffer(_buffer), size(_size) {}
  virtual void setBuffer(float *_buffer, int _size) {
    assert(size == _size || size == 0);
    buffer = _buffer;
    size = _size;
  }
};

class RunModel {
public:
  std::vector<std::unique_ptr<ModelInput>> inputs;

  virtual ~RunModel() {}
  virtual void execute() {}
  virtual void* getCLBuffer(const std::string name) { return nullptr; }

  virtual void addInput(const std::string name, float *buffer, int size) {
    inputs.push_back(std::unique_ptr<ModelInput>(new ModelInput(name, buffer, size)));
  }
  virtual void setInputBuffer(const std::string name, float *buffer, int size) {
    for (auto &input : inputs) {
      if (name == input->name) {
        input->setBuffer(buffer, size);
        return;
      }
    }
    LOGE("Tried to update input `%s` but no input with this name exists", name.c_str());
    assert(false);
  }
};

selfdrive/modeld/runners/runmodel.pxd (new file, 14 lines)
@@ -0,0 +1,14 @@
# distutils: language = c++

from libcpp.string cimport string

cdef extern from "selfdrive/modeld/runners/runmodel.h":
  cdef int USE_CPU_RUNTIME
  cdef int USE_GPU_RUNTIME
  cdef int USE_DSP_RUNTIME

  cdef cppclass RunModel:
    void addInput(string, float*, int)
    void setInputBuffer(string, float*, int)
    void * getCLBuffer(string)
    void execute()

selfdrive/modeld/runners/runmodel_pyx.pxd (new file, 6 lines)
@@ -0,0 +1,6 @@
# distutils: language = c++

from .runmodel cimport RunModel as cppRunModel

cdef class RunModel:
  cdef cppRunModel * model

selfdrive/modeld/runners/runmodel_pyx.pyx (new file, 38 lines)
@@ -0,0 +1,38 @@
# distutils: language = c++
# cython: c_string_encoding=ascii

from libcpp.string cimport string
from libc.string cimport memcpy

from .runmodel cimport USE_CPU_RUNTIME, USE_GPU_RUNTIME, USE_DSP_RUNTIME
from selfdrive.modeld.models.commonmodel_pyx cimport CLMem

class Runtime:
  CPU = USE_CPU_RUNTIME
  GPU = USE_GPU_RUNTIME
  DSP = USE_DSP_RUNTIME

cdef class RunModel:
  def __dealloc__(self):
    del self.model

  def addInput(self, string name, float[:] buffer):
    if buffer is not None:
      self.model.addInput(name, &buffer[0], len(buffer))
    else:
      self.model.addInput(name, NULL, 0)

  def setInputBuffer(self, string name, float[:] buffer):
    if buffer is not None:
      self.model.setInputBuffer(name, &buffer[0], len(buffer))
    else:
      self.model.setInputBuffer(name, NULL, 0)

  def getCLBuffer(self, string name):
    cdef void * cl_buf = self.model.getCLBuffer(name)
    if not cl_buf:
      return None
    return CLMem.create(cl_buf)

  def execute(self):
    self.model.execute()
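
The Cython RunModel owns the underlying C++ pointer (freed in __dealloc__) and exposes the same four calls. The expected flow is to register each input buffer once with addInput, then mutate the numpy array in place and call execute() every frame; setInputBuffer is for re-pointing an already-registered input. A sketch with a hypothetical input name and size:

    import numpy as np

    buf = np.zeros(512, dtype=np.float32)   # hypothetical input size
    model.addInput('input_imgs', buf)       # register once at startup
    buf[:] = 1.0                            # fill in place each frame
    model.execute()
    model.setInputBuffer('input_imgs', np.zeros(512, dtype=np.float32))  # swap backing memory later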

selfdrive/modeld/runners/snpemodel.cc (new file, 116 lines)
@@ -0,0 +1,116 @@
#pragma clang diagnostic ignored "-Wexceptions"

#include "selfdrive/modeld/runners/snpemodel.h"

#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "common/util.h"
#include "common/timing.h"

void PrintErrorStringAndExit() {
  std::cerr << zdl::DlSystem::getLastErrorString() << std::endl;
  std::exit(EXIT_FAILURE);
}

SNPEModel::SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool _use_tf8, cl_context context) {
  output = _output;
  output_size = _output_size;
  use_tf8 = _use_tf8;

#ifdef QCOM2
  if (runtime == USE_GPU_RUNTIME) {
    snpe_runtime = zdl::DlSystem::Runtime_t::GPU;
  } else if (runtime == USE_DSP_RUNTIME) {
    snpe_runtime = zdl::DlSystem::Runtime_t::DSP;
  } else {
    snpe_runtime = zdl::DlSystem::Runtime_t::CPU;
  }
  assert(zdl::SNPE::SNPEFactory::isRuntimeAvailable(snpe_runtime));
#endif
  model_data = util::read_file(path);
  assert(model_data.size() > 0);

  // load model
  std::unique_ptr<zdl::DlContainer::IDlContainer> container = zdl::DlContainer::IDlContainer::open((uint8_t*)model_data.data(), model_data.size());
  if (!container) { PrintErrorStringAndExit(); }
  LOGW("loaded model with size: %lu", model_data.size());

  // create model runner
  zdl::SNPE::SNPEBuilder snpe_builder(container.get());
  while (!snpe) {
#ifdef QCOM2
    snpe = snpe_builder.setOutputLayers({})
                       .setRuntimeProcessor(snpe_runtime)
                       .setUseUserSuppliedBuffers(true)
                       .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
                       .build();
#else
    snpe = snpe_builder.setOutputLayers({})
                       .setUseUserSuppliedBuffers(true)
                       .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
                       .build();
#endif
    if (!snpe) std::cerr << zdl::DlSystem::getLastErrorString() << std::endl;
  }

  // create output buffer
  zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float;
  zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory();

  const auto &output_tensor_names_opt = snpe->getOutputTensorNames();
  if (!output_tensor_names_opt) throw std::runtime_error("Error obtaining output tensor names");
  const auto &output_tensor_names = *output_tensor_names_opt;
  assert(output_tensor_names.size() == 1);
  const char *output_tensor_name = output_tensor_names.at(0);
  const zdl::DlSystem::TensorShape &buffer_shape = snpe->getInputOutputBufferAttributes(output_tensor_name)->getDims();
  if (output_size != 0) {
    assert(output_size == buffer_shape[1]);
  } else {
    output_size = buffer_shape[1];
  }
  std::vector<size_t> output_strides = {output_size * sizeof(float), sizeof(float)};
  output_buffer = ub_factory.createUserBuffer(output, output_size * sizeof(float), output_strides, &ub_encoding_float);
  output_map.add(output_tensor_name, output_buffer.get());
}

void SNPEModel::addInput(const std::string name, float *buffer, int size) {
  const int idx = inputs.size();
  const auto &input_tensor_names_opt = snpe->getInputTensorNames();
  if (!input_tensor_names_opt) throw std::runtime_error("Error obtaining input tensor names");
  const auto &input_tensor_names = *input_tensor_names_opt;
  const char *input_tensor_name = input_tensor_names.at(idx);
  const bool input_tf8 = use_tf8 && strcmp(input_tensor_name, "input_img") == 0;  // TODO: This is a terrible hack, get rid of this name check both here and in onnx_runner.py
  LOGW("adding index %d: %s", idx, input_tensor_name);

  zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float;
  zdl::DlSystem::UserBufferEncodingTf8 ub_encoding_tf8(0, 1./255);  // network takes 0-1
  zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
  zdl::DlSystem::UserBufferEncoding *input_encoding = input_tf8 ? (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_tf8 : (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_float;

  const auto &buffer_shape_opt = snpe->getInputDimensions(input_tensor_name);
  const zdl::DlSystem::TensorShape &buffer_shape = *buffer_shape_opt;
  size_t size_of_input = input_tf8 ? sizeof(uint8_t) : sizeof(float);
  std::vector<size_t> strides(buffer_shape.rank());
  strides[strides.size() - 1] = size_of_input;
  size_t product = 1;
  for (size_t i = 0; i < buffer_shape.rank(); i++) product *= buffer_shape[i];
  size_t stride = strides[strides.size() - 1];
  for (size_t i = buffer_shape.rank() - 1; i > 0; i--) {
    stride *= buffer_shape[i];
    strides[i-1] = stride;
  }

  auto input_buffer = ub_factory.createUserBuffer(buffer, product*size_of_input, strides, input_encoding);
  input_map.add(input_tensor_name, input_buffer.get());
  inputs.push_back(std::unique_ptr<SNPEModelInput>(new SNPEModelInput(name, buffer, size, std::move(input_buffer))));
}

void SNPEModel::execute() {
  if (!snpe->execute(input_map, output_map)) {
    PrintErrorStringAndExit();
  }
}
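
The stride computation in addInput walks the shape from the innermost dimension outward, producing row-major byte strides for SNPE's user-buffer API. The same arithmetic in Python, with an illustrative shape and element size:

    def row_major_strides(dims, elem_size):
      # mirrors the C++ loop: the innermost stride is the element size,
      # each outer stride is the inner stride times that dimension
      strides = [0] * len(dims)
      strides[-1] = elem_size
      stride = elem_size
      for i in range(len(dims) - 1, 0, -1):
        stride *= dims[i]
        strides[i - 1] = stride
      return strides

    assert row_major_strides([1, 128, 256], 4) == [131072, 1024, 4]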

selfdrive/modeld/runners/snpemodel.h (new file, 52 lines)
@@ -0,0 +1,52 @@
#pragma once
#pragma clang diagnostic ignored "-Wdeprecated-declarations"

#include <memory>
#include <string>
#include <utility>

#include <DlContainer/IDlContainer.hpp>
#include <DlSystem/DlError.hpp>
#include <DlSystem/ITensor.hpp>
#include <DlSystem/ITensorFactory.hpp>
#include <DlSystem/IUserBuffer.hpp>
#include <DlSystem/IUserBufferFactory.hpp>
#include <SNPE/SNPE.hpp>
#include <SNPE/SNPEBuilder.hpp>
#include <SNPE/SNPEFactory.hpp>

#include "selfdrive/modeld/runners/runmodel.h"

struct SNPEModelInput : public ModelInput {
  std::unique_ptr<zdl::DlSystem::IUserBuffer> snpe_buffer;

  SNPEModelInput(const std::string _name, float *_buffer, int _size, std::unique_ptr<zdl::DlSystem::IUserBuffer> _snpe_buffer) : ModelInput(_name, _buffer, _size), snpe_buffer(std::move(_snpe_buffer)) {}
  void setBuffer(float *_buffer, int _size) {
    ModelInput::setBuffer(_buffer, _size);
    assert(snpe_buffer->setBufferAddress(_buffer) == true);
  }
};

class SNPEModel : public RunModel {
public:
  SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL);
  void addInput(const std::string name, float *buffer, int size);
  void execute();

private:
  std::string model_data;

#ifdef QCOM2
  zdl::DlSystem::Runtime_t snpe_runtime;
#endif

  // snpe model stuff
  std::unique_ptr<zdl::SNPE::SNPE> snpe;
  zdl::DlSystem::UserBufferMap input_map;
  zdl::DlSystem::UserBufferMap output_map;
  std::unique_ptr<zdl::DlSystem::IUserBuffer> output_buffer;

  bool use_tf8;
  float *output;
  size_t output_size;
};

selfdrive/modeld/runners/snpemodel.pxd (new file, 9 lines)
@@ -0,0 +1,9 @@
# distutils: language = c++

from libcpp.string cimport string

from cereal.visionipc.visionipc cimport cl_context

cdef extern from "selfdrive/modeld/runners/snpemodel.h":
  cdef cppclass SNPEModel:
    SNPEModel(string, float*, size_t, int, bool, cl_context)

selfdrive/modeld/runners/snpemodel_pyx.pyx (new file, 17 lines)
@@ -0,0 +1,17 @@
# distutils: language = c++
# cython: c_string_encoding=ascii

import os
from libcpp cimport bool
from libcpp.string cimport string

from .snpemodel cimport SNPEModel as cppSNPEModel
from selfdrive.modeld.models.commonmodel_pyx cimport CLContext
from selfdrive.modeld.runners.runmodel_pyx cimport RunModel
from selfdrive.modeld.runners.runmodel cimport RunModel as cppRunModel

os.environ['ADSP_LIBRARY_PATH'] = "/data/pythonpath/third_party/snpe/dsp/"

cdef class SNPEModel(RunModel):
  def __cinit__(self, string path, float[:] output, int runtime, bool use_tf8, CLContext context):
    self.model = <cppRunModel *> new cppSNPEModel(path, &output[0], len(output), runtime, use_tf8, context.context)
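
Note that ADSP_LIBRARY_PATH is set at module import time, before SNPE first probes for its DSP skeleton libraries. Constructing the runner directly instead of going through the ModelRunner factory would look roughly like this (the path and output size are placeholders, and the DSP runtime only exists on device):

    import numpy as np
    from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext
    from openpilot.selfdrive.modeld.runners.runmodel_pyx import Runtime
    from openpilot.selfdrive.modeld.runners.snpemodel_pyx import SNPEModel

    output = np.zeros(1024, dtype=np.float32)  # hypothetical output size
    model = SNPEModel('/path/to/model.dlc', output, Runtime.DSP, False, CLContext())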

selfdrive/modeld/runners/thneedmodel.cc (new file, 58 lines)
@@ -0,0 +1,58 @@
#include "selfdrive/modeld/runners/thneedmodel.h"

#include <string>

#include "common/swaglog.h"

ThneedModel::ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool luse_tf8, cl_context context) {
  thneed = new Thneed(true, context);
  thneed->load(path.c_str());
  thneed->clexec();

  recorded = false;
  output = _output;
}

void* ThneedModel::getCLBuffer(const std::string name) {
  int index = -1;
  for (int i = 0; i < inputs.size(); i++) {
    if (name == inputs[i]->name) {
      index = i;
      break;
    }
  }

  if (index == -1) {
    LOGE("Tried to get CL buffer for input `%s` but no input with this name exists", name.c_str());
    assert(false);
  }

  if (thneed->input_clmem.size() >= inputs.size()) {
    return &thneed->input_clmem[inputs.size() - index - 1];
  } else {
    return nullptr;
  }
}

void ThneedModel::execute() {
  if (!recorded) {
    thneed->record = true;
    float *input_buffers[inputs.size()];
    for (int i = 0; i < inputs.size(); i++) {
      input_buffers[inputs.size() - i - 1] = inputs[i]->buffer;
    }

    thneed->copy_inputs(input_buffers);
    thneed->clexec();
    thneed->copy_output(output);
    thneed->stop();

    recorded = true;
  } else {
    float *input_buffers[inputs.size()];
    for (int i = 0; i < inputs.size(); i++) {
      input_buffers[inputs.size() - i - 1] = inputs[i]->buffer;
    }
    thneed->execute(input_buffers, output);
  }
}
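
The first execute() call records the OpenCL command stream (record = true through thneed->stop()); every later call replays the captured kernels via thneed->execute(). Note that both paths hand thneed the input buffers in reverse registration order, matching the indexing in getCLBuffer above. In Python terms, with illustrative names:

    inputs = ['input_imgs', 'big_input_imgs', 'desire']  # registration order
    thneed_order = [inputs[len(inputs) - i - 1] for i in range(len(inputs))]
    assert thneed_order == ['desire', 'big_input_imgs', 'input_imgs']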

selfdrive/modeld/runners/thneedmodel.h (new file, 17 lines)
@@ -0,0 +1,17 @@
#pragma once

#include <string>

#include "selfdrive/modeld/runners/runmodel.h"
#include "selfdrive/modeld/thneed/thneed.h"

class ThneedModel : public RunModel {
public:
  ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL);
  void *getCLBuffer(const std::string name);
  void execute();
private:
  Thneed *thneed = NULL;
  bool recorded;
  float *output;
};

selfdrive/modeld/runners/thneedmodel.pxd (new file, 9 lines)
@@ -0,0 +1,9 @@
# distutils: language = c++

from libcpp.string cimport string

from cereal.visionipc.visionipc cimport cl_context

cdef extern from "selfdrive/modeld/runners/thneedmodel.h":
  cdef cppclass ThneedModel:
    ThneedModel(string, float*, size_t, int, bool, cl_context)

selfdrive/modeld/runners/thneedmodel_pyx.pyx (new file, 14 lines)
@@ -0,0 +1,14 @@
# distutils: language = c++
# cython: c_string_encoding=ascii

from libcpp cimport bool
from libcpp.string cimport string

from .thneedmodel cimport ThneedModel as cppThneedModel
from selfdrive.modeld.models.commonmodel_pyx cimport CLContext
from selfdrive.modeld.runners.runmodel_pyx cimport RunModel
from selfdrive.modeld.runners.runmodel cimport RunModel as cppRunModel

cdef class ThneedModel(RunModel):
  def __cinit__(self, string path, float[:] output, int runtime, bool use_tf8, CLContext context):
    self.model = <cppRunModel *> new cppThneedModel(path, &output[0], len(output), runtime, use_tf8, context.context)