wip

2024-04-27 03:27:27 -05:00
parent 886a019ad5
commit c22b4866eb
55 changed files with 49557 additions and 116523 deletions
--- a/selfdrive/modeld/thneed/thneed.h
+++ b/selfdrive/modeld/thneed/thneed.h
@@ -0,0 +1,133 @@
+#pragma once
+
+#ifndef __user  // NOTE(review): presumably needed by the kernel header msm_kgsl.h included below -- confirm
+#define __user __attribute__(())  // empty attribute list, so the annotation has no effect on the declaration it decorates
+#endif
+
+#include <cstdint>
+#include <cstdlib>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <CL/cl.h>
+
+#include "third_party/linux/include/msm_kgsl.h"
+
+using namespace std;  // NOTE(review): header-scope using-directive reaches every includer; the unqualified string/vector/unique_ptr/shared_ptr below rely on it
+
+cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value);  // same parameter list and return type as OpenCL's clSetKernelArg; defined outside this header
+
+namespace json11 {  // forward declaration so CLQueuedKernel::to_json() can name json11::Json without a json11 include here
+  class Json;
+}
+class Thneed;  // forward declaration: the classes below hold Thneed* before Thneed itself is defined
+
+class GPUMalloc {  // declaration only; every member function is defined outside this header
+  public:
+    GPUMalloc(int size, int fd);  // NOTE(review): presumably `size` is the pool size in bytes and `fd` a device file descriptor -- confirm in the .cc
+    ~GPUMalloc();  // NOTE(review): destructor is user-declared while copy ctor/assignment stay implicit; if it releases anything, a copy would release it twice -- confirm
+    void *alloc(int size);  // returns an untyped pointer; NOTE(review): presumably carves `size` bytes out of the pool -- confirm
+  private:
+    uint64_t base;  // NOTE(review): presumably the next free address of the pool -- confirm
+    int remaining;  // NOTE(review): presumably the bytes still available -- confirm
+};
+
+class CLQueuedKernel {  // bundles a cl_kernel with its argument metadata and work sizes; non-inline members are defined outside this header
+  public:
+    CLQueuedKernel(Thneed *lthneed) : thneed(lthneed) {}  // binds the Thneed only; every other field keeps its in-class default
+    CLQueuedKernel(Thneed *lthneed,
+                   cl_kernel _kernel,
+                   cl_uint _work_dim,
+                   const size_t *_global_work_size,
+                   const size_t *_local_work_size);  // defined out of line; parameters mirror the kernel / work_dim / *_work_size members
+    cl_int exec();  // cl_int result; NOTE(review): presumably an OpenCL status code from enqueueing the kernel -- confirm
+    void debug_print(bool verbose);
+    int get_arg_num(const char *search_arg_name);  // NOTE(review): presumably the index of the named argument in arg_names -- confirm
+    cl_program program = NULL;  // defaulted so the one-argument ctor does not leave it indeterminate
+    string name;
+    cl_uint num_args = 0;  // defaulted so the one-argument ctor does not leave it indeterminate
+    vector<string> arg_names;
+    vector<string> arg_types;
+    vector<string> args;  // NOTE(review): presumably one raw-bytes entry per kernel argument, sized by args_size -- confirm
+    vector<int> args_size;
+    cl_kernel kernel = NULL;
+    json11::Json to_json() const;
+
+    cl_uint work_dim = 0;  // defaulted so the one-argument ctor does not leave it indeterminate
+    size_t global_work_size[3] = {0};
+    size_t local_work_size[3] = {0};
+  private:
+    Thneed *thneed = NULL;  // raw back-pointer; never deleted by this class (destructor is implicit)
+};
+
+class CachedIoctl {  // polymorphic base; Thneed::cmds owns instances through unique_ptr<CachedIoctl>
+  public:
+    virtual ~CachedIoctl() = default;  // virtual so destroying a CachedSync/CachedCommand through a base pointer runs the derived destructor
+    virtual void exec() {}  // default is a no-op; derived classes declare their own exec()
+};
+class CachedSync: public CachedIoctl {  // CachedIoctl that keeps a Thneed pointer and its own copy of a data string
+  public:
+    CachedSync(Thneed *lthneed, string ldata) : thneed(lthneed), data(ldata) {}  // stores both arguments unchanged
+    void exec() override;  // replaces CachedIoctl::exec(); defined out of line
+  private:
+    Thneed *thneed;  // raw pointer; this class never deletes it
+    string data;
+};
+
+class CachedCommand: public CachedIoctl {  // CachedIoctl constructed from a kgsl_gpu_command; all members are defined out of line
+  public:
+    CachedCommand(Thneed *lthneed, kgsl_gpu_command *cmd);
+    void exec() override;  // replaces CachedIoctl::exec()
+  private:
+    void disassemble(int cmd_index);
+    kgsl_gpu_command cache;  // held by value; NOTE(review): presumably a copy of *cmd taken in the ctor -- confirm
+    unique_ptr<kgsl_command_object[]> cmds;  // owned array, released automatically
+    unique_ptr<kgsl_command_object[]> objs;  // owned array, released automatically
+    Thneed *thneed;  // raw pointer; this class never deletes it
+    vector<shared_ptr<CLQueuedKernel>> kq;  // shares ownership of the CLQueuedKernel objects it lists
+};
+
+class Thneed {  // holds OpenCL handles, input/output buffers and the kernel lists; every member function is defined outside this header
+  public:
+    Thneed(bool do_clinit=false, cl_context _context = NULL);  // NOTE(review): presumably do_clinit selects whether the private clinit() runs -- confirm
+    void stop();
+    void execute(float **finputs, float *foutput, bool slow=false);  // takes an array of input pointers and a single output pointer
+    void wait();
+
+    vector<cl_mem> input_clmem;  // NOTE(review): input_clmem / inputs / input_sizes look like parallel per-input arrays -- confirm
+    vector<void *> inputs;
+    vector<size_t> input_sizes;
+    cl_mem output = NULL;
+
+    cl_context context = NULL;
+    cl_command_queue command_queue;  // no in-class initializer; NOTE(review): confirm it is always assigned before use
+    cl_device_id device_id;  // no in-class initializer
+    int context_id;  // no in-class initializer
+
+    // protected?
+    bool record = false;
+    int debug;  // no in-class initializer
+    int timestamp;  // no in-class initializer
+
+#ifdef QCOM2
+    unique_ptr<GPUMalloc> ram;  // ram, cmds and fd exist only when QCOM2 is defined, so the class layout depends on that macro
+    vector<unique_ptr<CachedIoctl> > cmds;  // owns the CachedIoctl objects and destroys them through base-class pointers
+    int fd;  // no in-class initializer
+#endif
+
+    // all CL kernels
+    void copy_inputs(float **finputs, bool internal=false);
+    void copy_output(float *foutput);
+    cl_int clexec();  // cl_int result; NOTE(review): presumably an OpenCL status code -- confirm
+    vector<shared_ptr<CLQueuedKernel> > kq;
+
+    // pending CL kernels
+    vector<shared_ptr<CLQueuedKernel> > ckq;
+
+    // loading
+    void load(const char *filename);
+  private:
+    void clinit();
+};
+