wip

2024-04-27 03:22:06 -05:00
parent 4ffbd8982f
commit c95d41c62e
25 changed files with 3801 additions and 53 deletions
--- a/system/camerad/cameras/camera_common.cc
+++ b/system/camerad/cameras/camera_common.cc
@@ -0,0 +1,355 @@
+#include "system/camerad/cameras/camera_common.h"
+
+#include <cassert>
+#include <string>
+
+#include "third_party/libyuv/include/libyuv.h"
+#include <jpeglib.h>
+
+#include "common/clutil.h"
+#include "common/swaglog.h"
+#include "common/util.h"
+#include "third_party/linux/include/msm_media_info.h"
+
+#include "system/camerad/cameras/camera_qcom2.h"
+#ifdef QCOM2
+#include "CL/cl_ext_qcom.h"
+#endif
+
+ExitHandler do_exit;
+
+class Debayer {
+public:
+  Debayer(cl_device_id device_id, cl_context context, const CameraBuf *b, const CameraState *s, int buf_width, int uv_offset) {
+    char args[4096];
+    const SensorInfo *ci = s->ci.get();
+    snprintf(args, sizeof(args),
+             "-cl-fast-relaxed-math -cl-denorms-are-zero "
+             "-DFRAME_WIDTH=%d -DFRAME_HEIGHT=%d -DFRAME_STRIDE=%d -DFRAME_OFFSET=%d "
+             "-DRGB_WIDTH=%d -DRGB_HEIGHT=%d -DYUV_STRIDE=%d -DUV_OFFSET=%d "
+             "-DIS_OX=%d -DIS_OS=%d -DIS_BGGR=%d -DCAM_NUM=%d%s",
+             ci->frame_width, ci->frame_height, ci->frame_stride, ci->frame_offset,
+             b->rgb_width, b->rgb_height, buf_width, uv_offset,
+             ci->image_sensor == cereal::FrameData::ImageSensor::OX03C10,
+             ci->image_sensor == cereal::FrameData::ImageSensor::OS04C10,
+             ci->image_sensor == cereal::FrameData::ImageSensor::OS04C10,
+             s->camera_num, s->camera_num==1 ? " -DVIGNETTING" : "");
+    const char *cl_file = "cameras/real_debayer.cl";
+    cl_program prg_debayer = cl_program_from_file(context, device_id, cl_file, args);
+    krnl_ = CL_CHECK_ERR(clCreateKernel(prg_debayer, "debayer10", &err));
+    CL_CHECK(clReleaseProgram(prg_debayer));
+
+  }
+
+  void queue(cl_command_queue q, cl_mem cam_buf_cl, cl_mem buf_cl, int width, int height, cl_event *debayer_event) {
+    CL_CHECK(clSetKernelArg(krnl_, 0, sizeof(cl_mem), &cam_buf_cl));
+    CL_CHECK(clSetKernelArg(krnl_, 1, sizeof(cl_mem), &buf_cl));
+
+    const size_t globalWorkSize[] = {size_t(width / 2), size_t(height / 2)};
+    const int debayer_local_worksize = 16;
+    const size_t localWorkSize[] = {debayer_local_worksize, debayer_local_worksize};
+    CL_CHECK(clEnqueueNDRangeKernel(q, krnl_, 2, NULL, globalWorkSize, localWorkSize, 0, 0, debayer_event));
+  }
+
+  ~Debayer() {
+    CL_CHECK(clReleaseKernel(krnl_));
+  }
+
+private:
+  cl_kernel krnl_;
+};
+
+void CameraBuf::init(cl_device_id device_id, cl_context context, CameraState *s, VisionIpcServer * v, int frame_cnt, VisionStreamType type) {
+  vipc_server = v;
+  stream_type = type;
+  frame_buf_count = frame_cnt;
+
+  const SensorInfo *ci = s->ci.get();
+  // RAW frame
+  const int frame_size = (ci->frame_height + ci->extra_height) * ci->frame_stride;
+  camera_bufs = std::make_unique<VisionBuf[]>(frame_buf_count);
+  camera_bufs_metadata = std::make_unique<FrameMetadata[]>(frame_buf_count);
+
+  for (int i = 0; i < frame_buf_count; i++) {
+    camera_bufs[i].allocate(frame_size);
+    camera_bufs[i].init_cl(device_id, context);
+  }
+  LOGD("allocated %d CL buffers", frame_buf_count);
+
+  rgb_width = ci->frame_width;
+  rgb_height = ci->frame_height;
+
+  int nv12_width = VENUS_Y_STRIDE(COLOR_FMT_NV12, rgb_width);
+  int nv12_height = VENUS_Y_SCANLINES(COLOR_FMT_NV12, rgb_height);
+  assert(nv12_width == VENUS_UV_STRIDE(COLOR_FMT_NV12, rgb_width));
+  assert(nv12_height/2 == VENUS_UV_SCANLINES(COLOR_FMT_NV12, rgb_height));
+  size_t nv12_uv_offset = nv12_width * nv12_height;
+
+  // the encoder HW tells us the size it wants after setting it up.
+  // TODO: VENUS_BUFFER_SIZE should give the size, but it's too small. dependent on encoder settings?
+  size_t nv12_size = (rgb_width >= 2688 ? 2900 : 2346)*nv12_width;
+
+  vipc_server->create_buffers_with_sizes(stream_type, YUV_BUFFER_COUNT, false, rgb_width, rgb_height, nv12_size, nv12_width, nv12_uv_offset);
+  LOGD("created %d YUV vipc buffers with size %dx%d", YUV_BUFFER_COUNT, nv12_width, nv12_height);
+
+  debayer = new Debayer(device_id, context, this, s, nv12_width, nv12_uv_offset);
+
+  const cl_queue_properties props[] = {0};  //CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0};
+  q = CL_CHECK_ERR(clCreateCommandQueueWithProperties(context, device_id, props, &err));
+}
+
+CameraBuf::~CameraBuf() {
+  for (int i = 0; i < frame_buf_count; i++) {
+    camera_bufs[i].free();
+  }
+  if (debayer) delete debayer;
+  if (q) CL_CHECK(clReleaseCommandQueue(q));
+}
+
+bool CameraBuf::acquire() {
+  if (!safe_queue.try_pop(cur_buf_idx, 50)) return false;
+
+  if (camera_bufs_metadata[cur_buf_idx].frame_id == -1) {
+    LOGE("no frame data? wtf");
+    return false;
+  }
+
+  cur_frame_data = camera_bufs_metadata[cur_buf_idx];
+  cur_yuv_buf = vipc_server->get_buffer(stream_type);
+  cur_camera_buf = &camera_bufs[cur_buf_idx];
+
+  double start_time = millis_since_boot();
+  cl_event event;
+  debayer->queue(q, camera_bufs[cur_buf_idx].buf_cl, cur_yuv_buf->buf_cl, rgb_width, rgb_height, &event);
+  clWaitForEvents(1, &event);
+  CL_CHECK(clReleaseEvent(event));
+  cur_frame_data.processing_time = (millis_since_boot() - start_time) / 1000.0;
+
+  VisionIpcBufExtra extra = {
+    cur_frame_data.frame_id,
+    cur_frame_data.timestamp_sof,
+    cur_frame_data.timestamp_eof,
+  };
+  cur_yuv_buf->set_frame_id(cur_frame_data.frame_id);
+  vipc_server->send(cur_yuv_buf, &extra);
+
+  return true;
+}
+
+void CameraBuf::queue(size_t buf_idx) {
+  safe_queue.push(buf_idx);
+}
+
+// common functions
+
+void fill_frame_data(cereal::FrameData::Builder &framed, const FrameMetadata &frame_data, CameraState *c) {
+  framed.setFrameId(frame_data.frame_id);
+  framed.setRequestId(frame_data.request_id);
+  framed.setTimestampEof(frame_data.timestamp_eof);
+  framed.setTimestampSof(frame_data.timestamp_sof);
+  framed.setIntegLines(frame_data.integ_lines);
+  framed.setGain(frame_data.gain);
+  framed.setHighConversionGain(frame_data.high_conversion_gain);
+  framed.setMeasuredGreyFraction(frame_data.measured_grey_fraction);
+  framed.setTargetGreyFraction(frame_data.target_grey_fraction);
+  framed.setProcessingTime(frame_data.processing_time);
+
+  const float ev = c->cur_ev[frame_data.frame_id % 3];
+  const float perc = util::map_val(ev, c->ci->min_ev, c->ci->max_ev, 0.0f, 100.0f);
+  framed.setExposureValPercent(perc);
+  framed.setSensor(c->ci->image_sensor);
+}
+
+kj::Array<uint8_t> get_raw_frame_image(const CameraBuf *b) {
+  const uint8_t *dat = (const uint8_t *)b->cur_camera_buf->addr;
+
+  kj::Array<uint8_t> frame_image = kj::heapArray<uint8_t>(b->cur_camera_buf->len);
+  uint8_t *resized_dat = frame_image.begin();
+
+  memcpy(resized_dat, dat, b->cur_camera_buf->len);
+
+  return kj::mv(frame_image);
+}
+
+static kj::Array<capnp::byte> yuv420_to_jpeg(const CameraBuf *b, int thumbnail_width, int thumbnail_height) {
+  int downscale = b->cur_yuv_buf->width / thumbnail_width;
+  assert(downscale * thumbnail_height == b->cur_yuv_buf->height);
+  int in_stride = b->cur_yuv_buf->stride;
+
+  // make the buffer big enough. jpeg_write_raw_data requires 16-pixels aligned height to be used.
+  std::unique_ptr<uint8[]> buf(new uint8_t[(thumbnail_width * ((thumbnail_height + 15) & ~15) * 3) / 2]);
+  uint8_t *y_plane = buf.get();
+  uint8_t *u_plane = y_plane + thumbnail_width * thumbnail_height;
+  uint8_t *v_plane = u_plane + (thumbnail_width * thumbnail_height) / 4;
+  {
+    // subsampled conversion from nv12 to yuv
+    for (int hy = 0; hy < thumbnail_height/2; hy++) {
+      for (int hx = 0; hx < thumbnail_width/2; hx++) {
+        int ix = hx * downscale + (downscale-1)/2;
+        int iy = hy * downscale + (downscale-1)/2;
+        y_plane[(hy*2 + 0)*thumbnail_width + (hx*2 + 0)] = b->cur_yuv_buf->y[(iy*2 + 0) * in_stride + ix*2 + 0];
+        y_plane[(hy*2 + 0)*thumbnail_width + (hx*2 + 1)] = b->cur_yuv_buf->y[(iy*2 + 0) * in_stride + ix*2 + 1];
+        y_plane[(hy*2 + 1)*thumbnail_width + (hx*2 + 0)] = b->cur_yuv_buf->y[(iy*2 + 1) * in_stride + ix*2 + 0];
+        y_plane[(hy*2 + 1)*thumbnail_width + (hx*2 + 1)] = b->cur_yuv_buf->y[(iy*2 + 1) * in_stride + ix*2 + 1];
+        u_plane[hy*thumbnail_width/2 + hx] = b->cur_yuv_buf->uv[iy*in_stride + ix*2 + 0];
+        v_plane[hy*thumbnail_width/2 + hx] = b->cur_yuv_buf->uv[iy*in_stride + ix*2 + 1];
+      }
+    }
+  }
+
+  struct jpeg_compress_struct cinfo;
+  struct jpeg_error_mgr jerr;
+  cinfo.err = jpeg_std_error(&jerr);
+  jpeg_create_compress(&cinfo);
+
+  uint8_t *thumbnail_buffer = nullptr;
+  size_t thumbnail_len = 0;
+  jpeg_mem_dest(&cinfo, &thumbnail_buffer, &thumbnail_len);
+
+  cinfo.image_width = thumbnail_width;
+  cinfo.image_height = thumbnail_height;
+  cinfo.input_components = 3;
+
+  jpeg_set_defaults(&cinfo);
+  jpeg_set_colorspace(&cinfo, JCS_YCbCr);
+  // configure sampling factors for yuv420.
+  cinfo.comp_info[0].h_samp_factor = 2;  // Y
+  cinfo.comp_info[0].v_samp_factor = 2;
+  cinfo.comp_info[1].h_samp_factor = 1;  // U
+  cinfo.comp_info[1].v_samp_factor = 1;
+  cinfo.comp_info[2].h_samp_factor = 1;  // V
+  cinfo.comp_info[2].v_samp_factor = 1;
+  cinfo.raw_data_in = TRUE;
+
+  jpeg_set_quality(&cinfo, 50, TRUE);
+  jpeg_start_compress(&cinfo, TRUE);
+
+  JSAMPROW y[16], u[8], v[8];
+  JSAMPARRAY planes[3]{y, u, v};
+
+  for (int line = 0; line < cinfo.image_height; line += 16) {
+    for (int i = 0; i < 16; ++i) {
+      y[i] = y_plane + (line + i) * cinfo.image_width;
+      if (i % 2 == 0) {
+        int offset = (cinfo.image_width / 2) * ((i + line) / 2);
+        u[i / 2] = u_plane + offset;
+        v[i / 2] = v_plane + offset;
+      }
+    }
+    jpeg_write_raw_data(&cinfo, planes, 16);
+  }
+
+  jpeg_finish_compress(&cinfo);
+  jpeg_destroy_compress(&cinfo);
+
+  kj::Array<capnp::byte> dat = kj::heapArray<capnp::byte>(thumbnail_buffer, thumbnail_len);
+  free(thumbnail_buffer);
+  return dat;
+}
+
+static void publish_thumbnail(PubMaster *pm, const CameraBuf *b) {
+  auto thumbnail = yuv420_to_jpeg(b, b->rgb_width / 4, b->rgb_height / 4);
+  if (thumbnail.size() == 0) return;
+
+  MessageBuilder msg;
+  auto thumbnaild = msg.initEvent().initThumbnail();
+  thumbnaild.setFrameId(b->cur_frame_data.frame_id);
+  thumbnaild.setTimestampEof(b->cur_frame_data.timestamp_eof);
+  thumbnaild.setThumbnail(thumbnail);
+
+  pm->send("thumbnail", msg);
+}
+
+float set_exposure_target(const CameraBuf *b, int x_start, int x_end, int x_skip, int y_start, int y_end, int y_skip) {
+  int lum_med;
+  uint32_t lum_binning[256] = {0};
+  const uint8_t *pix_ptr = b->cur_yuv_buf->y;
+
+  unsigned int lum_total = 0;
+  for (int y = y_start; y < y_end; y += y_skip) {
+    for (int x = x_start; x < x_end; x += x_skip) {
+      uint8_t lum = pix_ptr[(y * b->rgb_width) + x];
+      lum_binning[lum]++;
+      lum_total += 1;
+    }
+  }
+
+  // Find mean lumimance value
+  unsigned int lum_cur = 0;
+  for (lum_med = 255; lum_med >= 0; lum_med--) {
+    lum_cur += lum_binning[lum_med];
+
+    if (lum_cur >= lum_total / 2) {
+      break;
+    }
+  }
+
+  return lum_med / 256.0;
+}
+
+void *processing_thread(MultiCameraState *cameras, CameraState *cs, process_thread_cb callback) {
+  const char *thread_name = nullptr;
+  if (cs == &cameras->road_cam) {
+    thread_name = "RoadCamera";
+  } else if (cs == &cameras->driver_cam) {
+    thread_name = "DriverCamera";
+  } else {
+    thread_name = "WideRoadCamera";
+  }
+  util::set_thread_name(thread_name);
+
+  uint32_t cnt = 0;
+  while (!do_exit) {
+    if (!cs->buf.acquire()) continue;
+
+    callback(cameras, cs, cnt);
+
+    if (cs == &(cameras->road_cam) && cameras->pm && cnt % 100 == 3) {
+      // this takes 10ms???
+      publish_thumbnail(cameras->pm, &(cs->buf));
+    }
+    ++cnt;
+  }
+  return NULL;
+}
+
+std::thread start_process_thread(MultiCameraState *cameras, CameraState *cs, process_thread_cb callback) {
+  return std::thread(processing_thread, cameras, cs, callback);
+}
+
+void camerad_thread() {
+  cl_device_id device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT);
+#ifdef QCOM2
+  const cl_context_properties props[] = {CL_CONTEXT_PRIORITY_HINT_QCOM, CL_PRIORITY_HINT_HIGH_QCOM, 0};
+  cl_context context = CL_CHECK_ERR(clCreateContext(props, 1, &device_id, NULL, NULL, &err));
+#else
+  cl_context context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, NULL, NULL, &err));
+#endif
+
+  {
+    MultiCameraState cameras = {};
+    VisionIpcServer vipc_server("camerad", device_id, context);
+
+    cameras_open(&cameras);
+    cameras_init(&vipc_server, &cameras, device_id, context);
+
+    vipc_server.start_listener();
+
+    cameras_run(&cameras);
+  }
+
+  CL_CHECK(clReleaseContext(context));
+}
+
+int open_v4l_by_name_and_index(const char name[], int index, int flags) {
+  for (int v4l_index = 0; /**/; ++v4l_index) {
+    std::string v4l_name = util::read_file(util::string_format("/sys/class/video4linux/v4l-subdev%d/name", v4l_index));
+    if (v4l_name.empty()) return -1;
+    if (v4l_name.find(name) == 0) {
+      if (index == 0) {
+        return HANDLE_EINTR(open(util::string_format("/dev/v4l-subdev%d", v4l_index).c_str(), flags));
+      }
+      index--;
+    }
+  }
+}
--- a/system/camerad/cameras/camera_common.h
+++ b/system/camerad/cameras/camera_common.h
@@ -0,0 +1,87 @@
+#pragma once
+
+#include <fcntl.h>
+#include <memory>
+#include <thread>
+
+#include "cereal/messaging/messaging.h"
+#include "cereal/visionipc/visionipc_server.h"
+#include "common/queue.h"
+
+const int YUV_BUFFER_COUNT = 20;
+
+enum CameraType {
+  RoadCam = 0,
+  DriverCam,
+  WideRoadCam
+};
+
+// for debugging
+const bool env_disable_road = getenv("DISABLE_ROAD") != NULL;
+const bool env_disable_wide_road = getenv("DISABLE_WIDE_ROAD") != NULL;
+const bool env_disable_driver = getenv("DISABLE_DRIVER") != NULL;
+const bool env_debug_frames = getenv("DEBUG_FRAMES") != NULL;
+const bool env_log_raw_frames = getenv("LOG_RAW_FRAMES") != NULL;
+const bool env_ctrl_exp_from_params = getenv("CTRL_EXP_FROM_PARAMS") != NULL;
+
+typedef struct FrameMetadata {
+  uint32_t frame_id;
+  uint32_t request_id;
+
+  // Timestamps
+  uint64_t timestamp_sof;
+  uint64_t timestamp_eof;
+
+  // Exposure
+  unsigned int integ_lines;
+  bool high_conversion_gain;
+  float gain;
+  float measured_grey_fraction;
+  float target_grey_fraction;
+
+  float processing_time;
+} FrameMetadata;
+
+struct MultiCameraState;
+class CameraState;
+class Debayer;
+
+class CameraBuf {
+private:
+  VisionIpcServer *vipc_server;
+  Debayer *debayer = nullptr;
+  VisionStreamType stream_type;
+  int cur_buf_idx;
+  SafeQueue<int> safe_queue;
+  int frame_buf_count;
+
+public:
+  cl_command_queue q;
+  FrameMetadata cur_frame_data;
+  VisionBuf *cur_yuv_buf;
+  VisionBuf *cur_camera_buf;
+  std::unique_ptr<VisionBuf[]> camera_bufs;
+  std::unique_ptr<FrameMetadata[]> camera_bufs_metadata;
+  int rgb_width, rgb_height;
+
+  CameraBuf() = default;
+  ~CameraBuf();
+  void init(cl_device_id device_id, cl_context context, CameraState *s, VisionIpcServer * v, int frame_cnt, VisionStreamType type);
+  bool acquire();
+  void queue(size_t buf_idx);
+};
+
+typedef void (*process_thread_cb)(MultiCameraState *s, CameraState *c, int cnt);
+
+void fill_frame_data(cereal::FrameData::Builder &framed, const FrameMetadata &frame_data, CameraState *c);
+kj::Array<uint8_t> get_raw_frame_image(const CameraBuf *b);
+float set_exposure_target(const CameraBuf *b, int x_start, int x_end, int x_skip, int y_start, int y_end, int y_skip);
+std::thread start_process_thread(MultiCameraState *cameras, CameraState *cs, process_thread_cb callback);
+
+void cameras_init(VisionIpcServer *v, MultiCameraState *s, cl_device_id device_id, cl_context ctx);
+void cameras_open(MultiCameraState *s);
+void cameras_run(MultiCameraState *s);
+void cameras_close(MultiCameraState *s);
+void camerad_thread();
+
+int open_v4l_by_name_and_index(const char name[], int index = 0, int flags = O_RDWR | O_NONBLOCK);
--- a/system/camerad/cameras/camera_qcom2.cc
+++ b/system/camerad/cameras/camera_qcom2.cc
--- a/system/camerad/cameras/camera_qcom2.h
+++ b/system/camerad/cameras/camera_qcom2.h
@@ -0,0 +1,98 @@
+#pragma once
+
+#include <memory>
+#include <utility>
+
+#include "system/camerad/cameras/camera_common.h"
+#include "system/camerad/cameras/camera_util.h"
+#include "system/camerad/sensors/sensor.h"
+#include "common/params.h"
+#include "common/util.h"
+
+#define FRAME_BUF_COUNT 4
+
+class CameraState {
+public:
+  MultiCameraState *multi_cam_state;
+  std::unique_ptr<const SensorInfo> ci;
+  bool enabled;
+
+  std::mutex exp_lock;
+
+  int exposure_time;
+  bool dc_gain_enabled;
+  int dc_gain_weight;
+  int gain_idx;
+  float analog_gain_frac;
+
+  float cur_ev[3];
+  float best_ev_score;
+  int new_exp_g;
+  int new_exp_t;
+
+  float measured_grey_fraction;
+  float target_grey_fraction;
+
+  unique_fd sensor_fd;
+  unique_fd csiphy_fd;
+
+  int camera_num;
+
+  void handle_camera_event(void *evdat);
+  void update_exposure_score(float desired_ev, int exp_t, int exp_g_idx, float exp_gain);
+  void set_camera_exposure(float grey_frac);
+
+  void sensors_start();
+
+  void camera_open(MultiCameraState *multi_cam_state, int camera_num, bool enabled);
+  void sensor_set_parameters();
+  void camera_map_bufs(MultiCameraState *s);
+  void camera_init(MultiCameraState *s, VisionIpcServer *v, cl_device_id device_id, cl_context ctx, VisionStreamType yuv_type);
+  void camera_close();
+
+  int32_t session_handle;
+  int32_t sensor_dev_handle;
+  int32_t isp_dev_handle;
+  int32_t csiphy_dev_handle;
+
+  int32_t link_handle;
+
+  int buf0_handle;
+  int buf_handle[FRAME_BUF_COUNT];
+  int sync_objs[FRAME_BUF_COUNT];
+  int request_ids[FRAME_BUF_COUNT];
+  int request_id_last;
+  int frame_id_last;
+  int idx_offset;
+  bool skipped;
+
+  CameraBuf buf;
+  MemoryManager mm;
+
+  void config_isp(int io_mem_handle, int fence, int request_id, int buf0_mem_handle, int buf0_offset);
+  void enqueue_req_multi(int start, int n, bool dp);
+  void enqueue_buffer(int i, bool dp);
+  int clear_req_queue();
+
+  int sensors_init();
+  void sensors_poke(int request_id);
+  void sensors_i2c(const struct i2c_random_wr_payload* dat, int len, int op_code, bool data_word);
+
+private:
+  // for debugging
+  Params params;
+};
+
+typedef struct MultiCameraState {
+  unique_fd video0_fd;
+  unique_fd cam_sync_fd;
+  unique_fd isp_fd;
+  int device_iommu;
+  int cdm_iommu;
+
+  CameraState road_cam;
+  CameraState wide_road_cam;
+  CameraState driver_cam;
+
+  PubMaster *pm;
+} MultiCameraState;
--- a/system/camerad/cameras/camera_util.cc
+++ b/system/camerad/cameras/camera_util.cc
@@ -0,0 +1,138 @@
+#include "system/camerad/cameras/camera_util.h"
+
+#include <string.h>
+
+#include <cassert>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include "common/swaglog.h"
+#include "common/util.h"
+
+// ************** low level camera helpers ****************
+int do_cam_control(int fd, int op_code, void *handle, int size) {
+  struct cam_control camcontrol = {0};
+  camcontrol.op_code = op_code;
+  camcontrol.handle = (uint64_t)handle;
+  if (size == 0) {
+    camcontrol.size = 8;
+    camcontrol.handle_type = CAM_HANDLE_MEM_HANDLE;
+  } else {
+    camcontrol.size = size;
+    camcontrol.handle_type = CAM_HANDLE_USER_POINTER;
+  }
+
+  int ret = HANDLE_EINTR(ioctl(fd, VIDIOC_CAM_CONTROL, &camcontrol));
+  if (ret == -1) {
+    LOGE("VIDIOC_CAM_CONTROL error: op_code %d - errno %d", op_code, errno);
+  }
+  return ret;
+}
+
+std::optional<int32_t> device_acquire(int fd, int32_t session_handle, void *data, uint32_t num_resources) {
+  struct cam_acquire_dev_cmd cmd = {
+    .session_handle = session_handle,
+    .handle_type = CAM_HANDLE_USER_POINTER,
+    .num_resources = (uint32_t)(data ? num_resources : 0),
+    .resource_hdl = (uint64_t)data,
+  };
+  int err = do_cam_control(fd, CAM_ACQUIRE_DEV, &cmd, sizeof(cmd));
+  return err == 0 ? std::make_optional(cmd.dev_handle) : std::nullopt;
+}
+
+int device_config(int fd, int32_t session_handle, int32_t dev_handle, uint64_t packet_handle) {
+  struct cam_config_dev_cmd cmd = {
+    .session_handle = session_handle,
+    .dev_handle = dev_handle,
+    .packet_handle = packet_handle,
+  };
+  return do_cam_control(fd, CAM_CONFIG_DEV, &cmd, sizeof(cmd));
+}
+
+int device_control(int fd, int op_code, int session_handle, int dev_handle) {
+  // start stop and release are all the same
+  struct cam_start_stop_dev_cmd cmd { .session_handle = session_handle, .dev_handle = dev_handle };
+  return do_cam_control(fd, op_code, &cmd, sizeof(cmd));
+}
+
+void *alloc_w_mmu_hdl(int video0_fd, int len, uint32_t *handle, int align, int flags, int mmu_hdl, int mmu_hdl2) {
+  struct cam_mem_mgr_alloc_cmd mem_mgr_alloc_cmd = {0};
+  mem_mgr_alloc_cmd.len = len;
+  mem_mgr_alloc_cmd.align = align;
+  mem_mgr_alloc_cmd.flags = flags;
+  mem_mgr_alloc_cmd.num_hdl = 0;
+  if (mmu_hdl != 0) {
+    mem_mgr_alloc_cmd.mmu_hdls[0] = mmu_hdl;
+    mem_mgr_alloc_cmd.num_hdl++;
+  }
+  if (mmu_hdl2 != 0) {
+    mem_mgr_alloc_cmd.mmu_hdls[1] = mmu_hdl2;
+    mem_mgr_alloc_cmd.num_hdl++;
+  }
+
+  do_cam_control(video0_fd, CAM_REQ_MGR_ALLOC_BUF, &mem_mgr_alloc_cmd, sizeof(mem_mgr_alloc_cmd));
+  *handle = mem_mgr_alloc_cmd.out.buf_handle;
+
+  void *ptr = NULL;
+  if (mem_mgr_alloc_cmd.out.fd > 0) {
+    ptr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, mem_mgr_alloc_cmd.out.fd, 0);
+    assert(ptr != MAP_FAILED);
+  }
+
+  // LOGD("allocated: %x %d %llx mapped %p", mem_mgr_alloc_cmd.out.buf_handle, mem_mgr_alloc_cmd.out.fd, mem_mgr_alloc_cmd.out.vaddr, ptr);
+
+  return ptr;
+}
+
+void release(int video0_fd, uint32_t handle) {
+  int ret;
+  struct cam_mem_mgr_release_cmd mem_mgr_release_cmd = {0};
+  mem_mgr_release_cmd.buf_handle = handle;
+
+  ret = do_cam_control(video0_fd, CAM_REQ_MGR_RELEASE_BUF, &mem_mgr_release_cmd, sizeof(mem_mgr_release_cmd));
+  assert(ret == 0);
+}
+
+void release_fd(int video0_fd, uint32_t handle) {
+  // handle to fd
+  close(handle>>16);
+  release(video0_fd, handle);
+}
+
+void *MemoryManager::alloc_buf(int size, uint32_t *handle) {
+  lock.lock();
+  void *ptr;
+  if (!cached_allocations[size].empty()) {
+    ptr = cached_allocations[size].front();
+    cached_allocations[size].pop();
+    *handle = handle_lookup[ptr];
+  } else {
+    ptr = alloc_w_mmu_hdl(video0_fd, size, handle);
+    handle_lookup[ptr] = *handle;
+    size_lookup[ptr] = size;
+  }
+  lock.unlock();
+  memset(ptr, 0, size);
+  return ptr;
+}
+
+void MemoryManager::free(void *ptr) {
+  lock.lock();
+  cached_allocations[size_lookup[ptr]].push(ptr);
+  lock.unlock();
+}
+
+MemoryManager::~MemoryManager() {
+  for (auto& x : cached_allocations) {
+    while (!x.second.empty()) {
+      void *ptr = x.second.front();
+      x.second.pop();
+      LOGD("freeing cached allocation %p with size %d", ptr, size_lookup[ptr]);
+      munmap(ptr, size_lookup[ptr]);
+      release_fd(video0_fd, handle_lookup[ptr]);
+      handle_lookup.erase(ptr);
+      size_lookup.erase(ptr);
+    }
+  }
+}
--- a/system/camerad/cameras/camera_util.h
+++ b/system/camerad/cameras/camera_util.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <functional>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <queue>
+
+#include <media/cam_req_mgr.h>
+
+std::optional<int32_t> device_acquire(int fd, int32_t session_handle, void *data, uint32_t num_resources=1);
+int device_config(int fd, int32_t session_handle, int32_t dev_handle, uint64_t packet_handle);
+int device_control(int fd, int op_code, int session_handle, int dev_handle);
+int do_cam_control(int fd, int op_code, void *handle, int size);
+void *alloc_w_mmu_hdl(int video0_fd, int len, uint32_t *handle, int align = 8, int flags = CAM_MEM_FLAG_KMD_ACCESS | CAM_MEM_FLAG_UMD_ACCESS | CAM_MEM_FLAG_CMD_BUF_TYPE,
+                      int mmu_hdl = 0, int mmu_hdl2 = 0);
+void release(int video0_fd, uint32_t handle);
+
+class MemoryManager {
+  public:
+    void init(int _video0_fd) { video0_fd = _video0_fd; }
+    ~MemoryManager();
+
+    template <class T>
+    auto alloc(int len, uint32_t *handle) {
+      return std::unique_ptr<T, std::function<void(void *)>>((T*)alloc_buf(len, handle), [this](void *ptr) { this->free(ptr); });
+    }
+
+  private:
+    void *alloc_buf(int len, uint32_t *handle);
+    void free(void *ptr);
+
+    std::mutex lock;
+    std::map<void *, uint32_t> handle_lookup;
+    std::map<void *, int> size_lookup;
+    std::map<int, std::queue<void *> > cached_allocations;
+    int video0_fd;
+};
--- a/system/camerad/cameras/real_debayer.cl
+++ b/system/camerad/cameras/real_debayer.cl
@@ -8,7 +8,7 @@

 float3 color_correct(float3 rgb) {
  // color correction
-  #if IS_OX
+  #if IS_OX | IS_OS
  float3 x = rgb.x * (float3)(1.5664815 , -0.29808738, -0.03973474);
  x += rgb.y * (float3)(-0.48672447,  1.41914433, -0.40295248);
  x += rgb.z * (float3)(-0.07975703, -0.12105695,  1.44268722);
@@ -20,6 +20,8 @@ float3 color_correct(float3 rgb) {

  #if IS_OX
  return -0.507089*exp(-12.54124638*x)+0.9655*powr(x,0.5)-0.472597*x+0.507089;
+  #elif IS_OS
+  return powr(x,0.7);
  #else
  // tone mapping params
  const float gamma_k = 0.75;
@@ -35,6 +37,9 @@ float3 color_correct(float3 rgb) {
 }

 float get_vignetting_s(float r) {
+  #if IS_OS
+    r = r / 2.2545f;
+  #endif
  if (r < 62500) {
    return (1.0f + 0.0000008f*r);
  } else if (r < 490000) {
@@ -85,6 +90,24 @@ float4 val4_from_12(uchar8 pvs, float gain) {

 }

+float4 val4_from_10(uchar8 pvs, uchar ext, bool aligned, float gain) {
+  uint4 parsed;
+  if (aligned) {
+    parsed = (uint4)(((uint)pvs.s0 << 2) + (pvs.s1 & 0b00000011),
+                     ((uint)pvs.s2 << 2) + ((pvs.s6 & 0b11000000) / 64),
+                     ((uint)pvs.s3 << 2) + ((pvs.s6 & 0b00110000) / 16),
+                     ((uint)pvs.s4 << 2) + ((pvs.s6 & 0b00001100) / 4));
+  } else {
+    parsed = (uint4)(((uint)pvs.s0 << 2) + ((pvs.s3 & 0b00110000) / 16),
+                     ((uint)pvs.s1 << 2) + ((pvs.s3 & 0b00001100) / 4),
+                     ((uint)pvs.s2 << 2) + ((pvs.s3 & 0b00000011)),
+                     ((uint)pvs.s4 << 2) + ((ext & 0b11000000) / 64));
+  }
+
+  float4 pv = convert_float4(parsed) / 1024.0;
+  return clamp(pv*gain, 0.0, 1.0);
+}
+
 float get_k(float a, float b, float c, float d) {
  return 2.0 - (fabs(a - b) + fabs(c - d));
 }
@@ -94,20 +117,51 @@ __kernel void debayer10(const __global uchar * in, __global uchar * out)
  const int gid_x = get_global_id(0);
  const int gid_y = get_global_id(1);

-  const int y_top_mod = (gid_y == 0) ? 2: 0;
-  const int y_bot_mod = (gid_y == (RGB_HEIGHT/2 - 1)) ? 1: 3;
+  const int row_before_offset = (gid_y == 0) ? 2 : 0;
+  const int row_after_offset = (gid_y == (RGB_HEIGHT/2 - 1)) ? 1 : 3;

  float3 rgb;
  uchar3 rgb_out[4];

-  int start = (2 * gid_y - 1) * FRAME_STRIDE + (3 * gid_x - 2) + (FRAME_STRIDE * FRAME_OFFSET);
+  #if IS_BGGR
+    constant int row_read_order[] = {3, 2, 1, 0};
+    constant int rgb_write_order[] = {2, 3, 0, 1};
+  #else
+    constant int row_read_order[] = {0, 1, 2, 3};
+    constant int rgb_write_order[] = {0, 1, 2, 3};
+  #endif
+
+  int start_idx;
+  #if IS_10BIT
+    bool aligned10;
+    if (gid_x % 2 == 0) {
+      aligned10 = true;
+      start_idx = (2 * gid_y - 1) * FRAME_STRIDE + (5 * gid_x / 2 - 2) + (FRAME_STRIDE * FRAME_OFFSET);
+    } else {
+      aligned10 = false;
+      start_idx = (2 * gid_y - 1) * FRAME_STRIDE + (5 * (gid_x - 1) / 2 + 1) + (FRAME_STRIDE * FRAME_OFFSET);
+    }
+  #else
+    start_idx = (2 * gid_y - 1) * FRAME_STRIDE + (3 * gid_x - 2) + (FRAME_STRIDE * FRAME_OFFSET);
+  #endif

  // read in 8x4 chars
  uchar8 dat[4];
-  dat[0] = vload8(0, in + start + FRAME_STRIDE*y_top_mod);
-  dat[1] = vload8(0, in + start + FRAME_STRIDE*1);
-  dat[2] = vload8(0, in + start + FRAME_STRIDE*2);
-  dat[3] = vload8(0, in + start + FRAME_STRIDE*y_bot_mod);
+  dat[0] = vload8(0, in + start_idx + FRAME_STRIDE*row_before_offset);
+  dat[1] = vload8(0, in + start_idx + FRAME_STRIDE*1);
+  dat[2] = vload8(0, in + start_idx + FRAME_STRIDE*2);
+  dat[3] = vload8(0, in + start_idx + FRAME_STRIDE*row_after_offset);
+
+  // need extra bit for 10-bit
+  #if IS_10BIT
+    uchar extra[4];
+    if (!aligned10) {
+      extra[0] = in[start_idx + FRAME_STRIDE*row_before_offset + 8];
+      extra[1] = in[start_idx + FRAME_STRIDE*1 + 8];
+      extra[2] = in[start_idx + FRAME_STRIDE*2 + 8];
+      extra[3] = in[start_idx + FRAME_STRIDE*row_after_offset + 8];
+		}
+  #endif

  // correct vignetting
  #if VIGNETTING
@@ -118,60 +172,69 @@ __kernel void debayer10(const __global uchar * in, __global uchar * out)
    const float gain = 1.0;
  #endif

-  // process them to floats
-  float4 va = val4_from_12(dat[0], gain);
-  float4 vb = val4_from_12(dat[1], gain);
-  float4 vc = val4_from_12(dat[2], gain);
-  float4 vd = val4_from_12(dat[3], gain);
+  float4 v_rows[4];
+  // parse into floats
+  #if IS_10BIT
+    v_rows[row_read_order[0]] = val4_from_10(dat[0], extra[0], aligned10, 1.0);
+    v_rows[row_read_order[1]] = val4_from_10(dat[1], extra[1], aligned10, 1.0);
+    v_rows[row_read_order[2]] = val4_from_10(dat[2], extra[2], aligned10, 1.0);
+    v_rows[row_read_order[3]] = val4_from_10(dat[3], extra[3], aligned10, 1.0);
+  #else
+    v_rows[row_read_order[0]] = val4_from_12(dat[0], gain);
+    v_rows[row_read_order[1]] = val4_from_12(dat[1], gain);
+    v_rows[row_read_order[2]] = val4_from_12(dat[2], gain);
+    v_rows[row_read_order[3]] = val4_from_12(dat[3], gain);
+  #endif

+  // mirror padding
  if (gid_x == 0) {
-    va.s0 = va.s2;
-    vb.s0 = vb.s2;
-    vc.s0 = vc.s2;
-    vd.s0 = vd.s2;
+    v_rows[0].s0 = v_rows[0].s2;
+    v_rows[1].s0 = v_rows[1].s2;
+    v_rows[2].s0 = v_rows[2].s2;
+    v_rows[3].s0 = v_rows[3].s2;
  } else if (gid_x == RGB_WIDTH/2 - 1) {
-    va.s3 = va.s1;
-    vb.s3 = vb.s1;
-    vc.s3 = vc.s1;
-    vd.s3 = vd.s1;
+    v_rows[0].s3 = v_rows[0].s1;
+    v_rows[1].s3 = v_rows[1].s1;
+    v_rows[2].s3 = v_rows[2].s1;
+    v_rows[3].s3 = v_rows[3].s1;
  }

  // a simplified version of https://opensignalprocessingjournal.com/contents/volumes/V6/TOSIGPJ-6-1/TOSIGPJ-6-1.pdf
-  const float k01 = get_k(va.s0, vb.s1, va.s2, vb.s1);
-  const float k02 = get_k(va.s2, vb.s1, vc.s2, vb.s1);
-  const float k03 = get_k(vc.s0, vb.s1, vc.s2, vb.s1);
-  const float k04 = get_k(va.s0, vb.s1, vc.s0, vb.s1);
-  rgb.x = (k02*vb.s2+k04*vb.s0)/(k02+k04); // R_G1
-  rgb.y = vb.s1; // G1(R)
-  rgb.z = (k01*va.s1+k03*vc.s1)/(k01+k03); // B_G1
-  rgb_out[0] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);
+  const float k01 = get_k(v_rows[0].s0, v_rows[1].s1, v_rows[0].s2, v_rows[1].s1);
+  const float k02 = get_k(v_rows[0].s2, v_rows[1].s1, v_rows[2].s2, v_rows[1].s1);
+  const float k03 = get_k(v_rows[2].s0, v_rows[1].s1, v_rows[2].s2, v_rows[1].s1);
+  const float k04 = get_k(v_rows[0].s0, v_rows[1].s1, v_rows[2].s0, v_rows[1].s1);
+  rgb.x = (k02*v_rows[1].s2+k04*v_rows[1].s0)/(k02+k04); // R_G1
+  rgb.y = v_rows[1].s1; // G1(R)
+  rgb.z = (k01*v_rows[0].s1+k03*v_rows[2].s1)/(k01+k03); // B_G1
+  rgb_out[rgb_write_order[0]] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);

-  const float k11 = get_k(va.s1, vc.s1, va.s3, vc.s3);
-  const float k12 = get_k(va.s2, vb.s1, vb.s3, vc.s2);
-  const float k13 = get_k(va.s1, va.s3, vc.s1, vc.s3);
-  const float k14 = get_k(va.s2, vb.s3, vc.s2, vb.s1);
-  rgb.x = vb.s2; // R
-  rgb.y = (k11*(va.s2+vc.s2)*0.5+k13*(vb.s3+vb.s1)*0.5)/(k11+k13); // G_R
-  rgb.z = (k12*(va.s3+vc.s1)*0.5+k14*(va.s1+vc.s3)*0.5)/(k12+k14); // B_R
-  rgb_out[1] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);
+  const float k11 = get_k(v_rows[0].s1, v_rows[2].s1, v_rows[0].s3, v_rows[2].s3);
+  const float k12 = get_k(v_rows[0].s2, v_rows[1].s1, v_rows[1].s3, v_rows[2].s2);
+  const float k13 = get_k(v_rows[0].s1, v_rows[0].s3, v_rows[2].s1, v_rows[2].s3);
+  const float k14 = get_k(v_rows[0].s2, v_rows[1].s3, v_rows[2].s2, v_rows[1].s1);
+  rgb.x = v_rows[1].s2; // R
+  rgb.y = (k11*(v_rows[0].s2+v_rows[2].s2)*0.5+k13*(v_rows[1].s3+v_rows[1].s1)*0.5)/(k11+k13); // G_R
+  rgb.z = (k12*(v_rows[0].s3+v_rows[2].s1)*0.5+k14*(v_rows[0].s1+v_rows[2].s3)*0.5)/(k12+k14); // B_R
+  rgb_out[rgb_write_order[1]] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);

-  const float k21 = get_k(vb.s0, vd.s0, vb.s2, vd.s2);
-  const float k22 = get_k(vb.s1, vc.s0, vc.s2, vd.s1);
-  const float k23 = get_k(vb.s0, vb.s2, vd.s0, vd.s2);
-  const float k24 = get_k(vb.s1, vc.s2, vd.s1, vc.s0);
-  rgb.x = (k22*(vb.s2+vd.s0)*0.5+k24*(vb.s0+vd.s2)*0.5)/(k22+k24); // R_B
-  rgb.y = (k21*(vb.s1+vd.s1)*0.5+k23*(vc.s2+vc.s0)*0.5)/(k21+k23); // G_B
-  rgb.z = vc.s1; // B
-  rgb_out[2] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);
+  const float k21 = get_k(v_rows[1].s0, v_rows[3].s0, v_rows[1].s2, v_rows[3].s2);
+  const float k22 = get_k(v_rows[1].s1, v_rows[2].s0, v_rows[2].s2, v_rows[3].s1);
+  const float k23 = get_k(v_rows[1].s0, v_rows[1].s2, v_rows[3].s0, v_rows[3].s2);
+  const float k24 = get_k(v_rows[1].s1, v_rows[2].s2, v_rows[3].s1, v_rows[2].s0);
+  rgb.x = (k22*(v_rows[1].s2+v_rows[3].s0)*0.5+k24*(v_rows[1].s0+v_rows[3].s2)*0.5)/(k22+k24); // R_B
+  rgb.y = (k21*(v_rows[1].s1+v_rows[3].s1)*0.5+k23*(v_rows[2].s2+v_rows[2].s0)*0.5)/(k21+k23); // G_B
+  rgb.z = v_rows[2].s1; // B
+  rgb_out[rgb_write_order[2]] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);

-  const float k31 = get_k(vb.s1, vc.s2, vb.s3, vc.s2);
-  const float k32 = get_k(vb.s3, vc.s2, vd.s3, vc.s2);
-  const float k33 = get_k(vd.s1, vc.s2, vd.s3, vc.s2);
-  const float k34 = get_k(vb.s1, vc.s2, vd.s1, vc.s2);
-  rgb.x = (k31*vb.s2+k33*vd.s2)/(k31+k33); // R_G2
-  rgb.y = vc.s2; // G2(B)
-  rgb.z = (k32*vc.s3+k34*vc.s1)/(k32+k34); // B_G2
-  rgb_out[3] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);
+  const float k31 = get_k(v_rows[1].s1, v_rows[2].s2, v_rows[1].s3, v_rows[2].s2);
+  const float k32 = get_k(v_rows[1].s3, v_rows[2].s2, v_rows[3].s3, v_rows[2].s2);
+  const float k33 = get_k(v_rows[3].s1, v_rows[2].s2, v_rows[3].s3, v_rows[2].s2);
+  const float k34 = get_k(v_rows[1].s1, v_rows[2].s2, v_rows[3].s1, v_rows[2].s2);
+  rgb.x = (k31*v_rows[1].s2+k33*v_rows[3].s2)/(k31+k33); // R_G2
+  rgb.y = v_rows[2].s2; // G2(B)
+  rgb.z = (k32*v_rows[2].s3+k34*v_rows[2].s1)/(k32+k34); // B_G2
+  rgb_out[rgb_write_order[3]] = convert_uchar3_sat(color_correct(clamp(rgb, 0.0, 1.0)) * 255.0);

  // write ys
  uchar2 yy = (uchar2)(