Test camerad in CI (#2297)
	
		
	
				
					
				
			* remove unused junk
* check frame pkts
* from magic
* self
* cleanup
* add todo
* no opencv
* run in J
* fix conv
* make 250x faster
* abs
* should be +1
* depends on starting phase
* block on furniture refactor
* fixed
* restart test
* check ex
* need scaling
old-commit-hash: cb58e79ee8
			
			
				vw-mqb-aeb
			
			
		
							parent
							
								
									9fd7cfc95a
								
							
						
					
					
						commit
						a54d95fdbb
					
				
				 8 changed files with 144 additions and 377 deletions
			
			
		@ -1,49 +0,0 @@ | 
				
			|||||||
#include <stdio.h> | 
					 | 
				
			||||||
#include <stdarg.h> | 
					 | 
				
			||||||
#include <stdbool.h> | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "camera_qcom.h" | 
					 | 
				
			||||||
// TODO: add qcom2 test
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
bool do_exit = false; | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void cloudlog_e(int levelnum, const char* filename, int lineno, const char* func, | 
					 | 
				
			||||||
                const char* fmt, ...) { | 
					 | 
				
			||||||
  va_list args; | 
					 | 
				
			||||||
  va_start(args, fmt); | 
					 | 
				
			||||||
  vprintf(fmt, args); | 
					 | 
				
			||||||
  printf("\n"); | 
					 | 
				
			||||||
} | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void set_thread_name(const char* name) { | 
					 | 
				
			||||||
} | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// tbuffers
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void tbuffer_init2(TBuffer *tb, int num_bufs, const char* name, | 
					 | 
				
			||||||
                   void (*release_cb)(void* c, int idx), | 
					 | 
				
			||||||
                   void* cb_cookie) { | 
					 | 
				
			||||||
  printf("tbuffer_init2\n"); | 
					 | 
				
			||||||
} | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void tbuffer_dispatch(TBuffer *tb, int idx) { | 
					 | 
				
			||||||
  printf("tbuffer_dispatch\n"); | 
					 | 
				
			||||||
} | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void tbuffer_stop(TBuffer *tb) { | 
					 | 
				
			||||||
  printf("tbuffer_stop\n"); | 
					 | 
				
			||||||
} | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int main() { | 
					 | 
				
			||||||
  MultiCameraState s={}; | 
					 | 
				
			||||||
  cameras_init(&s); | 
					 | 
				
			||||||
  VisionBuf camera_bufs_rear[0x10] = {0}; | 
					 | 
				
			||||||
  VisionBuf camera_bufs_focus[0x10] = {0}; | 
					 | 
				
			||||||
  VisionBuf camera_bufs_stats[0x10] = {0}; | 
					 | 
				
			||||||
  VisionBuf camera_bufs_front[0x10] = {0}; | 
					 | 
				
			||||||
  cameras_open(&s, | 
					 | 
				
			||||||
    camera_bufs_rear, camera_bufs_focus, | 
					 | 
				
			||||||
    camera_bufs_stats, camera_bufs_front); | 
					 | 
				
			||||||
  cameras_close(&s); | 
					 | 
				
			||||||
} | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
@ -1,10 +0,0 @@ | 
				
			|||||||
#!/bin/sh | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
gcc -DQCOM -I ~/one -I ~/one/selfdrive -I ../../include \ | 
					 | 
				
			||||||
  -I ~/one/phonelibs/android_system_core/include -I ~/one/phonelibs/opencl/include \ | 
					 | 
				
			||||||
  -I ~/one/selfdrive/visiond/cameras \ | 
					 | 
				
			||||||
  test.c ../../cameras/camera_qcom.c \ | 
					 | 
				
			||||||
  -l:libczmq.a -l:libzmq.a -lgnustl_shared -lm -llog -lcutils \ | 
					 | 
				
			||||||
  -l:libcapn.a -l:libcapnp.a -l:libkj.a \ | 
					 | 
				
			||||||
  ~/one/cereal/gen/c/log.capnp.o | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
@ -0,0 +1,143 @@ | 
				
			|||||||
 | 
					#!/usr/bin/env python3 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import random | 
				
			||||||
 | 
					import time | 
				
			||||||
 | 
					import unittest | 
				
			||||||
 | 
					import numpy as np | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import cereal.messaging as messaging | 
				
			||||||
 | 
					from selfdrive.test.helpers import with_processes | 
				
			||||||
 | 
					from selfdrive.camerad.snapshot.visionipc import VisionIPC | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from common.hardware import EON, TICI | 
				
			||||||
 | 
					# only tests for EON and TICI | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TEST_TIMESPAN = random.randint(60, 180) # seconds | 
				
			||||||
 | 
					SKIP_FRAME_TOLERANCE = 0 | 
				
			||||||
 | 
					FRAME_COUNT_TOLERANCE = 1 # over the whole test time | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					FPS_BASELINE = 20 | 
				
			||||||
 | 
					CAMERAS = { | 
				
			||||||
 | 
					  "frame": FPS_BASELINE, | 
				
			||||||
 | 
					  "frontFrame": FPS_BASELINE // 2, | 
				
			||||||
 | 
					} | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if TICI: | 
				
			||||||
 | 
					  CAMERAS["frontFrame"] = FPS_BASELINE | 
				
			||||||
 | 
					  CAMERAS["wideFrame"] = FPS_BASELINE | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TestCamerad(unittest.TestCase): | 
				
			||||||
 | 
					  @classmethod | 
				
			||||||
 | 
					  def setUpClass(cls): | 
				
			||||||
 | 
					    if not (EON or TICI): | 
				
			||||||
 | 
					      raise unittest.SkipTest | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def _get_snapshots(self): | 
				
			||||||
 | 
					    ret = None | 
				
			||||||
 | 
					    start_time = time.time() | 
				
			||||||
 | 
					    while time.time() - start_time < 5.0: | 
				
			||||||
 | 
					      try: | 
				
			||||||
 | 
					        ipc = VisionIPC() | 
				
			||||||
 | 
					        pic = ipc.get() | 
				
			||||||
 | 
					        del ipc | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        ipc_front = VisionIPC(front=True) # need to add another for tici | 
				
			||||||
 | 
					        fpic = ipc_front.get() | 
				
			||||||
 | 
					        del ipc_front | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        ret = pic, fpic | 
				
			||||||
 | 
					        break | 
				
			||||||
 | 
					      except Exception: | 
				
			||||||
 | 
					        time.sleep(1) | 
				
			||||||
 | 
					    return ret | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def _numpy_bgr2gray(self, im): | 
				
			||||||
 | 
					    ret = np.clip(im[:,:,0] * 0.114 + im[:,:,1] * 0.587 + im[:,:,2] * 0.299, 0, 255).astype(np.uint8) | 
				
			||||||
 | 
					    return ret | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def _numpy_lap(self, im): | 
				
			||||||
 | 
					    ret = np.zeros(im.shape) | 
				
			||||||
 | 
					    ret += -4 * im | 
				
			||||||
 | 
					    ret += np.concatenate([np.zeros((im.shape[0],1)),im[:,:-1]], axis=1) | 
				
			||||||
 | 
					    ret += np.concatenate([im[:,1:],np.zeros((im.shape[0],1))], axis=1) | 
				
			||||||
 | 
					    ret += np.concatenate([np.zeros((1,im.shape[1])),im[:-1,:]], axis=0) | 
				
			||||||
 | 
					    ret += np.concatenate([im[1:,:],np.zeros((1,im.shape[1]))], axis=0) | 
				
			||||||
 | 
					    ret = np.clip(ret, 0, 255).astype(np.uint8) | 
				
			||||||
 | 
					    return ret | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def _is_really_sharp(self, i, threshold=800, roi_max=np.array([8,6]), roi_xxyy=np.array([1,6,2,3])): | 
				
			||||||
 | 
					    i = self._numpy_bgr2gray(i) | 
				
			||||||
 | 
					    x_pitch = i.shape[1] // roi_max[0] | 
				
			||||||
 | 
					    y_pitch = i.shape[0] // roi_max[1] | 
				
			||||||
 | 
					    lap = self._numpy_lap(i) | 
				
			||||||
 | 
					    lap_map = np.zeros((roi_max[1], roi_max[0])) | 
				
			||||||
 | 
					    for r in range(lap_map.shape[0]): | 
				
			||||||
 | 
					      for c in range(lap_map.shape[1]): | 
				
			||||||
 | 
					        selected_lap = lap[r*y_pitch:(r+1)*y_pitch, c*x_pitch:(c+1)*x_pitch] | 
				
			||||||
 | 
					        lap_map[r][c] = 5*selected_lap.var() + selected_lap.max() | 
				
			||||||
 | 
					    print(lap_map[roi_xxyy[2]:roi_xxyy[3]+1,roi_xxyy[0]:roi_xxyy[1]+1]) | 
				
			||||||
 | 
					    if (lap_map[roi_xxyy[2]:roi_xxyy[3]+1,roi_xxyy[0]:roi_xxyy[1]+1] > threshold).sum() > \ | 
				
			||||||
 | 
					          (roi_xxyy[1]+1-roi_xxyy[0]) * (roi_xxyy[3]+1-roi_xxyy[2]) * 0.9: | 
				
			||||||
 | 
					      return True | 
				
			||||||
 | 
					    else: | 
				
			||||||
 | 
					      return False | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def _is_exposure_okay(self, i, med_ex=np.array([0.2,0.4]), mean_ex=np.array([0.2,0.6])): | 
				
			||||||
 | 
					    i = self._numpy_bgr2gray(i) | 
				
			||||||
 | 
					    i_median = np.median(i) / 256 | 
				
			||||||
 | 
					    i_mean = np.mean(i) / 256 | 
				
			||||||
 | 
					    print([i_median, i_mean]) | 
				
			||||||
 | 
					    return med_ex[0] < i_median < med_ex[1] and mean_ex[0] < i_mean < mean_ex[1] | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  @with_processes(['camerad']) | 
				
			||||||
 | 
					  def test_camera_operation(self): | 
				
			||||||
 | 
					    print("checking image outputs") | 
				
			||||||
 | 
					    if EON: | 
				
			||||||
 | 
					      # run checks similar to prov | 
				
			||||||
 | 
					      time.sleep(15) # wait for startup and AF | 
				
			||||||
 | 
					      pic, fpic = self._get_snapshots() | 
				
			||||||
 | 
					      self.assertTrue(self._is_really_sharp(pic)) | 
				
			||||||
 | 
					      self.assertTrue(self._is_exposure_okay(pic)) | 
				
			||||||
 | 
					      self.assertTrue(self._is_exposure_okay(fpic)) | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      time.sleep(30) | 
				
			||||||
 | 
					      # check again for consistency | 
				
			||||||
 | 
					      pic, fpic = self._get_snapshots() | 
				
			||||||
 | 
					      self.assertTrue(self._is_really_sharp(pic)) | 
				
			||||||
 | 
					      self.assertTrue(self._is_exposure_okay(pic)) | 
				
			||||||
 | 
					      self.assertTrue(self._is_exposure_okay(fpic)) | 
				
			||||||
 | 
					    elif TICI: | 
				
			||||||
 | 
					      raise unittest.SkipTest # TBD | 
				
			||||||
 | 
					    else: | 
				
			||||||
 | 
					      raise unittest.SkipTest | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  @with_processes(['camerad']) | 
				
			||||||
 | 
					  def test_frame_packets(self): | 
				
			||||||
 | 
					    print("checking frame pkts continuity") | 
				
			||||||
 | 
					    print(TEST_TIMESPAN) | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    sm = messaging.SubMaster([socket_name for socket_name in CAMERAS]) | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    last_frame_id = dict.fromkeys(CAMERAS, None) | 
				
			||||||
 | 
					    start_frame_id = dict.fromkeys(CAMERAS, None) | 
				
			||||||
 | 
					    start_time_milli = int(round(time.time() * 1000)) | 
				
			||||||
 | 
					    while int(round(time.time() * 1000)) - start_time_milli < (TEST_TIMESPAN+1) * 1000: | 
				
			||||||
 | 
					      sm.update() | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      for camera in CAMERAS: | 
				
			||||||
 | 
					        if sm.updated[camera]: | 
				
			||||||
 | 
					          if start_frame_id[camera] is None: | 
				
			||||||
 | 
					            start_frame_id[camera] = last_frame_id[camera] = sm[camera].frameId | 
				
			||||||
 | 
					            continue | 
				
			||||||
 | 
					          dfid = sm[camera].frameId - last_frame_id[camera] | 
				
			||||||
 | 
					          self.assertTrue(abs(dfid - 1) <= SKIP_FRAME_TOLERANCE) | 
				
			||||||
 | 
					          last_frame_id[camera] = sm[camera].frameId | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      time.sleep(0.01) | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for camera in CAMERAS: | 
				
			||||||
 | 
					      print(camera, (last_frame_id[camera] - start_frame_id[camera])) | 
				
			||||||
 | 
					      self.assertTrue(abs((last_frame_id[camera] - start_frame_id[camera]) - TEST_TIMESPAN*CAMERAS[camera]) <= FRAME_COUNT_TOLERANCE) | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__": | 
				
			||||||
 | 
					  unittest.main() | 
				
			||||||
@ -1,57 +0,0 @@ | 
				
			|||||||
CC = clang
 | 
					 | 
				
			||||||
CXX = clang++
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
PHONELIBS = ../../../../phonelibs
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
WARN_FLAGS = -Werror=implicit-function-declaration \
 | 
					 | 
				
			||||||
             -Werror=incompatible-pointer-types \
 | 
					 | 
				
			||||||
             -Werror=int-conversion \
 | 
					 | 
				
			||||||
             -Werror=return-type \
 | 
					 | 
				
			||||||
             -Werror=format-extra-args \
 | 
					 | 
				
			||||||
             -Wno-deprecated-declarations
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
CFLAGS = -std=gnu11 -fPIC -O2 $(WARN_FLAGS)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
UNAME_M := $(shell uname -m)
 | 
					 | 
				
			||||||
ifeq ($(UNAME_M),x86_64) | 
					 | 
				
			||||||
  OPENCL_LIBS = -framework OpenCL
 | 
					 | 
				
			||||||
else | 
					 | 
				
			||||||
  OPENCL_FLAGS = -I$(PHONELIBS)/opencl/include
 | 
					 | 
				
			||||||
  OPENCL_LIBS = -L/system/vendor/lib64 -lgsl -lCB -lOpenCL
 | 
					 | 
				
			||||||
endif | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
OBJS += yuv_bench.o \
 | 
					 | 
				
			||||||
        ../../../common/util.o \
 | 
					 | 
				
			||||||
        ../../clutil.o
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
OUTPUT = yuv
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
.PHONY: all | 
					 | 
				
			||||||
all: $(OUTPUT) | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
$(OUTPUT): $(OBJS) | 
					 | 
				
			||||||
	@echo "[ LINK ] $@"
 | 
					 | 
				
			||||||
	$(CXX) -fPIC -o '$@' $^ \
 | 
					 | 
				
			||||||
        -L/usr/lib \
 | 
					 | 
				
			||||||
        $(OPENCL_LIBS)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
%.o: %.cc | 
					 | 
				
			||||||
	@echo "[ CXX ] $@"
 | 
					 | 
				
			||||||
	$(CXX) $(CXXFLAGS) -MMD \
 | 
					 | 
				
			||||||
           -I../.. -I../../.. -I ../../../.. \
 | 
					 | 
				
			||||||
           $(OPENCL_FLAGS) \
 | 
					 | 
				
			||||||
           -c -o '$@' '$<'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
%.o: %.c | 
					 | 
				
			||||||
	@echo "[ CC ] $@"
 | 
					 | 
				
			||||||
	$(CC) $(CFLAGS) -MMD \
 | 
					 | 
				
			||||||
          -I../.. -I../../.. -I ../../../.. \
 | 
					 | 
				
			||||||
          $(OPENCL_FLAGS) \
 | 
					 | 
				
			||||||
          -c -o '$@' '$<'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
.PHONY: clean | 
					 | 
				
			||||||
clean: | 
					 | 
				
			||||||
	rm -f $(OUTPUT) $(OBJS) $(DEPS)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
@ -1,19 +0,0 @@ | 
				
			|||||||
import numpy as np | 
					 | 
				
			||||||
import cv2  # pylint: disable=import-error | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# img_bgr = np.zeros((874, 1164, 3), dtype=np.uint8) | 
					 | 
				
			||||||
# for y in range(874): | 
					 | 
				
			||||||
#   for k in range(1164*3): | 
					 | 
				
			||||||
#     img_bgr[y, k//3, k%3] = k ^ y | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# cv2.imwrite("img_rgb.png", img_bgr) | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cl = np.fromstring(open("out_cl.bin", "rb").read(), dtype=np.uint8) | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cl_r = cl.reshape(874 * 3 // 2, -1) | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cv2.imwrite("out_y.png", cl_r[:874]) | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cl_bgr = cv2.cvtColor(cl_r, cv2.COLOR_YUV2BGR_I420) | 
					 | 
				
			||||||
cv2.imwrite("out_cl.png", cl_bgr) | 
					 | 
				
			||||||
@ -1,116 +0,0 @@ | 
				
			|||||||
 | 
					 | 
				
			||||||
#define PIX_PER_WI_X 1 | 
					 | 
				
			||||||
#define PIX_PER_WI_Y 1 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define scn 3 | 
					 | 
				
			||||||
#define bidx 2 | 
					 | 
				
			||||||
#define uidx 0 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define R_COMP x | 
					 | 
				
			||||||
#define G_COMP y | 
					 | 
				
			||||||
#define B_COMP z | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__constant float c_RGB2YUVCoeffs_420[8] = { 0.256999969f, 0.50399971f, 0.09799957f, -0.1479988098f, -0.2909994125f, | 
					 | 
				
			||||||
                                            0.438999176f, -0.3679990768f, -0.0709991455f }; | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int src_offset, | 
					 | 
				
			||||||
                                __global uchar* dstptr, int dst_step, int dst_offset, | 
					 | 
				
			||||||
                                int rows, int cols) | 
					 | 
				
			||||||
{ | 
					 | 
				
			||||||
    int x = get_global_id(0) * PIX_PER_WI_X; | 
					 | 
				
			||||||
    int y = get_global_id(1) * PIX_PER_WI_Y; | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (x < cols/2) | 
					 | 
				
			||||||
    { | 
					 | 
				
			||||||
        int src_index  = mad24(y << 1, src_step, mad24(x << 1, scn, src_offset)); | 
					 | 
				
			||||||
        int ydst_index = mad24(y << 1, dst_step, (x << 1) + dst_offset); | 
					 | 
				
			||||||
        int y_rows = rows / 3 * 2; | 
					 | 
				
			||||||
        int vsteps[2] = { cols >> 1, dst_step - (cols >> 1)}; | 
					 | 
				
			||||||
        __constant float* coeffs = c_RGB2YUVCoeffs_420; | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        #pragma unroll | 
					 | 
				
			||||||
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy) | 
					 | 
				
			||||||
        { | 
					 | 
				
			||||||
            if (y < rows / 3) | 
					 | 
				
			||||||
            { | 
					 | 
				
			||||||
                __global const uchar* src1 = srcptr + src_index; | 
					 | 
				
			||||||
                __global const uchar* src2 = src1 + src_step; | 
					 | 
				
			||||||
                __global uchar* ydst1 = dstptr + ydst_index; | 
					 | 
				
			||||||
                __global uchar* ydst2 = ydst1 + dst_step; | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                __global uchar* udst = dstptr + mad24(y_rows + (y>>1), dst_step, dst_offset + (y%2)*(cols >> 1) + x); | 
					 | 
				
			||||||
                __global uchar* vdst = udst + mad24(y_rows >> 2, dst_step, y_rows % 4 ? vsteps[y%2] : 0); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#if PIX_PER_WI_X == 2 | 
					 | 
				
			||||||
                int s11 = *((__global const int*) src1); | 
					 | 
				
			||||||
                int s12 = *((__global const int*) src1 + 1); | 
					 | 
				
			||||||
                int s13 = *((__global const int*) src1 + 2); | 
					 | 
				
			||||||
#if scn == 4 | 
					 | 
				
			||||||
                int s14 = *((__global const int*) src1 + 3); | 
					 | 
				
			||||||
#endif | 
					 | 
				
			||||||
                int s21 = *((__global const int*) src2); | 
					 | 
				
			||||||
                int s22 = *((__global const int*) src2 + 1); | 
					 | 
				
			||||||
                int s23 = *((__global const int*) src2 + 2); | 
					 | 
				
			||||||
#if scn == 4 | 
					 | 
				
			||||||
                int s24 = *((__global const int*) src2 + 3); | 
					 | 
				
			||||||
#endif | 
					 | 
				
			||||||
                float src_pix1[scn * 4], src_pix2[scn * 4]; | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                *((float4*) src_pix1)     = convert_float4(as_uchar4(s11)); | 
					 | 
				
			||||||
                *((float4*) src_pix1 + 1) = convert_float4(as_uchar4(s12)); | 
					 | 
				
			||||||
                *((float4*) src_pix1 + 2) = convert_float4(as_uchar4(s13)); | 
					 | 
				
			||||||
#if scn == 4 | 
					 | 
				
			||||||
                *((float4*) src_pix1 + 3) = convert_float4(as_uchar4(s14)); | 
					 | 
				
			||||||
#endif | 
					 | 
				
			||||||
                *((float4*) src_pix2)     = convert_float4(as_uchar4(s21)); | 
					 | 
				
			||||||
                *((float4*) src_pix2 + 1) = convert_float4(as_uchar4(s22)); | 
					 | 
				
			||||||
                *((float4*) src_pix2 + 2) = convert_float4(as_uchar4(s23)); | 
					 | 
				
			||||||
#if scn == 4 | 
					 | 
				
			||||||
                *((float4*) src_pix2 + 3) = convert_float4(as_uchar4(s24)); | 
					 | 
				
			||||||
#endif | 
					 | 
				
			||||||
                uchar4 y1, y2; | 
					 | 
				
			||||||
                y1.x = convert_uchar_sat(fma(coeffs[0], src_pix1[      2-bidx], fma(coeffs[1], src_pix1[      1], fma(coeffs[2], src_pix1[      bidx], 16.5f)))); | 
					 | 
				
			||||||
                y1.y = convert_uchar_sat(fma(coeffs[0], src_pix1[  scn+2-bidx], fma(coeffs[1], src_pix1[  scn+1], fma(coeffs[2], src_pix1[  scn+bidx], 16.5f)))); | 
					 | 
				
			||||||
                y1.z = convert_uchar_sat(fma(coeffs[0], src_pix1[2*scn+2-bidx], fma(coeffs[1], src_pix1[2*scn+1], fma(coeffs[2], src_pix1[2*scn+bidx], 16.5f)))); | 
					 | 
				
			||||||
                y1.w = convert_uchar_sat(fma(coeffs[0], src_pix1[3*scn+2-bidx], fma(coeffs[1], src_pix1[3*scn+1], fma(coeffs[2], src_pix1[3*scn+bidx], 16.5f)))); | 
					 | 
				
			||||||
                y2.x = convert_uchar_sat(fma(coeffs[0], src_pix2[      2-bidx], fma(coeffs[1], src_pix2[      1], fma(coeffs[2], src_pix2[      bidx], 16.5f)))); | 
					 | 
				
			||||||
                y2.y = convert_uchar_sat(fma(coeffs[0], src_pix2[  scn+2-bidx], fma(coeffs[1], src_pix2[  scn+1], fma(coeffs[2], src_pix2[  scn+bidx], 16.5f)))); | 
					 | 
				
			||||||
                y2.z = convert_uchar_sat(fma(coeffs[0], src_pix2[2*scn+2-bidx], fma(coeffs[1], src_pix2[2*scn+1], fma(coeffs[2], src_pix2[2*scn+bidx], 16.5f)))); | 
					 | 
				
			||||||
                y2.w = convert_uchar_sat(fma(coeffs[0], src_pix2[3*scn+2-bidx], fma(coeffs[1], src_pix2[3*scn+1], fma(coeffs[2], src_pix2[3*scn+bidx], 16.5f)))); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                *((__global int*) ydst1) = as_int(y1); | 
					 | 
				
			||||||
                *((__global int*) ydst2) = as_int(y2); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                float uv[4] = { fma(coeffs[3], src_pix1[      2-bidx], fma(coeffs[4], src_pix1[      1], fma(coeffs[5], src_pix1[      bidx], 128.5f))), | 
					 | 
				
			||||||
                                fma(coeffs[5], src_pix1[      2-bidx], fma(coeffs[6], src_pix1[      1], fma(coeffs[7], src_pix1[      bidx], 128.5f))), | 
					 | 
				
			||||||
                                fma(coeffs[3], src_pix1[2*scn+2-bidx], fma(coeffs[4], src_pix1[2*scn+1], fma(coeffs[5], src_pix1[2*scn+bidx], 128.5f))), | 
					 | 
				
			||||||
                                fma(coeffs[5], src_pix1[2*scn+2-bidx], fma(coeffs[6], src_pix1[2*scn+1], fma(coeffs[7], src_pix1[2*scn+bidx], 128.5f))) }; | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                udst[0] = convert_uchar_sat(uv[uidx]    ); | 
					 | 
				
			||||||
                vdst[0] = convert_uchar_sat(uv[1 - uidx]); | 
					 | 
				
			||||||
                udst[1] = convert_uchar_sat(uv[2 + uidx]); | 
					 | 
				
			||||||
                vdst[1] = convert_uchar_sat(uv[3 - uidx]); | 
					 | 
				
			||||||
#else | 
					 | 
				
			||||||
                float4 src_pix1 = convert_float4(vload4(0, src1)); | 
					 | 
				
			||||||
                float4 src_pix2 = convert_float4(vload4(0, src1+scn)); | 
					 | 
				
			||||||
                float4 src_pix3 = convert_float4(vload4(0, src2)); | 
					 | 
				
			||||||
                float4 src_pix4 = convert_float4(vload4(0, src2+scn)); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                ydst1[0] = convert_uchar_sat(fma(coeffs[0], src_pix1.R_COMP, fma(coeffs[1], src_pix1.G_COMP, fma(coeffs[2], src_pix1.B_COMP, 16.5f)))); | 
					 | 
				
			||||||
                ydst1[1] = convert_uchar_sat(fma(coeffs[0], src_pix2.R_COMP, fma(coeffs[1], src_pix2.G_COMP, fma(coeffs[2], src_pix2.B_COMP, 16.5f)))); | 
					 | 
				
			||||||
                ydst2[0] = convert_uchar_sat(fma(coeffs[0], src_pix3.R_COMP, fma(coeffs[1], src_pix3.G_COMP, fma(coeffs[2], src_pix3.B_COMP, 16.5f)))); | 
					 | 
				
			||||||
                ydst2[1] = convert_uchar_sat(fma(coeffs[0], src_pix4.R_COMP, fma(coeffs[1], src_pix4.G_COMP, fma(coeffs[2], src_pix4.B_COMP, 16.5f)))); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                float uv[2] = { fma(coeffs[3], src_pix1.R_COMP, fma(coeffs[4], src_pix1.G_COMP, fma(coeffs[5], src_pix1.B_COMP, 128.5f))), | 
					 | 
				
			||||||
                                fma(coeffs[5], src_pix1.R_COMP, fma(coeffs[6], src_pix1.G_COMP, fma(coeffs[7], src_pix1.B_COMP, 128.5f))) }; | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                udst[0] = convert_uchar_sat(uv[uidx]  ); | 
					 | 
				
			||||||
                vdst[0] = convert_uchar_sat(uv[1-uidx]); | 
					 | 
				
			||||||
#endif | 
					 | 
				
			||||||
                ++y; | 
					 | 
				
			||||||
                src_index += 2*src_step; | 
					 | 
				
			||||||
                ydst_index += 2*dst_step; | 
					 | 
				
			||||||
            } | 
					 | 
				
			||||||
        } | 
					 | 
				
			||||||
    } | 
					 | 
				
			||||||
} | 
					 | 
				
			||||||
@ -1,126 +0,0 @@ | 
				
			|||||||
#include <cstdlib> | 
					 | 
				
			||||||
#include <cstdio> | 
					 | 
				
			||||||
#include <cassert> | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include <cstring> | 
					 | 
				
			||||||
#include <unistd.h> | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// #include <opencv2/opencv.hpp>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef __APPLE__ | 
					 | 
				
			||||||
#include <OpenCL/cl.h> | 
					 | 
				
			||||||
#else | 
					 | 
				
			||||||
#include <CL/cl.h> | 
					 | 
				
			||||||
#endif | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#include "common/util.h" | 
					 | 
				
			||||||
#include "common/timing.h" | 
					 | 
				
			||||||
#include "common/mat.h" | 
					 | 
				
			||||||
#include "clutil.h" | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int main() { | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int rgb_width = 1164; | 
					 | 
				
			||||||
  int rgb_height = 874; | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int rgb_stride = rgb_width*3; | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  size_t out_size = rgb_width*rgb_height*3/2; | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  uint8_t* rgb_buf = (uint8_t*)calloc(1, rgb_width*rgb_height*3); | 
					 | 
				
			||||||
  uint8_t* out = (uint8_t*)calloc(1, out_size); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  for (int y=0; y<rgb_height; y++) { | 
					 | 
				
			||||||
    for (int k=0; k<rgb_stride; k++) { | 
					 | 
				
			||||||
      rgb_buf[y*rgb_stride + k] = k ^ y; | 
					 | 
				
			||||||
    } | 
					 | 
				
			||||||
  } | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  // init cl
 | 
					 | 
				
			||||||
  int err; | 
					 | 
				
			||||||
  cl_device_id device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT); | 
					 | 
				
			||||||
  cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err); | 
					 | 
				
			||||||
  assert(err == 0); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  cl_program prg = cl_create_program_from_file(context, "yuv.cl"); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  err = clBuildProgram(prg, 1, &device_id, "", NULL, NULL); | 
					 | 
				
			||||||
  if (err != 0) { | 
					 | 
				
			||||||
    cl_print_build_errors(prg, device_id); | 
					 | 
				
			||||||
  } | 
					 | 
				
			||||||
  cl_check_error(err); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  cl_kernel krnl = clCreateKernel(prg, "RGB2YUV_YV12_IYUV", &err); | 
					 | 
				
			||||||
  assert(err == 0); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  cl_mem inbuf_cl = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, | 
					 | 
				
			||||||
                                     rgb_width*rgb_height*3, (void*)rgb_buf, &err); | 
					 | 
				
			||||||
  cl_check_error(err); | 
					 | 
				
			||||||
  cl_mem out_cl = clCreateBuffer(context, CL_MEM_READ_WRITE, out_size, NULL, &err); | 
					 | 
				
			||||||
  cl_check_error(err); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  // load into net
 | 
					 | 
				
			||||||
  err = clSetKernelArg(krnl, 0, sizeof(cl_mem), &inbuf_cl); //srcptr
 | 
					 | 
				
			||||||
  assert(err == 0); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int zero = 0; | 
					 | 
				
			||||||
  err = clSetKernelArg(krnl, 1, sizeof(cl_int), &rgb_stride); //src_step
 | 
					 | 
				
			||||||
  assert(err == 0); | 
					 | 
				
			||||||
  err = clSetKernelArg(krnl, 2, sizeof(cl_int), &zero); //src_offset
 | 
					 | 
				
			||||||
  assert(err == 0); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  err = clSetKernelArg(krnl, 3, sizeof(cl_mem), &out_cl); //dstptr
 | 
					 | 
				
			||||||
  assert(err == 0); | 
					 | 
				
			||||||
  err = clSetKernelArg(krnl, 4, sizeof(cl_int), &rgb_width); //dst_step
 | 
					 | 
				
			||||||
  assert(err == 0); | 
					 | 
				
			||||||
  err = clSetKernelArg(krnl, 5, sizeof(cl_int), &zero); //dst_offset
 | 
					 | 
				
			||||||
  assert(err == 0); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const int rows = rgb_height * 3 / 2; | 
					 | 
				
			||||||
  err = clSetKernelArg(krnl, 6, sizeof(cl_int), &rows); //rows
 | 
					 | 
				
			||||||
  assert(err == 0); | 
					 | 
				
			||||||
  err = clSetKernelArg(krnl, 7, sizeof(cl_int), &rgb_width); //cols
 | 
					 | 
				
			||||||
  assert(err == 0); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  cl_command_queue q = clCreateCommandQueue(context, device_id, 0, &err); | 
					 | 
				
			||||||
  assert(err == 0); | 
					 | 
				
			||||||
  const size_t work_size[2] = {rgb_width/2, rows/3}; | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  err = clEnqueueNDRangeKernel(q, krnl, 2, NULL, | 
					 | 
				
			||||||
                              (const size_t*)&work_size, NULL, 0, 0, NULL); | 
					 | 
				
			||||||
  cl_check_error(err); | 
					 | 
				
			||||||
  clFinish(q); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  double t1 = millis_since_boot(); | 
					 | 
				
			||||||
  for (int k=0; k<32; k++) { | 
					 | 
				
			||||||
    err = clEnqueueNDRangeKernel(q, krnl, 2, NULL, | 
					 | 
				
			||||||
                                (const size_t*)&work_size, NULL, 0, 0, NULL); | 
					 | 
				
			||||||
    cl_check_error(err); | 
					 | 
				
			||||||
  } | 
					 | 
				
			||||||
  clFinish(q); | 
					 | 
				
			||||||
  double t2 = millis_since_boot(); | 
					 | 
				
			||||||
  printf("t: %.2f\n", (t2-t1)/32.); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  uint8_t* out_ptr = (uint8_t*)clEnqueueMapBuffer(q, out_cl, CL_FALSE, | 
					 | 
				
			||||||
                                        CL_MAP_READ, 0, out_size, | 
					 | 
				
			||||||
                                        0, NULL, NULL, &err); | 
					 | 
				
			||||||
  assert(err == 0); | 
					 | 
				
			||||||
  clFinish(q); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  FILE* of = fopen("out_cl.bin", "wb"); | 
					 | 
				
			||||||
  fwrite(out_ptr, out_size, 1, of); | 
					 | 
				
			||||||
  fclose(of); | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// #endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return 0; | 
					 | 
				
			||||||
} | 
					 | 
				
			||||||
					Loading…
					
					
				
		Reference in new issue