@ -19,6 +19,7 @@ void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int w 
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  s - > loadys_krnl  =  CL_CHECK_ERR ( clCreateKernel ( prg ,  " loadys " ,  & err ) ) ;   
			
		
	
		
			
				
					  s - > loaduv_krnl  =  CL_CHECK_ERR ( clCreateKernel ( prg ,  " loaduv " ,  & err ) ) ;   
			
		
	
		
			
				
					  s - > copy_krnl  =  CL_CHECK_ERR ( clCreateKernel ( prg ,  " copy " ,  & err ) ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  // done with this
   
			
		
	
		
			
				
					  CL_CHECK ( clReleaseProgram ( prg ) ) ;   
			
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
				@ -27,33 +28,46 @@ void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int w 
			
		
	
		
			
				
					void  loadyuv_destroy ( LoadYUVState *  s )  {  
			
		
	
		
			
				
					  CL_CHECK ( clReleaseKernel ( s - > loadys_krnl ) ) ;   
			
		
	
		
			
				
					  CL_CHECK ( clReleaseKernel ( s - > loaduv_krnl ) ) ;   
			
		
	
		
			
				
					  CL_CHECK ( clReleaseKernel ( s - > copy_krnl ) ) ;   
			
		
	
		
			
				
					}  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					void  loadyuv_queue ( LoadYUVState *  s ,  cl_command_queue  q ,  
			
		
	
		
			
				
					                   cl_mem  y_cl ,  cl_mem  u_cl ,  cl_mem  v_cl ,   
			
		
	
		
			
				
					                   cl_mem  out_cl )  {   
			
		
	
		
			
				
					                   cl_mem  out_cl ,  bool  do_shift )  {   
			
		
	
		
			
				
					  cl_int  global_out_off  =  0 ;   
			
		
	
		
			
				
					  if  ( do_shift )  {   
			
		
	
		
			
				
					    // shift the image in slot 1 to slot 0, then place the new image in slot 1
   
			
		
	
		
			
				
					    global_out_off  + =  ( s - > width * s - > height )  +  ( s - > width / 2 ) * ( s - > height / 2 ) * 2 ;   
			
		
	
		
			
				
					    CL_CHECK ( clSetKernelArg ( s - > copy_krnl ,  0 ,  sizeof ( cl_mem ) ,  & out_cl ) ) ;   
			
		
	
		
			
				
					    CL_CHECK ( clSetKernelArg ( s - > copy_krnl ,  1 ,  sizeof ( cl_int ) ,  & global_out_off ) ) ;   
			
		
	
		
			
				
					    const  size_t  copy_work_size  =  global_out_off / 8 ;   
			
		
	
		
			
				
					    CL_CHECK ( clEnqueueNDRangeKernel ( q ,  s - > copy_krnl ,  1 ,  NULL ,   
			
		
	
		
			
				
					                                & copy_work_size ,  NULL ,  0 ,  0 ,  NULL ) ) ;   
			
		
	
		
			
				
					  }   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  CL_CHECK ( clSetKernelArg ( s - > loadys_krnl ,  0 ,  sizeof ( cl_mem ) ,  & y_cl ) ) ;   
			
		
	
		
			
				
					  CL_CHECK ( clSetKernelArg ( s - > loadys_krnl ,  1 ,  sizeof ( cl_mem ) ,  & out_cl ) ) ;   
			
		
	
		
			
				
					  CL_CHECK ( clSetKernelArg ( s - > loadys_krnl ,  2 ,  sizeof ( cl_int ) ,  & global_out_off ) ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  const  size_t  loadys_work_size  =  ( s - > width * s - > height ) / 8 ;   
			
		
	
		
			
				
					  CL_CHECK ( clEnqueueNDRangeKernel ( q ,  s - > loadys_krnl ,  1 ,  NULL ,   
			
		
	
		
			
				
					                               & loadys_work_size ,  NULL ,  0 ,  0 ,  NULL ) ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  const  size_t  loaduv_work_size  =  ( ( s - > width / 2 ) * ( s - > height / 2 ) ) / 8 ;   
			
		
	
		
			
				
					  cl_int  loaduv_out_off  =  ( s - > width * s - > height ) ;   
			
		
	
		
			
				
					  global_out_off  + =  ( s - > width * s - > height ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  CL_CHECK ( clSetKernelArg ( s - > loaduv_krnl ,  0 ,  sizeof ( cl_mem ) ,  & u_cl ) ) ;   
			
		
	
		
			
				
					  CL_CHECK ( clSetKernelArg ( s - > loaduv_krnl ,  1 ,  sizeof ( cl_mem ) ,  & out_cl ) ) ;   
			
		
	
		
			
				
					  CL_CHECK ( clSetKernelArg ( s - > loaduv_krnl ,  2 ,  sizeof ( cl_int ) ,  & loaduv _out_off) ) ;   
			
		
	
		
			
				
					  CL_CHECK ( clSetKernelArg ( s - > loaduv_krnl ,  2 ,  sizeof ( cl_int ) ,  & global _out_off) ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  CL_CHECK ( clEnqueueNDRangeKernel ( q ,  s - > loaduv_krnl ,  1 ,  NULL ,   
			
		
	
		
			
				
					                               & loaduv_work_size ,  NULL ,  0 ,  0 ,  NULL ) ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  loaduv _out_off+ =  ( s - > width / 2 ) * ( s - > height / 2 ) ;   
			
		
	
		
			
				
					  global _out_off+ =  ( s - > width / 2 ) * ( s - > height / 2 ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  CL_CHECK ( clSetKernelArg ( s - > loaduv_krnl ,  0 ,  sizeof ( cl_mem ) ,  & v_cl ) ) ;   
			
		
	
		
			
				
					  CL_CHECK ( clSetKernelArg ( s - > loaduv_krnl ,  1 ,  sizeof ( cl_mem ) ,  & out_cl ) ) ;   
			
		
	
		
			
				
					  CL_CHECK ( clSetKernelArg ( s - > loaduv_krnl ,  2 ,  sizeof ( cl_int ) ,  & loaduv _out_off) ) ;   
			
		
	
		
			
				
					  CL_CHECK ( clSetKernelArg ( s - > loaduv_krnl ,  2 ,  sizeof ( cl_int ) ,  & global _out_off) ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  CL_CHECK ( clEnqueueNDRangeKernel ( q ,  s - > loaduv_krnl ,  1 ,  NULL ,   
			
		
	
		
			
				
					                               & loaduv_work_size ,  NULL ,  0 ,  0 ,  NULL ) ) ;