xpra icon
Bug tracker and wiki

Ticket #925: opencl-debug-and-tweaks.patch

File opencl-debug-and-tweaks.patch, 5.0 KB (added by Antoine Martin, 4 years ago)

Some tweaks to see if the queuing is part of the problem; also handle memoryviews, etc.

  • xpra/codecs/csc_opencl/colorspace_converter.py

     
    322322    def add_rgb_to_yuv(src_rgb_mode, kernel_rgb_mode, upload_rgb_mode, channel_order):
    323323        log("add_rgb_to_yuv%s", (src_rgb_mode, kernel_rgb_mode, upload_rgb_mode, CHANNEL_ORDER_TO_STR.get(channel_order)))
    324324        kernels = gen_rgb_to_yuv_kernels(kernel_rgb_mode)
    325         #log("kernels(%s)=%s", rgb_mode, kernels)
     325        log("gen_rgb_to_yuv_kernels(%s)=%s", kernel_rgb_mode, kernels)
    326326        for key, k_def in kernels.items():
    327327            ksrc, dst = key
    328328            assert ksrc==kernel_rgb_mode
     
    554554        global context, program
    555555        self.context = context
    556556        self.program = program
    557         self.queue = pyopencl.CommandQueue(self.context)
    558557        fm = pyopencl.filter_mode.NEAREST
    559558        self.sampler = pyopencl.Sampler(self.context, False, pyopencl.addressing_mode.CLAMP_TO_EDGE, fm)
    560559        k_def = KERNELS_DEFS.get((self.src_format, self.dst_format))
     
    705704        wheight = dimdiv(self.dst_height, max(y_div for _, y_div in divs))
    706705        globalWorkSize, localWorkSize  = self.get_work_sizes(wwidth, wheight)
    707706
     707        self.queue = pyopencl.CommandQueue(self.context)
     708
    708709        kernelargs = [self.queue, globalWorkSize, localWorkSize]
    709710
    710711        iformat = pyopencl.ImageFormat(pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8)
     
    742743        out_array = numpy.empty(self.dst_width*self.dst_height*4, dtype=numpy.byte)
    743744        pyopencl.enqueue_read_image(self.queue, oimage, (0, 0), (self.dst_width, self.dst_height), out_array)
    744745        self.queue.finish()
     746        self.queue = None
    745747        log("readback using %s took %.1fms", CHANNEL_ORDER_TO_STR.get(self.channel_order), 1000.0*(time.time()-kend))
    746748        self.time += time.time()-start
    747749        self.frames += 1
     
    772774        iformat = pyopencl.ImageFormat(self.channel_order, pyopencl.channel_type.UNSIGNED_INT8)
    773775        shape = (stride//4, self.src_height)
    774776        log("convert_image() type=%s, input image format=%s, shape=%s, work size: local=%s, global=%s", type(pixels), iformat, shape, localWorkSize, globalWorkSize)
    775         idata = memoryview_to_bytes(pixels)
    776         if type(idata)==str:
     777        if type(pixels)==str:
    777778            #str is not a buffer, so we have to copy the data
    778779            #alternatively, we could copy it first ourselves using this:
    779780            #pixels = numpy.fromstring(pixels, dtype=numpy.byte).data
    780781            #but I think this would be even slower
    781782            flags = mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR
     783            iimage = pyopencl.Image(self.context, flags, iformat, shape=shape, hostbuf=pixels)
    782784        else:
     785            if type(pixels) in (memoryview, buffer):
     786                pixels = memoryview_to_bytes(pixels)
    783787            flags = mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR
    784         iimage = pyopencl.Image(self.context, flags, iformat, shape=shape, hostbuf=idata)
     788            iimage = pyopencl.Image(self.context, flags, iformat, shape=shape, hostbuf=pixels)
    785789
     790        self.queue = pyopencl.CommandQueue(self.context)
    786791        kernelargs = [self.queue, globalWorkSize, localWorkSize,
    787792                      iimage, numpy.int32(self.src_width), numpy.int32(self.src_height),
    788793                      numpy.int32(self.dst_width), numpy.int32(self.dst_height),
     
    806811
    807812        kstart = time.time()
    808813        log("convert_image(%s) calling %s%s after %.1fms", image, self.kernel_function_name, tuple(kernelargs), 1000.0*(kstart-start))
    809         self.kernel_function(*kernelargs)
    810         self.queue.finish()
     814        self.kernel_function(*kernelargs).wait()
     815        self.queue.flush()
    811816        #free input image:
    812817        iimage.release()
    813818        kend = time.time()
     
    814819        log("%s took %.1fms", self.kernel_function_name, 1000.0*(kend-kstart))
    815820
    816821        #read back:
    817         pixels = []
     822        narrays = []
    818823        for i in range(3):
    819             out_array = numpy.empty(out_sizes[i], dtype=numpy.byte)
    820             pixels.append(out_array.data)
    821             pyopencl.enqueue_read_buffer(self.queue, out_buffers[i], out_array, is_blocking=False)
     824            out_array = numpy.zeros(out_sizes[i], dtype=numpy.byte)
     825            narrays.append(out_array)
     826            pyopencl.enqueue_read_buffer(self.queue, out_buffers[i], out_array, is_blocking=None).wait()
    822827        readstart = time.time()
    823828        log("queue read events took %.1fms (3 planes of size %s, with strides=%s)", 1000.0*(readstart-kend), out_sizes, strides)
    824829        self.queue.finish()
     830        self.queue = None
    825831        readend = time.time()
    826832        log("wait for read events took %.1fms", 1000.0*(readend-readstart))
     833        pixels = []
     834        for out_array in narrays:
     835            pixels.append(out_array.data)
    827836        #free output buffers:
    828837        for out_buf in out_buffers:
    829838            out_buf.release()