xpra icon
Bug tracker and wiki

Ticket #422: add-csc-opencl-v3.patch

File add-csc-opencl-v3.patch, 19.7 KB (added by Antoine Martin, 7 years ago)

minor tweaks

  • setup.py

     
    6161webp_ENABLED = True
    6262nvenc_ENABLED = False
    6363csc_nvcuda_ENABLED = False
     64csc_opencl_ENABLED = True
    6465
    6566warn_ENABLED = True
    6667strict_ENABLED = True
     
    6970
    7071#allow some of these flags to be modified on the command line:
    7172SWITCHES = ("enc_x264", "x264_static",
    72             "nvenc", "dec_avcodec", "csc_swscale", "csc_nvcuda",
     73            "nvenc", "dec_avcodec",
     74            "csc_swscale", "csc_nvcuda", "csc_opencl",
    7375            "vpx", "vpx_static",
    7476            "webp", "rencode", "clipboard",
    7577            "server", "client", "x11",
     
    954956
    955957
    956958toggle_packages(nvenc_ENABLED, "xpra.codecs.nvenc")
     959toggle_packages(csc_opencl_ENABLED, "xpra.codecs.csc_opencl")
    957960
    958 
    959 
    960961toggle_packages(enc_x264_ENABLED, "xpra.codecs.enc_x264")
    961962if enc_x264_ENABLED:
    962963    make_constants("xpra", "codecs", "enc_x264", "constants")
     
    10031004                ["xpra/codecs/csc_nvcuda/colorspace_converter.pyx", "xpra/codecs/csc_nvcuda/csc_nvcuda.c"],
    10041005                **cuda_pkgconfig), min_version=(0, 16))
    10051006
    1006 
    10071007toggle_packages(vpx_ENABLED, "xpra.codecs.vpx")
    10081008if vpx_ENABLED:
    10091009    if vpx_static_ENABLED:
     
    10201020                **vpx_pkgconfig), min_version=(0, 16))
    10211021
    10221022
    1023 
    10241023toggle_packages(rencode_ENABLED, "xpra.net.rencode")
    10251024if rencode_ENABLED:
    10261025    rencode_pkgconfig = pkgconfig()
     
    10331032                **rencode_pkgconfig))
    10341033
    10351034
    1036 
    10371035if ext_modules:
    10381036    setup_options["ext_modules"] = ext_modules
    10391037if cmdclass:
  • tests/xpra/codecs/test_csc.py

     
    44# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
    55# later version. See the file COPYING for details.
    66
     7import time
    78from xpra.codecs.image_wrapper import ImageWrapper
    89
    910
    10 
    # Test entry point: benchmark the given converter class with perf_measure,
    # then run test_csc1 and test_csc2 against the same class.
    1111def test_csc(ColorspaceConverter):
     12    perf_measure(ColorspaceConverter)
    1213    test_csc1(ColorspaceConverter)
    1314    test_csc2(ColorspaceConverter)
    1415
    1516
     17def perf_measure(ColorspaceConverter):
     18    w, h = 1920, 1080
     19    pixels = bytearray("\0" * (w*h*4))
     20    for y in range(h):
     21        for x in range(w):
     22            i = (y*w+x)*4
     23            pixels[i] = i % 256
     24            pixels[i+1] = i % 256
     25            pixels[i+2] = i % 256
     26            pixels[i+3] = 0
     27    start = time.time()
     28    count = 1024
     29    pixels = test_csc_pixels(ColorspaceConverter, w, h, pixels, count=count)
     30    end = time.time()
     31    print("%s did %sx%s csc %s times in %.1fms" % (ColorspaceConverter, w, h, count, end-start))
     32    mpps = float(w*h*count)/(end-start)
     33    print("%s MPixels/s" % int(mpps/1024/1024))
     34
     35
    1636def test_csc2(ColorspaceConverter):
    1737    w, h = 32, 32
    1838    pixels = bytearray("\0" * (w*h*4))
     
    4464
    4565   
    4666
    47 def test_csc_pixels(ColorspaceConverter, w, h, pixels, checks=()):
     67def test_csc_pixels(ColorspaceConverter, w, h, pixels, checks=(), count=1):
    4868    print("going to create %s" % ColorspaceConverter)
    4969    cc = ColorspaceConverter()
    5070    print("%s()=%s" % (ColorspaceConverter, cc))
    5171    cc.init_context(w, h, "BGRX", w, h, "YUV420P")
    5272    print("ColorspaceConverter=%s" % cc)
    53     print("test_csc() input pixels=%s" % str([hex(x) for x in pixels]))
     73    print("test_csc() input pixels=%s" % str([hex(x) for x in pixels][:256]))
    5474    image = ImageWrapper(0, 0, w, h, pixels, "BGRX", 32, w*4, planes=ImageWrapper.PACKED_RGB)
    55     out = cc.convert_image(image)
     75    for _ in range(count):
     76        out = cc.convert_image(image)
    5677    print("test_csc() output=%s" % out)
    5778    assert out.get_planes()==ImageWrapper._3_PLANES
    5879    pixels = out.get_pixels()
     
    6182        plane = pixels[i]
    6283        print("test_csc() plane[%s]=%s" % (i, type(plane)))
    6384        print("test_csc() len(plane[%s])=%s" % (i, len(plane)))
    64         print("test_csc() plane data[%s]=%s" % (i, str([hex(x) for x in bytearray(plane)])))
     85        print("test_csc() plane data[%s]=%s" % (i, str([hex(x) for x in bytearray(plane)][:256])))
    6586    def check_plane(plane, data, expected):
    6687        #chop data to same size as expected sample:
    6788        if type(data)==buffer:
  • tests/xpra/codecs/test_csc_opencl.py

     
     1#!/usr/bin/env python
     2# This file is part of Xpra.
     3# Copyright (C) 2013 Antoine Martin <antoine@devloop.org.uk>
     4# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
     5# later version. See the file COPYING for details.
     6
     7from tests.xpra.codecs.test_csc import test_csc
     8
     9
     10def test_csc_opencl():
     11    from xpra.codecs.csc_opencl.colorspace_converter import ColorspaceConverter
     12    test_csc(ColorspaceConverter)
     13
     14
     15def main():
     16    import logging
     17    import sys
     18    logging.root.setLevel(logging.INFO)
     19    logging.root.addHandler(logging.StreamHandler(sys.stdout))
     20    test_csc_opencl()
     21
     22
     23if __name__ == "__main__":
     24    main()
  • xpra/codecs/csc_opencl/__init__.py

    Property changes on: tests/xpra/codecs/test_csc_opencl.py
    ___________________________________________________________________
    Added: svn:executable
    ## -0,0 +1 ##
    +*
    \ No newline at end of property
     
     1# This file is part of Xpra.
     2# Copyright (C) 2013 Antoine Martin <antoine@devloop.org.uk>
     3# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
     4# later version. See the file COPYING for details.
  • xpra/codecs/csc_opencl/colorspace_converter.py

     
     1# This file is part of Xpra.
     2# Copyright (C) 2013 Antoine Martin <antoine@devloop.org.uk>
     3# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
     4# later version. See the file COPYING for details.
     5
     6
     7from xpra.log import Logger, debug_if_env
     8log = Logger()
     9debug = debug_if_env(log, "XPRA_OPENCL_DEBUG")
     10error = log.error
     11
     12import os
     13import warnings
     14import numpy
     15assert bytearray
     16import pyopencl             #@UnresolvedImport
     17
     18PREFERRED_DEVICE_TYPE = os.environ.get("XPRA_OPENCL_DEVICE_TYPE", "GPU")
     19PREFERRED_DEVICE_NAME = os.environ.get("XPRA_OPENCL_DEVICE_NAME", "")
     20
     21
     22opencl_platforms = pyopencl.get_platforms()
     23if len(opencl_platforms)==0:
     24    raise ImportError("no OpenCL platforms found!")
     25log.info("PyOpenCL OpenGL support: %s", pyopencl.have_gl())
     26log.info("found %s OpenCL platforms:", len(opencl_platforms))
     27selected_device = None
     28for p in opencl_platforms:
     29    devices = p.get_devices()
     30    log.info("* %s (%s) - %s devices:", p.name, p.vendor, len(devices))
     31    for d in devices:
     32        p = "-"
     33        if d.available and d.compiler_available:
     34            p = "+"
     35            dtype = pyopencl.device_type.to_string(d.type)
     36            if dtype==PREFERRED_DEVICE_TYPE and \
     37                (selected_device is None or len(PREFERRED_DEVICE_NAME)>0 and d.name.find(PREFERRED_DEVICE_NAME)>=0):
     38                selected_device = d
     39        log.info(" %s %s: %s (%s / %s)", p, dtype, d.name.strip(), d.version, d.opencl_c_version)
     40
     41context = None
     42try:
     43    if selected_device:
     44        context = pyopencl.Context([selected_device])
     45    else:
     46        context = pyopencl.create_some_context(interactive=False)
     47    assert context is not None
     48except Exception, e:
     49    error("cannot create an OpenCL context: %s", e, exc_info=True)
     50    raise ImportError("cannot create an OpenCL context: %s" % e)
     51
     52from xpra.codecs.csc_opencl.opencl_kernel import KERNEL
     53program = None
     54try:
     55    with warnings.catch_warnings(record=True) as w:
     56        warnings.simplefilter("always")
     57        program = pyopencl.Program(context, KERNEL)
     58        program.build()
     59        log.debug("all warnings:%s", "\n* ".join([str(x) for x in w]))
     60        build_warnings = [x for x in w if x.category==pyopencl.CompilerWarning]
     61        if len(build_warnings)>0:
     62            debug("%s build warnings:", len(build_warnings))
     63            for x in build_warnings:
     64                debug(str(x))
     65except Exception, e:
     66    error("cannot build the OpenCL program: %s", e, exc_info=True)
     67    raise ImportError("cannot build the OpenCL program: %s" % e)
     68
     69
     70from xpra.codecs.image_wrapper import ImageWrapper
     71from xpra.codecs.codec_constants import codec_spec
     72
     73#COLORSPACES_SRC = [ "RGB", "RGBA", "BGR", "BGRX" ]
     74COLORSPACES_SRC = [ "BGRX" ]
     75#COLORSPACES_DST = [ "YUV420P", "YUV422P", "YUV444P" ]
     76COLORSPACES_DST = [ "YUV420P" ]
     77
     78def get_version():
     79    return pyopencl.version.VERSION_TEXT
     80
     81def get_input_colorspaces():
     82    return COLORSPACES_SRC
     83
     84def get_output_colorspaces(input_colorspace):
     85    return COLORSPACES_DST
     86
     87def get_spec(in_colorspace, out_colorspace):
     88    assert in_colorspace in COLORSPACES_SRC, "invalid input colorspace: %s (must be one of %s)" % (in_colorspace, COLORSPACES_SRC)
     89    assert out_colorspace in COLORSPACES_DST, "invalid output colorspace: %s (must be one of %s)" % (out_colorspace, COLORSPACES_DST)
     90    #ratings: quality, speed, setup cost, cpu cost, gpu cost, latency, max_w, max_h, max_pixels
     91    return codec_spec(ColorspaceConverter, speed=100, setup_cost=10, cpu_cost=10, gpu_cost=50, min_w=16, min_h=16, can_scale=False)
     92
     93
     94class ColorspaceConverter(object):
     95
     96    def __init__(self):
     97        self.src_width = 0
     98        self.src_height = 0
     99        self.src_format = ""
     100        self.dst_width = 0
     101        self.dst_height = 0
     102        self.dst_format = ""
     103        self.time = 0
     104        self.frames = 0
     105        self.queue = None
     106
     107    def init_context(self, src_width, src_height, src_format,
     108                           dst_width, dst_height, dst_format):    #@DuplicatedSignature
     109        global context
     110        assert src_format in COLORSPACES_SRC, "invalid source format: %s" % src_format
     111        assert dst_format in COLORSPACES_DST, "invalid source format: %s" % src_format
     112        self.src_width = src_width
     113        self.src_height = src_height
     114        self.src_format = src_format
     115        self.dst_width = dst_width
     116        self.dst_height = dst_height
     117        self.dst_format = dst_format
     118        self.queue = pyopencl.CommandQueue(context)
     119
     120    def get_info(self):
     121        info = {"frames"    : self.frames,
     122                "src_width" : self.src_width,
     123                "src_height": self.src_height,
     124                "src_format": self.src_format,
     125                "dst_width" : self.dst_width,
     126                "dst_height": self.dst_height,
     127                "dst_format": self.dst_format}
     128        if self.frames>0 and self.time>0:
     129            pps = float(self.src_width) * float(self.src_height) * float(self.frames) / self.time
     130            info["total_time_ms"] = int(self.time*1000.0)
     131            info["pixels_per_second"] = int(pps)
     132        return info
     133
     134    def __str__(self):
     135        if self.queue is None:
     136            return "opencl(uninitialized)"
     137        return "opencl(%s %sx%s - %s %sx%s)" % (self.src_format, self.src_width, self.src_height,
     138                                                 self.dst_format, self.dst_width, self.dst_height)
     139
     140    def is_closed(self):
     141        return False
     142
     143    def __dealloc__(self):                  #@DuplicatedSignature
     144        self.clean()
     145
     146    def get_src_width(self):
     147        return self.src_width
     148
     149    def get_src_height(self):
     150        return self.src_height
     151
     152    def get_src_format(self):
     153        return self.src_format
     154
     155    def get_dst_width(self):
     156        return self.dst_width
     157
     158    def get_dst_height(self):
     159        return self.dst_height
     160
     161    def get_dst_format(self):
     162        return self.dst_format
     163
     164    def get_type(self):
     165        return  "opencl"
     166
     167
     168    def clean(self):                        #@DuplicatedSignature
     169        pass
     170
     171    def convert_image(self, image):
     172        global program
     173        iplanes = image.get_planes()
     174        width = image.get_width()
     175        height = image.get_height()
     176        stride = image.get_rowstride()
     177        debug("convert_image(%s) planes=%s", image, iplanes)
     178        assert iplanes==ImageWrapper.PACKED_RGB, "we only handle packed RGB as input!"
     179        assert image.get_pixel_format()==self.src_format, "invalid source format: %s (expected %s)" % (image.get_pixel_format(), self.src_format)
     180        #create numpy array for source image:
     181        pixels = bytearray(image.get_pixels())
     182        in_array = numpy.array(pixels, dtype=numpy.byte)
     183        in_array.reshape(stride, height)
     184        outstride_base = width
     185        size = height * stride
     186        strides = [outstride_base, outstride_base/4, outstride_base/4]
     187        outsize = sum(strides) * height
     188        debug("convert_image(..) len(pixels)=%s, stride=%s, size=%s, outsize=%s", len(pixels), stride, size, outsize)
     189        mf = pyopencl.mem_flags
     190        in_buf = pyopencl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=in_array)
     191        out_buf = pyopencl.Buffer(context, mf.WRITE_ONLY, outsize)
     192
     193        kernelargs = (in_buf, out_buf, numpy.int32(width), numpy.int32(height),
     194                      numpy.int32(stride), numpy.int32(outstride_base))
     195        try:
     196            program.ARGB2YUV(self.queue, in_array.shape, None, *(kernelargs))
     197            #out_array = numpy.empty_like(in_array)
     198            out_array = numpy.zeros(outsize, dtype=numpy.byte)
     199            read = pyopencl.enqueue_read_buffer(self.queue, out_buf, out_array)
     200            read.wait()
     201            self.queue.finish()
     202        except Exception:
     203            log.error("convert_image(%s) failed", image, exc_info=True)
     204        #debug("convert_image(%s) out_array=%s", image, [hex(x) for x in bytearray(out_array[:])])
     205        planes = []
     206        index = 0
     207        for i in range(3):
     208            end = index + strides[i] * height
     209            plane = bytearray(out_array[index:end])
     210            planes.append(plane)
     211            index = end
     212        out_image = ImageWrapper(0, 0, self.dst_width, self.dst_height, planes, self.dst_format, 24, strides, 3)
     213        debug("convert_image(%s)=%s", image, out_image)
     214        #print("convert_image(%s)=%s" % (image, yuv))
     215        return out_image
  • xpra/codecs/csc_opencl/opencl_kernel.py

     
     1# This file is part of Xpra.
     2# Copyright (C) 2013 Chi Tai Dang
     3# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
     4# later version. See the file COPYING for details.
     5
     6KERNEL = """
     7__kernel void ARGB2YUV (
     8                            __global unsigned int * sourceImage,
     9                            __global unsigned int * destImage,
     10            unsigned int srcWidth,
     11            unsigned int srcHeight,
     12            unsigned int srcStride,
     13            unsigned int yuvStride // must be srcWidth/4 since we pack 4 pixels into 1 Y-unit (with 4 y-pixels)
     14            )
     15{
     16    int i,j;
     17    unsigned int RGBs [ 4 ];
     18    unsigned int posSrc, RGB, Value4 = 0, Value, yuvStrideHalf, srcHeightHalf, yPlaneOffset, posOffset;
     19    unsigned char red, green, blue;
     20
     21    unsigned int posX = get_global_id(0);
     22    unsigned int posY = get_global_id(1);
     23
     24    if ( posX < yuvStride ) {
     25        // Y plane - pack 4 y's within each work item
     26        if ( posY >= srcHeight )
     27            return;
     28
     29        posSrc = (posY * srcStride) + (posX * 4);
     30
     31        RGBs [ 0 ] = sourceImage [ posSrc ];
     32        RGBs [ 1 ] = sourceImage [ posSrc + 1 ];
     33        RGBs [ 2 ] = sourceImage [ posSrc + 2 ];
     34        RGBs [ 3 ] = sourceImage [ posSrc + 3 ];
     35
     36        for ( i=0; i<4; i++ ) {
     37            RGB = RGBs [ i ];
     38
     39            blue = RGB & 0xff; green = (RGB >> 8) & 0xff; red = (RGB >> 16) & 0xff;
     40
     41            Value = ( ( 66 * red + 129 * green + 25 * blue ) >> 8 ) + 16;
     42            Value4 |= (Value << (i * 8));
     43        }
     44
     45        destImage [ (posY * yuvStride) + posX ] = Value4;
     46        return;
     47    }
     48
     49    posX -= yuvStride;
     50    yuvStrideHalf = yuvStride >> 1;
     51
     52    // U plane - pack 4 u's within each work item
     53    if ( posX >= yuvStrideHalf )
     54        return;
     55
     56    srcHeightHalf = srcHeight >> 1;
     57    if ( posY < srcHeightHalf ) {
     58        posSrc = ((posY * 2) * srcStride) + (posX * 8);
     59
     60        RGBs [ 0 ] = sourceImage [ posSrc ];
     61        RGBs [ 1 ] = sourceImage [ posSrc + 2 ];
     62        RGBs [ 2 ] = sourceImage [ posSrc + 4 ];
     63        RGBs [ 3 ] = sourceImage [ posSrc + 6 ];
     64
     65        for ( i=0; i<4; i++ ) {
     66            RGB = RGBs [ i ];
     67
     68            blue = RGB & 0xff; green = (RGB >> 8) & 0xff; red = (RGB >> 16) & 0xff;
     69            Value = ( ( -38 * red + -74 * green + 112 * blue ) >> 8 ) + 128;
     70            Value4 |= (Value << (i * 8));
     71        }
     72        yPlaneOffset = yuvStride * srcHeight;
     73        posOffset = (posY * yuvStrideHalf) + posX;
     74        destImage [ yPlaneOffset + posOffset ] = Value4;
     75        return;
     76    }
     77
     78    posY -= srcHeightHalf;
     79    if ( posY >= srcHeightHalf )
     80        return;
     81
     82    // V plane - pack 4 v's within each work item
     83    posSrc = ((posY * 2) * srcStride) + (posX * 8);
     84
     85    RGBs [ 0 ] = sourceImage [ posSrc ];
     86    RGBs [ 1 ] = sourceImage [ posSrc + 2 ];
     87    RGBs [ 2 ] = sourceImage [ posSrc + 4 ];
     88    RGBs [ 3 ] = sourceImage [ posSrc + 6 ];
     89
     90    for ( i=0; i<4; i++ ) {
     91        RGB = RGBs [ i ];
     92
     93        blue = RGB & 0xff; green = (RGB >> 8) & 0xff; red = (RGB >> 16) & 0xff;
     94
     95        Value = ( ( 112 * red + -94 * green + -18 * blue ) >> 8 ) + 128;
     96        Value4 |= (Value << (i * 8));
     97    }
     98
     99    yPlaneOffset = yuvStride * srcHeight;
     100    posOffset = (posY * yuvStrideHalf) + posX;
     101
     102    destImage [ yPlaneOffset + (yPlaneOffset >> 2) + posOffset ] = Value4;
     103    return;
     104}
     105"""
     106 No newline at end of file
  • xpra/codecs/video_enc_pipeline.py

     
    88log = Logger()
    99debug = debug_if_env(log, "XPRA_VIDEOPIPELINE_DEBUG")
    1010
    11 from xpra.scripts.config import csc_swscale, enc_vpx, enc_x264
     11from xpra.scripts.config import csc_swscale, csc_opencl, enc_vpx, enc_x264
    1212
    1313
    1414class VideoPipelineHelper(object):
     
    6363            self.init_csc_option(csc_swscale)
    6464        except:
    6565            log.warn("init_csc_options() cannot add swscale csc", exc_info=True)
     66        try:
     67            self.init_csc_option(csc_opencl)
     68        except:
     69            log.warn("init_csc_options() cannot add opencl csc", exc_info=True)
    6670        #try:
    6771        #    self.init_csc_option(csc_nvcuda)
    6872        #except:
  • xpra/scripts/config.py

     
    6666has_csc_swscale = csc_swscale is not None
    6767add_codec_version("swscale", "xpra.codecs.csc_swscale.colorspace_converter", "get_version", True)
    6868
     69csc_opencl = codec_import_check("csc swscale", "xpra.codecs.csc_swscale", "xpra.codecs.csc_swscale.colorspace_converter", "ColorspaceConverter")
     70has_csc_opencl = csc_opencl is not None
     71add_codec_version("opencl", "xpra.codecs.csc_opencl.colorspace_converter", "get_version", True)
     72
    6973csc_nvcuda = None   #codec_import_check("csc nvcuda", "xpra.codecs.csc_nvcuda", "xpra.codecs.csc_nvcuda.colorspace_converter", "ColorspaceConverter")
    7074has_csc_nvcuda = csc_nvcuda is not None
    7175add_codec_version("nvcuda", "xpra.codecs.csc_nvcuda.colorspace_converter", "get_version", True)