xpra icon
Bug tracker and wiki

Ticket #422: add-csc-opencl-v10.patch

File add-csc-opencl-v10.patch, 17.3 KB (added by Antoine Martin, 7 years ago)

working version with all yuv formats as input and both BGRX and RGBX as output

  • setup.py

     
    6161webp_ENABLED = True
    6262nvenc_ENABLED = False
    6363csc_nvcuda_ENABLED = False
     64csc_opencl_ENABLED = True
    6465
    6566warn_ENABLED = True
    6667strict_ENABLED = True
     
    6970
    7071#allow some of these flags to be modified on the command line:
    7172SWITCHES = ("enc_x264", "x264_static",
    72             "nvenc", "dec_avcodec", "csc_swscale", "csc_nvcuda",
     73            "nvenc", "dec_avcodec",
     74            "csc_swscale", "csc_nvcuda", "csc_opencl",
    7375            "vpx", "vpx_static",
    7476            "webp", "rencode", "clipboard",
    7577            "server", "client", "x11",
     
    954956
    955957
    956958toggle_packages(nvenc_ENABLED, "xpra.codecs.nvenc")
     959toggle_packages(csc_opencl_ENABLED, "xpra.codecs.csc_opencl")
    957960
    958 
    959 
    960961toggle_packages(enc_x264_ENABLED, "xpra.codecs.enc_x264")
    961962if enc_x264_ENABLED:
    962963    make_constants("xpra", "codecs", "enc_x264", "constants")
     
    10031004                ["xpra/codecs/csc_nvcuda/colorspace_converter.pyx", "xpra/codecs/csc_nvcuda/csc_nvcuda.c"],
    10041005                **cuda_pkgconfig), min_version=(0, 16))
    10051006
    1006 
    10071007toggle_packages(vpx_ENABLED, "xpra.codecs.vpx")
    10081008if vpx_ENABLED:
    10091009    if vpx_static_ENABLED:
     
    10201020                **vpx_pkgconfig), min_version=(0, 16))
    10211021
    10221022
    1023 
    10241023toggle_packages(rencode_ENABLED, "xpra.net.rencode")
    10251024if rencode_ENABLED:
    10261025    rencode_pkgconfig = pkgconfig()
     
    10331032                **rencode_pkgconfig))
    10341033
    10351034
    1036 
    10371035if ext_modules:
    10381036    setup_options["ext_modules"] = ext_modules
    10391037if cmdclass:
  • tests/xpra/codecs/test_csc_opencl.py

     
     1#!/usr/bin/env python
     2# This file is part of Xpra.
     3# Copyright (C) 2013 Antoine Martin <antoine@devloop.org.uk>
     4# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
     5# later version. See the file COPYING for details.
     6
     7from tests.xpra.codecs.test_csc import test_csc_planar
     8
     9
     10def test_csc_opencl():
     11    from xpra.codecs.csc_opencl import colorspace_converter
     12    test_csc_planar(colorspace_converter)
     13
     14
     15def main():
     16    import logging
     17    import sys
     18    logging.root.setLevel(logging.INFO)
     19    logging.root.addHandler(logging.StreamHandler(sys.stdout))
     20    test_csc_opencl()
     21
     22
     23if __name__ == "__main__":
     24    main()
  • xpra/codecs/csc_opencl/__init__.py

    Property changes on: tests/xpra/codecs/test_csc_opencl.py
    ___________________________________________________________________
    Added: svn:executable
    ## -0,0 +1 ##
    +*
    \ No newline at end of property
     
     1# This file is part of Xpra.
     2# Copyright (C) 2013 Antoine Martin <antoine@devloop.org.uk>
     3# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
     4# later version. See the file COPYING for details.
  • xpra/codecs/csc_opencl/colorspace_converter.py

     
     1# This file is part of Xpra.
     2# Copyright (C) 2013 Antoine Martin <antoine@devloop.org.uk>
     3# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
     4# later version. See the file COPYING for details.
     5
     6
     7from xpra.log import Logger, debug_if_env
     8log = Logger()
     9debug = debug_if_env(log, "XPRA_OPENCL_DEBUG")
     10error = log.error
     11
     12import os
     13import warnings
     14import numpy
     15assert bytearray
     16import pyopencl             #@UnresolvedImport
     17
#device selection preferences, each can be overridden with an environment variable.
#device type, compared with the string form of the pyopencl device type (default: "GPU"):
PREFERRED_DEVICE_TYPE = os.environ.get("XPRA_OPENCL_DEVICE_TYPE", "GPU")
#substring to look for in the device name (empty: any device name is accepted):
PREFERRED_DEVICE_NAME = os.environ.get("XPRA_OPENCL_DEVICE_NAME", "")
#substring to look for in the platform name (empty: any platform is accepted):
PREFERRED_DEVICE_PLATFORM = os.environ.get("XPRA_OPENCL_PLATFORM", "")
     21
     22
     23opencl_platforms = pyopencl.get_platforms()
     24if len(opencl_platforms)==0:
     25    raise ImportError("no OpenCL platforms found!")
     26log.info("PyOpenCL OpenGL support: %s", pyopencl.have_gl())
     27log.info("found %s OpenCL platforms:", len(opencl_platforms))
     28def device_info(d):
     29    dtype = pyopencl.device_type.to_string(d.type)
     30    return "%s: %s (%s / %s)" % (dtype, d.name.strip(), d.version, d.opencl_c_version)
     31def platform_info(platform):
     32    return "%s (%s)" % (platform.name, platform.vendor)
     33
     34selected_device = None
     35selected_platform = None
     36for platform in opencl_platforms:
     37    devices = platform.get_devices()
     38    log.info("* %s - %s devices:", platform_info(platform), len(devices))
     39    for d in devices:
     40        p = "-"
     41        if d.available and d.compiler_available and d.get_info(pyopencl.device_info.IMAGE_SUPPORT):
     42            p = "+"
     43            dtype = pyopencl.device_type.to_string(d.type)
     44            if selected_device is None and dtype==PREFERRED_DEVICE_TYPE and \
     45                (len(PREFERRED_DEVICE_NAME)==0 or d.name.find(PREFERRED_DEVICE_NAME)>=0) and \
     46                (len(PREFERRED_DEVICE_PLATFORM)==0 or str(platform.name).find(PREFERRED_DEVICE_PLATFORM)>=0):
     47                selected_device = d
     48                selected_platform = platform
     49        log.info(" %s %s", p, device_info(d))
     50
     51context = None
     52try:
     53    if selected_device:
     54        log.info("using platform: %s", platform_info(selected_platform))
     55        log.info("using device: %s", device_info(selected_device))
     56        debug("max_work_group_size=%s", selected_device.max_work_group_size)
     57        debug("max_work_item_dimensions=%s", selected_device.max_work_item_dimensions)
     58        debug("max_work_item_sizes=%s", selected_device.max_work_item_sizes)
     59        context = pyopencl.Context([selected_device])
     60    else:
     61        context = pyopencl.create_some_context(interactive=False)
     62    assert context is not None
     63except Exception, e:
     64    error("cannot create an OpenCL context: %s", e, exc_info=True)
     65    raise ImportError("cannot create an OpenCL context: %s" % e)
     66
     67from xpra.codecs.csc_opencl.opencl_kernels import KERNELS
     68program = None
     69try:
     70    with warnings.catch_warnings(record=True) as w:
     71        warnings.simplefilter("always")
     72        all_kernels = "\n".join(KERNELS.values())
     73        program = pyopencl.Program(context, all_kernels)
     74        program.build()
     75        log.debug("all warnings:%s", "\n* ".join([str(x) for x in w]))
     76        build_warnings = [x for x in w if x.category==pyopencl.CompilerWarning]
     77        if len(build_warnings)>0:
     78            debug("%s build warnings:", len(build_warnings))
     79            for x in build_warnings:
     80                debug(str(x))
     81except Exception, e:
     82    error("cannot build the OpenCL program: %s", e, exc_info=True)
     83    raise ImportError("cannot build the OpenCL program: %s" % e)
     84
     85def roundup(n, m):
     86    return (n + m - 1) & ~(m - 1)
     87
     88
     89from xpra.codecs.image_wrapper import ImageWrapper
     90from xpra.codecs.codec_constants import codec_spec
     91
     92COLORSPACES_SRC = sorted(list(set([src for (src, dst) in KERNELS.keys()])))
     93
     94
     95def get_version():
     96    return pyopencl.version.VERSION_TEXT
     97
     98def get_input_colorspaces():
     99    return COLORSPACES_SRC
     100
     101def get_output_colorspaces(input_colorspace):
     102    return [dst for (src, dst) in KERNELS.keys() if src==input_colorspace]
     103
     104def validate_in_out(in_colorspace, out_colorspace):
     105    assert in_colorspace in get_input_colorspaces(), "invalid input colorspace: %s (must be one of %s)" % (in_colorspace, get_input_colorspaces())
     106    assert out_colorspace in get_output_colorspaces(in_colorspace), "invalid output colorspace: %s (must be one of %s for input %s)" % (out_colorspace, get_output_colorspaces(in_colorspace), in_colorspace)
     107
     108def get_spec(in_colorspace, out_colorspace):
     109    validate_in_out(in_colorspace, out_colorspace)
     110    #ratings: quality, speed, setup cost, cpu cost, gpu cost, latency, max_w, max_h, max_pixels
     111    return codec_spec(ColorspaceConverter, speed=100, setup_cost=10, cpu_cost=10, gpu_cost=50, min_w=16, min_h=16, can_scale=False)
     112
     113
     114class ColorspaceConverter(object):
     115
     116    def __init__(self):
     117        self.src_width = 0
     118        self.src_height = 0
     119        self.src_format = ""
     120        self.dst_width = 0
     121        self.dst_height = 0
     122        self.dst_format = ""
     123        self.time = 0
     124        self.frames = 0
     125        self.queue = None
     126        self.kernel_function = None
     127
     128    def init_context(self, src_width, src_height, src_format,
     129                           dst_width, dst_height, dst_format):    #@DuplicatedSignature
     130        global context
     131        validate_in_out(src_format, dst_format)
     132        self.src_width = src_width
     133        self.src_height = src_height
     134        self.src_format = src_format
     135        self.dst_width = dst_width
     136        self.dst_height = dst_height
     137        self.dst_format = dst_format
     138        self.queue = pyopencl.CommandQueue(context)
     139        debug("init_context(..) kernel source=%s", KERNELS.get((src_format, dst_format)))
     140        self.kernel_function = getattr(program, "%s_to_%s" % (src_format, dst_format))
     141        debug("init_context(..) kernel_function=%s", self.kernel_function)
     142
     143    def get_info(self):
     144        info = {"frames"    : self.frames,
     145                "src_width" : self.src_width,
     146                "src_height": self.src_height,
     147                "src_format": self.src_format,
     148                "dst_width" : self.dst_width,
     149                "dst_height": self.dst_height,
     150                "dst_format": self.dst_format}
     151        if self.frames>0 and self.time>0:
     152            pps = float(self.src_width) * float(self.src_height) * float(self.frames) / self.time
     153            info["total_time_ms"] = int(self.time*1000.0)
     154            info["pixels_per_second"] = int(pps)
     155        return info
     156
     157    def __str__(self):
     158        if self.queue is None:
     159            return "opencl(uninitialized)"
     160        return "opencl(%s %sx%s - %s %sx%s)" % (self.src_format, self.src_width, self.src_height,
     161                                                 self.dst_format, self.dst_width, self.dst_height)
     162
     163    def is_closed(self):
     164        return False
     165
     166    def __dealloc__(self):                  #@DuplicatedSignature
     167        self.clean()
     168
     169    def get_src_width(self):
     170        return self.src_width
     171
     172    def get_src_height(self):
     173        return self.src_height
     174
     175    def get_src_format(self):
     176        return self.src_format
     177
     178    def get_dst_width(self):
     179        return self.dst_width
     180
     181    def get_dst_height(self):
     182        return self.dst_height
     183
     184    def get_dst_format(self):
     185        return self.dst_format
     186
     187    def get_type(self):
     188        return  "opencl"
     189
     190
     191    def clean(self):                        #@DuplicatedSignature
     192        pass
     193
     194    def convert_image(self, image):
     195        global program
     196        iplanes = image.get_planes()
     197        width = image.get_width()
     198        height = image.get_height()
     199        strides = image.get_rowstride()
     200        pixels = image.get_pixels()
     201        debug("convert_image(%s) planes=%s", image, iplanes)
     202        assert iplanes==ImageWrapper._3_PLANES, "we only handle planar data as input!"
     203        assert image.get_pixel_format()==self.src_format, "invalid source format: %s (expected %s)" % (image.get_pixel_format(), self.src_format)
     204        assert len(strides)==len(pixels)==3, "invalid number of planes or strides (should be 3)"
     205        mf = pyopencl.mem_flags
     206
     207        #ensure the local and global work size are valid, see:
     208        #http://stackoverflow.com/questions/3957125/questions-about-global-and-local-work-size
     209        chunk = 64
     210        while chunk**2>selected_device.max_work_group_size or chunk>min(selected_device.max_work_item_sizes):
     211            chunk /= 2
     212        localWorkSize = (chunk, chunk)
     213        globalWorkSize = (roundup(width, localWorkSize[0]), roundup(height, localWorkSize[1]))
     214
     215        kernelargs = [self.queue, globalWorkSize, localWorkSize]
     216
     217        #output image:
     218        oformat = pyopencl.ImageFormat(pyopencl.channel_order.RGBA, pyopencl.channel_type.UNORM_INT8)
     219        oimage = pyopencl.Image(context,
     220                                pyopencl.mem_flags.WRITE_ONLY,
     221                                oformat, shape=(width, height))
     222
     223        #convert input buffers to numpy arrays then OpenCL Buffers:
     224        for i in range(3):
     225            in_array = numpy.frombuffer(pixels[i], dtype=numpy.byte)
     226            in_buf = pyopencl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=in_array)
     227            kernelargs.append(in_buf)
     228            kernelargs.append(numpy.int32(strides[i]))
     229        kernelargs += [numpy.int32(width), numpy.int32(height), oimage]
     230
     231        debug("convert_image(%s) calling %s%s", self.kernel_function, kernelargs)
     232        self.kernel_function(*kernelargs)
     233        out_array = numpy.empty(width*height*4, dtype=numpy.byte)
     234        read = pyopencl.enqueue_read_image(self.queue, oimage, origin=(0, 0), region=(width, height), hostbuf=out_array)
     235        read.wait()
     236        self.queue.finish()
     237        return ImageWrapper(0, 0, self.dst_width, self.dst_height, out_array.data, self.dst_format, 24, strides, planes=ImageWrapper.PACKED_RGB)
  • xpra/codecs/csc_opencl/opencl_kernels.py

     
     1# Copyright (C) 2011 Michael Zucchi
     2# This file is based on code from socles, an OpenCL image processing library.
     3#
     4# socles is free software: you can redistribute it and/or modify
     5# it under the terms of the GNU General Public License as published by
     6# the Free Software Foundation, either version 3 of the License, or
     7# (at your option) any later version.
     8#
     9# socles is distributed in the hope that it will be useful,
     10# but WITHOUT ANY WARRANTY; without even the implied warranty of
     11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12# GNU General Public License for more details.
     13#
     14# You should have received a copy of the GNU General Public License
     15# along with socles.  If not, see <http://www.gnu.org/licenses/>.
     16
     17
     18YUV_TO_RGB = {"X"    : "1.0",
     19              "R"    : "Y + 1.5958 * Cb",
     20              "G"    : "Y - 0.39173*Cr-0.81290*Cb",
     21              "B"    : "Y + 2.017*Cr"
     22              }
     23
     24#Cr width div, Cr heigth div, Cb width div, Cb width div
     25YUV_DIVS = {"444"   : [1, 1, 1, 1],
     26            "422"   : [2, 2, 1, 1],
     27            "420"   : [2, 2, 2, 2]
     28            }
     29
     30
     31def gen_yuv_2_rgb_kernel(yuv_format, rgb_format):
     32    assert len(rgb_format)==4, "invalid destination rgb format: %s" % rgb_format
     33    args = tuple([yuv_format, rgb_format] + YUV_DIVS[yuv_format] +[YUV_TO_RGB[c] for c in rgb_format])
     34    kstr = """
     35__kernel void YUV%sP_to_%s(global uchar *srcY, uint strideY,
     36              global uchar *srcU, uint strideU,
     37              global uchar *srcV, uint strideV,
     38              uint w, uint h, write_only image2d_t dst) {
     39    uint gx = get_global_id(0);
     40    uint gy = get_global_id(1);
     41
     42    if ((gx < w) & (gy<h)) {
     43        float4 p;
     44
     45        float Y = 1.1643 * (srcY[gx + gy*strideY] / 255.0f - 0.0625);
     46        float Cr = srcU[gx/%s+(gy/%s)*(strideU)] / 255.0f - 0.5f;
     47        float Cb = srcV[gx/%s+(gy/%s)*(strideV)] / 255.0f - 0.5f;
     48
     49        p.s0 = %s;
     50        p.s1 = %s;
     51        p.s2 = %s;
     52        p.s3 = %s;
     53
     54        write_imagef(dst, (int2)( gx, gy ), p);
     55    }
     56}
     57"""
     58    return kstr % args
     59
     60
     61KERNELS = {}
     62for yuv in YUV_DIVS.keys():
     63    for rgb in ("XRGB", "BGRX"):
     64        KERNELS[("YUV%sP" % yuv, rgb)] = gen_yuv_2_rgb_kernel(yuv, rgb)
  • xpra/codecs/video_enc_pipeline.py

     
    88log = Logger()
    99debug = debug_if_env(log, "XPRA_VIDEOPIPELINE_DEBUG")
    1010
    11 from xpra.scripts.config import csc_swscale, enc_vpx, enc_x264
     11from xpra.scripts.config import csc_swscale, csc_opencl, enc_vpx, enc_x264
    1212
    1313
    1414class VideoPipelineHelper(object):
     
    6363            self.init_csc_option(csc_swscale)
    6464        except:
    6565            log.warn("init_csc_options() cannot add swscale csc", exc_info=True)
     66        try:
     67            self.init_csc_option(csc_opencl)
     68        except:
     69            log.warn("init_csc_options() cannot add opencl csc", exc_info=True)
    6670        #try:
    6771        #    self.init_csc_option(csc_nvcuda)
    6872        #except:
  • xpra/scripts/config.py

     
    6666has_csc_swscale = csc_swscale is not None
    6767add_codec_version("swscale", "xpra.codecs.csc_swscale.colorspace_converter", "get_version", True)
    6868
     69csc_opencl = codec_import_check("csc swscale", "xpra.codecs.csc_swscale", "xpra.codecs.csc_swscale.colorspace_converter", "ColorspaceConverter")
     70has_csc_opencl = csc_opencl is not None
     71add_codec_version("opencl", "xpra.codecs.csc_opencl.colorspace_converter", "get_version", True)
     72
    6973csc_nvcuda = None   #codec_import_check("csc nvcuda", "xpra.codecs.csc_nvcuda", "xpra.codecs.csc_nvcuda.colorspace_converter", "ColorspaceConverter")
    7074has_csc_nvcuda = csc_nvcuda is not None
    7175add_codec_version("nvcuda", "xpra.codecs.csc_nvcuda.colorspace_converter", "get_version", True)