xpra icon
Bug tracker and wiki

Ticket #640: bencode-work9.patch

File bencode-work9.patch, 15.0 KB (added by Antoine Martin, 5 years ago)

Work-in-progress patch: fixes bencode for py3k (Python 3).

  • setup.py

     
    19761976if cython_bencode_ENABLED:
    19771977    bencode_pkgconfig = pkgconfig(optimize=not debug_ENABLED)
    19781978    cython_add(Extension("xpra.net.bencode.cython_bencode",
    1979                 ["xpra/net/bencode/cython_bencode.pyx"],
     1979                ["xpra/net/bencode/cython_bencode.pyx", buffers_c],
    19801980                **bencode_pkgconfig))
    19811981
    19821982
  • tests/unit/net/bencode_test.py

     
    55# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
    66# later version. See the file COPYING for details.
    77
    8 from xpra.util import nonl, bytestostr, strtobytes
     8from xpra.util import strtobytes, bytestostr, repr_ellipsized
    99import unittest
    1010import binascii
    1111
     
    9797        return
    9898    if type(r)==dict and type(o)==dict:
    9999        for k,ov in o.items():
    100             ak = None
    101             for tk in k,bytestostr(k),strtobytes(k):
    102                 if tk in r:
    103                     ak = tk
    104                     break
    105             if not ak or ak not in r:
    106                 assert False, "restored dict is missing %s" % k
    107             rv = r.get(ak)
     100            #with py3k, the key can end up being bytes instead of string...
     101            rv = r.get(k, r.get(bytestostr(k), r.get(strtobytes(k))))
     102            assert rv is not None, "restored dict is missing %s: %s" % (k, r)
    108103            _cmp(ov, rv)
    109104        return
     105    import sys
     106    if sys.version_info[0]<3 and type(o)==unicode and type(r)==str:
     107        o = o.encode("utf-8")
     108    elif type(o)==bytes and type(r)==str:
     109        o = o.decode("utf-8")
     110    elif type(o)==str and type(r)==bytes:
     111        r = r.decode("utf-8")
    110112    if o==r:
    111113        return
    112114    print("")
    113115    print("original %s:" % type(o))
    114     print("%s" % binascii.hexlify(strtobytes(o)))
    115116    print("returned %s:" % type(r))
    116117    try:
    117         print("%s" % binascii.hexlify(strtobytes(r)))
     118        print("original: %s" % binascii.hexlify(str(o)))
     119        print("returned: %s" % binascii.hexlify(str(r)))
    118120    except:
    119121        pass
    120122    assert False, "value does not match: expected %s (%s) but got %s (%s)" % (o, type(o), r, type(r))
     
    126128    def test_decoding(self):
    127129
    128130        def t(s, ev, remainder=""):
    129             rv, rr = self.decode(s)
    130             #print("decode(%s)=%s (%s)" % (s, rv, type(rv)))
    131             _cmp(rv, ev)
     131            try:
     132                rv, rr = self.decode(s)
     133                #print("decode(%s)=%s (%s)" % (s, rv, type(rv)))
     134                _cmp(rv, ev)
     135            except Exception as e:
     136                print("error on decoding of '%s'" % repr_ellipsized(s))
     137                raise e
    132138            rrstr = s[rr:]
    133139            assert rrstr == remainder, "expected remainder value %s but got %s" % (remainder, rrstr)
    134140            # With gibberish added:
     
    178184
    179185    def t(self, v, encstr=None):
    180186        be = self.encode(v)
    181         #print("bencode(%s)=%s" % (i(v), i(be)))
    182187        if encstr:
    183             p = 0
    184             while p<min(len(encstr), len(be)):
    185                 if encstr[p]!=be[p]:
    186                     break
    187                 p += 1
    188             assert be==encstr, "expected '%s' but got '%s', strings differ at position %s: expected '%s' but found '%s' (lengths: %s vs %s)" % (nonl(encstr), nonl(be), p, encstr[p], be[p], len(encstr), len(be))
     188            _cmp(be, encstr)
    189189        restored = self.decode(be)
    190190        rlist = restored[0]
    191         if len(rlist)!=len(v):
    192             print("MISMATCH!")
    193             print("v=%s" % v)
    194             print("l=%s" % rlist)
    195             assert False, "length of decoded value does not match: exected %s but got %s" % (len(v), len(rlist))
    196         assert len(rlist)==2
    197         #print("enc=%s" % binascii.hexlify(be or ""))
    198         #print("exp=%s" % binascii.hexlify(encstr or ""))
    199         _cmp(rlist[0], v[0])
     191        _cmp(v[0], rlist[0])
    200192
    201193        rd = rlist[1]
    202194        od = v[1]
     
    203195        _cmp(od, rd)
    204196
    205197    def test_simple(self):
    206         v = [b"a", []]
     198        v = ["a", []]
    207199        estr = binascii.unhexlify("6c313a616c6565").decode()
    208200        self.t(v, estr)
    209201
     
    211203        ustr = u"Schr\xc3\xb6dinger\xe2\x80\x99s_Cat".encode("utf8")
    212204        estr = binascii.unhexlify("6c32353a53636872c383c2b664696e676572c3a2c280c299735f436174646565")
    213205        self.t([ustr, {}], estr)
    214    
     206
    215207    def test_encoding_hello(self):
    216208        self.t(hello)
    217209   
     
    240232
    241233
    242234def main():
    243     import sys
    244     #needs fixing for py3k:
    245     if sys.version_info[0]<3:
    246         unittest.main()
     235    unittest.main()
    247236
    248237if __name__ == '__main__':
    249238    main()
  • xpra/net/bencode/bencode.py

     
    1212__version__ = ("Python", 0, 11)
    1313
    1414import sys
    15 if sys.version > '3':
     15if sys.version_info[0] >= 3:
    1616    long = int              #@ReservedAssignment
     17    #idiotic py3k unicode mess makes us reinvent the wheel again:
     18    def strindex(s, c, start):
     19        i = start
     20        while s[i] != ord(c):
     21            i += 1
     22            if i>=len(s):
     23                return -1
     24        return i
     25    #the values end up being ints..
     26    def cv(x):
     27        return ord(x)
     28else:
     29    def strindex(s, c, start):
     30        return s.index(c, start)
     31    def cv(x):
     32        return x
    1733
    18 unicode_support = False
    19 def set_unicode_support(us):
    20     global unicode_support
    21     unicode_support = us
     34def bytestostr(x):
     35    if type(x)==bytes:
     36        return x.decode("utf8")
     37    return x
    2238
     39   
    2340
     41
    2442def decode_int(x, f):
    2543    f += 1
    26     newf = x.index('e', f)
     44    newf = strindex(x, 'e', f)
    2745    try:
    2846        n = int(x[f:newf])
    2947    except (OverflowError, ValueError):
    3048        n = long(x[f:newf])
    31     if x[f] == '-':
    32         if x[f + 1] == '0':
     49    if x[f] == cv('-'):
     50        if x[f + 1] == cv('0'):
    3351            raise ValueError
    34     elif x[f] == '0' and newf != f+1:
     52    elif x[f] == cv('0') and newf != f+1:
    3553        raise ValueError
    3654    return (n, newf+1)
    3755
    3856def decode_string(x, f):
    39     colon = x.index(':', f)
     57    colon = strindex(x, ':', f)
    4058    assert colon>=0
    4159    try:
    4260        n = int(x[f:colon])
    4361    except (OverflowError, ValueError):
    4462        n = long(x[f:colon])
    45     if x[f] == '0' and colon != f+1:
     63    if x[f] == cv('0') and colon != f+1:
    4664        raise ValueError
    4765    colon += 1
    4866    return (x[colon:colon+n], colon+n)
    4967
     68def decode_py3kstring(x, f):
     69    xs, fs = decode_string(x, f)
     70    return (xs, fs)
     71
    5072def decode_unicode(x, f):
    5173    xs, fs = decode_string(x, f+1)
    5274    return (xs.decode("utf8"), fs)
     
    5375
    5476def decode_list(x, f):
    5577    r, f = [], f+1
    56     while x[f] != 'e':
     78    while x[f] != cv('e'):
    5779        fn = decode_func.get(x[f])
    5880        if not fn:
    5981            raise ValueError("invalid list entry: %s" % (x[f:]))
     
    6486def decode_dict(x, f):
    6587    r, f = {}, f+1
    6688    #lastkey = None
    67     while x[f] != 'e':
     89    while x[f] != cv('e'):
    6890        fn = decode_func.get(x[f])
    6991        if not fn:
    7092            raise ValueError("invalid dict key: %s" % (x[f:]))
    7193        k, f = fn(x, f)
    72         #if lastkey is not None and lastkey >= k:
    73         #    raise ValueError("keys are not in ascending order!")
    74         #lastkey = k
    7594        fn = decode_func.get(x[f])
    7695        if not fn:
    7796            raise ValueError("invalid dict value: %s" % (x[f:]))
     
    7897        r[k], f = fn(x, f)
    7998    return (r, f + 1)
    8099
     100
    81101decode_func = {}
    82102decode_func['l'] = decode_list
    83103decode_func['d'] = decode_dict
    84104decode_func['i'] = decode_int
    85105for c in '0123456789':
    86     decode_func[c] = decode_string
     106    if sys.version_info[0]<3:
     107        decode_func[c] = decode_string
     108    else:
     109        decode_func[c] = decode_py3kstring
    87110decode_func['u'] = decode_unicode
    88111#now as byte values:
    89112for k,v in dict(decode_func).items():
    90     decode_func[ord(k)] = lambda x,f : v(str(x), f)
     113    decode_func[ord(k)] = v
    91114
     115
    92116def bdecode(x):
    93117    try:
    94         fn = decode_func.get(x[0])
     118        #v = x[0].encode("utf-8")
     119        xs = x.encode("utf8")
     120        #v = x[0]
     121        fn = decode_func.get(xs[0])
    95122        if not fn:
    96             raise ValueError("invalid type identifier: %s" % (x[0]))
    97         r, l = fn(x, 0)
    98     except (IndexError, KeyError):
     123            raise ValueError("invalid type identifier: %s" % (xs[0]))
     124        r, l = fn(xs, 0)
     125    except (IndexError, KeyError) as e:
    99126        import traceback
    100127        traceback.print_exc()
    101         raise ValueError
     128        raise e
    102129    return r, l
    103130
    104131def encode_int(x, r):
     
    108135def encode_string(x, r):
    109136    r.extend((str(len(x)), ':', x))
    110137
     138def encode_bytes(x, r):
     139    encode_string(x, r)
     140
    111141def encode_unicode(x, r):
    112     global unicode_support
    113     x = x.encode("utf8")
    114     if unicode_support:
    115         r.extend(('u', str(len(x)), ':', x))
    116     else:
    117         encode_string(x, r)
     142    encode_string(x.encode("utf8"), r)
    118143
    119144def encode_list(x, r):
    120145    r.append('l')
     
    132157
    133158
    134159encode_func = {}
    135 if sys.version < '3':
     160if sys.version_info[0] < 3:
    136161    from types import (StringType, UnicodeType, IntType, LongType, DictType, ListType,
    137162                       TupleType, BooleanType)
    138163    encode_func[IntType] = encode_int
     
    145170    encode_func[BooleanType] = encode_int
    146171else:
    147172    encode_func[int] = encode_int
    148     encode_func[str] = encode_string
     173    encode_func[str] = encode_unicode
    149174    encode_func[list] = encode_list
    150175    encode_func[tuple] = encode_list
    151176    encode_func[dict] = encode_dict
    152177    encode_func[bool] = encode_int
    153     def encode_bytes(x, r):
    154         encode_string(x.decode(), r)
    155178    encode_func[bytes] = encode_bytes
    156179
    157180def bencode(x):
    158181    r = []
    159182    encode_func[type(x)](x, r)
    160     return ''.join(r)
     183    return ''.join(bytestostr(x) for x in r)
  • xpra/net/bencode/cython_bencode.pyx

     
    77# Taken from BitTorrent 3.4.2 (which is MIT-licensed), then hacked up
    88# further.
    99
     10cdef extern from "../../codecs/buffers/buffers.h":
     11    object memory_as_pybuffer(void* ptr, Py_ssize_t buf_len, int readonly)
     12    int    object_as_buffer(object obj, const void ** buffer, Py_ssize_t * buffer_len)
     13
     14cdef extern from "Python.h":
     15    int PyBytes_AsStringAndSize(object obj, char **buffer, Py_ssize_t *length)
     16
     17
    1018# Original version written by Petru Paler
    1119
    1220__version__ = ("Cython", 0, 12)
     
    1321
    1422import sys
    1523if sys.version_info[0]>=3:
    16     raise ImportError("not ported to py3k yet")
    17     #work in progress:
    1824    StringType  = bytes
    1925    UnicodeType = str
    2026    IntType     = int
     
    3945    def bytestostr(x):      #@DuplicatedSignature
    4046        return str(x)
    4147
    42 cdef int unicode_support = 0
    43 def set_unicode_support(us):
    44     global unicode_support
    45     unicode_support = bool(us)
    4648
    47 
    4849cdef int find(const char *p, char c, int start, size_t len):
    4950    cdef int pos = start
    5051    while pos<len:
     
    7677    cdef int colon = find(x, ':', f, l)
    7778    cdef int slen
    7879    assert colon>=0, "colon not found in string size header"
     80    lenstr = x[f:colon]
    7981    try:
    80         slen = int(x[f:colon])
     82        slen = int(lenstr)
    8183    except (OverflowError, ValueError):
    82         slen = long(x[f:colon])
     84        try:
     85            slen = long(lenstr)
     86        except:
     87            raise ValueError("cannot parse length '%s' (f=%s, colon=%s, string=%s)" % (lenstr, f, colon, x))
    8388    if x[f] == '0' and colon != f+1:
    8489        raise ValueError("leading zeroes are not allowed (found in string length)")
    8590    colon += 1
    8691    return (x[colon:colon+slen], colon+slen)
    8792
     93cdef decode_py3kstring(const char *x, int f, int l):
     94    xs, fs = decode_string(x, f, l)
     95    return (xs.decode("utf8"), fs)
     96
    8897cdef decode_unicode(const char *x, int f, int l):
    8998    xs, fs = decode_string(x, f+1, l)
    9099    return (xs.decode("utf8"), fs)
     
    124133    elif c=='i':
    125134        return decode_int(x, f, l)
    126135    elif c in ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9'):
    127         return decode_string(x, f, l)
     136        if sys.version_info[0]<3:
     137            return decode_string(x, f, l)
     138        else:
     139            return decode_py3kstring(x, f, l)
    128140    elif c=='u':
    129141        return decode_unicode(x, f, l)
    130142    else:
    131143        raise ValueError("invalid %s type identifier: %s at position %s" % (what, c, f))
    132144
     145"""
     146    cdef const char *s = x
     147    cdef size_t l = len(x)
     148"""
    133149def bdecode(x):
    134     cdef const char *s = x
     150    xs = x.encode("utf8")
     151    cdef const char *s
     152    cdef Py_ssize_t l
     153    #assert PyBytes_AsStringAndSize(x, <char **> &s, &l)==0, "failed to convert %s to a buffer" % type(x)
     154    assert object_as_buffer(xs, <const void **> &s, &l)==0, "failed to convert %s to a buffer" % type(x)
    135155    cdef int f = 0
    136     cdef size_t l = len(x)
    137156    try:
    138157        return decode(s, f, l, "bencoded string")
    139158    except (IndexError, KeyError):
     
    150169    r.extend((str(len(x)), ':', x))
    151170
    152171cdef void encode_unicode(x, r) except *:
    153     global unicode_support
    154172    x = x.encode("utf8")
    155     if unicode_support:
    156         r.extend(('u', str(len(x)), ':', x))
    157     else:
    158         encode_string(x, r)
     173    encode_string(x, r)
    159174
    160175cdef void encode_list(object x, r) except *:
    161176    r.append('l')
     
    199214    r = []
    200215    try:
    201216        encode(x, r)
     217        if sys.version_info[0]<3:
     218            return b''.join(x.decode("utf8") for x in r)
    202219        return ''.join(bytestostr(x) for x in r)
    203     except Exception:
     220    except Exception as e:
    204221        import traceback
    205222        traceback.print_exc()
    206         raise ValueError("cannot encode '%s'" % x)
     223        raise ValueError("cannot encode '%s': %s" % (x, e))
  • xpra/net/packet_encoding.py

     
    3535
    3636
    3737bencode, bdecode, bencode_version = None, None, None
    38 if sys.version_info[0]<3:
    39     #bencode needs porting to Python3..
     38try:
    4039    try:
    41         try:
    42             from xpra.net.bencode import bencode, bdecode, __version__ as bencode_version
    43         except ImportError as e:
    44             log.warn("bencode import error: %s", e, exc_info=True)
    45     except Exception as e:
    46         log.error("error loading bencoder", exc_info=True)
     40        from xpra.net.bencode import bencode, bdecode, __version__ as bencode_version
     41    except ImportError as e:
     42        log.warn("bencode import error: %s", e, exc_info=True)
     43except Exception as e:
     44    log.error("error loading bencoder", exc_info=True)
    4745has_bencode = bencode is not None and bdecode is not None
    4846use_bencode = has_bencode and os.environ.get("XPRA_USE_BENCODER", "1")=="1"
    4947log("packet encoding: has_bencode=%s, use_bencode=%s, version=%s", has_bencode, use_bencode, bencode_version)