diff --git a/.travis.yml b/.travis.yml index a8205ae..616d055 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,21 +1,28 @@ -dist: trusty sudo: false language: python - python: - - "2.6" - "2.7" - - "3.3" - - "3.4" - "3.5" - "3.6" - # - "pypy" # disable pypy builds until supported by trusty containers + - "3.7" + - "3.8-dev" + - "pypy" + +addons: + sonarcloud: + organization: "suminb-github" install: - pip install --requirement tests/requirements.txt + - pip install "black; python_version >= '3.6'" script: + - | + if [ -x "$(command -v black)" ]; then + black --check . + fi - py.test tests --cov base62 --durations=10 after_success: - coveralls + - sonar-scanner diff --git a/README.rst b/README.rst index 3f5744d..eac602f 100644 --- a/README.rst +++ b/README.rst @@ -1,32 +1,35 @@ base62 ====== -|Build Status| |PyPI| +|Build Status| |Coveralls| |PyPI| A Python module for ``base62`` encoding. Ported from PHP code that I wrote in mid-2000, which can be found -`here `__. +`here `__. .. |Build Status| image:: https://travis-ci.org/suminb/base62.svg?branch=master :target: https://travis-ci.org/suminb/base62 .. |PyPI| image:: https://img.shields.io/pypi/v/pybase62.svg :target: https://pypi.python.org/pypi/pybase62 +.. |Coveralls| image:: https://coveralls.io/repos/github/suminb/base62/badge.svg?branch=master + :target: https://coveralls.io/github/suminb/base62?branch=develop Rationale --------- -When writing a web application, often times we would like to keep the URLs short. +When writing a web application, often times we would like to keep the URLs +short. :: - http://localhost/post/V1Biicwt + http://localhost/posts/V1Biicwt This certainly gives a more concise look than the following. :: - http://localhost/post/109237591284123 + http://localhost/posts/109237591284123 This was the original motivation to write this module, but there shall be much more broader potential use cases of this module. 
The main advantage of @@ -84,6 +87,20 @@ From version ``0.2.0``, ``base62`` supports ``bytes`` array encoding as well. >>> base62.decodebytes('1') b'\x01' +Some may be inclined to assume that they both take ``bytes`` types as input +because of their names. However, ``encodebytes()`` takes ``bytes`` types +whereas ``decodebytes()`` takes ``str`` types as input. They are intended +to be commutative, so that a *roundtrip* between both functions yields the +original value. + +Formally speaking, we say functions *f* and *g* commute if *f∘g* = *g∘f* where +*f(g(x))* = *(f∘g)(x)*. + +Therefore, we may expect the following relationships: + +* ``value == encodebytes(decodebytes(value))`` +* ``value == decodebytes(encodebytes(value))`` + Tests ===== @@ -93,8 +110,18 @@ You may run some test cases to ensure all functionalities are operational. py.test -v -If ``pytest`` is not installed, you may want to run the following commands: +If ``pytest`` is not installed, you may want to run the following command: :: pip install -r tests/requirements.txt + + +Deployment +========== + +Deploy a source package (to `pypi <https://pypi.org/project/pybase62/>`_) as follows: + +:: + + python setup.py sdist upload diff --git a/base62.py b/base62.py index 138039a..3249d24 100644 --- a/base62.py +++ b/base62.py @@ -6,16 +6,28 @@ Originated from http://blog.suminb.com/archives/558 """ -__title__ = 'base62' -__author__ = 'Sumin Byeon' -__email__ = 'suminb@gmail.com' -__version__ = '0.3.2' - -CHARSET = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' -BASE = 62 - - -def bytes_to_int(s, byteorder='big', signed=False): +__title__ = "base62" +__author__ = "Sumin Byeon" +__email__ = "suminb@gmail.com" +__version__ = "0.4.3" + +CHARSET_DEFAULT = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +CHARSET_INVERTED = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + +try: + # NOTE: This is for Python 2. Shall be removed as soon as Python 2 is + # deprecated.
+ string_types = (str, unicode) + bytes_types = ( + bytes, + bytearray, + ) +except NameError: + string_types = (str,) + bytes_types = (bytes,) + + +def bytes_to_int(barray, byteorder="big", signed=False): """Converts a byte array to an integer value. Python 3 comes with a built-in function to do this, but we would like to @@ -23,92 +35,104 @@ def bytes_to_int(s, byteorder='big', signed=False): """ try: - return int.from_bytes(s, byteorder, signed=signed) + return int.from_bytes(barray, byteorder, signed=signed) except AttributeError: # For Python 2.x - if byteorder != 'big' or signed: + if byteorder != "big" or signed: raise NotImplementedError() # NOTE: This won't work if a generator is given - n = len(s) - ds = (x << (8 * (n - 1 - i)) for i, x in enumerate(bytearray(s))) + n = len(barray) + ds = (x << (8 * (n - 1 - i)) for i, x in enumerate(bytearray(barray))) return sum(ds) -def encode(n, minlen=1): +def encode(n, minlen=1, charset=CHARSET_DEFAULT): """Encodes a given integer ``n``.""" + base = len(charset) chs = [] while n > 0: - r = n % BASE - n //= BASE + r = n % base + n //= base - chs.append(CHARSET[r]) + chs.append(charset[r]) if len(chs) > 0: chs.reverse() else: - chs.append('0') + chs.append(charset[0]) - s = ''.join(chs) - s = CHARSET[0] * max(minlen - len(s), 0) + s + s = "".join(chs) + s = charset[0] * max(minlen - len(s), 0) + s return s -def encodebytes(s): +def encodebytes(barray, charset=CHARSET_DEFAULT): """Encodes a bytestring into a base62 string. - :param s: A byte array + :param barray: A byte array + :type barray: bytes + :rtype: str """ - _check_bytes_type(s) - return encode(bytes_to_int(s)) + _check_type(barray, bytes_types) + return encode(bytes_to_int(barray), charset=charset) -def decode(b): - """Decodes a base62 encoded value ``b``.""" +def decode(encoded, charset=CHARSET_DEFAULT): + """Decodes a base62 encoded value ``encoded``.
+ + :type encoded: str + :rtype: int + """ + _check_type(encoded, string_types) + base = len(charset) - if b.startswith('0z'): - b = b[2:] + if encoded.startswith("0z"): + encoded = encoded[2:] - l, i, v = len(b), 0, 0 - for x in b: - v += _value(x) * (BASE ** (l - (i + 1))) + l, i, v = len(encoded), 0, 0 + for x in encoded: + v += _value(x, charset=charset) * (base ** (l - (i + 1))) i += 1 return v -def decodebytes(s): +def decodebytes(encoded, charset=CHARSET_DEFAULT): """Decodes a string of base62 data into a bytes object. - :param s: A string to be decoded in base62 + :param encoded: A string to be decoded in base62 + :type encoded: str :rtype: bytes """ - decoded = decode(s) + decoded = decode(encoded, charset=charset) buf = bytearray() while decoded > 0: - buf.append(decoded & 0xff) + buf.append(decoded & 0xFF) decoded //= 256 buf.reverse() return bytes(buf) -def _value(ch): +def _value(ch, charset): """Decodes an individual digit of a base62 encoded string.""" try: - return CHARSET.index(ch) + return charset.index(ch) except ValueError: - raise ValueError('base62: Invalid character (%s)' % ch) + raise ValueError("base62: Invalid character (%s)" % ch) -def _check_bytes_type(s): +def _check_type(value, expected_type): """Checks if the input is in an appropriate type.""" - if not isinstance(s, bytes): - msg = 'expected bytes-like object, not %s' % s.__class__.__name__ + if not isinstance(value, expected_type): + msg = "Expected {} object, not {}".format( + expected_type, value.__class__.__name__ + ) raise TypeError(msg) diff --git a/setup.py b/setup.py index ac7a803..05184c7 100644 --- a/setup.py +++ b/setup.py @@ -6,19 +6,26 @@ def readme(): try: - with open('README.rst') as f: + with open("README.rst") as f: return f.read() except: - return '(Could not read from README.rst)' + return "(Could not read from README.rst)" -setup(name='pybase62', - py_modules=['base62'], - version=base62.__version__, - description='Python module for base62 encoding', -
long_description=readme(), - author='Sumin Byeon', - author_email='suminb@gmail.com', - url='http://github.com/suminb/base62', - packages=[], - ) +setup( + name="pybase62", + py_modules=["base62"], + version=base62.__version__, + description="Python module for base62 encoding", + long_description=readme(), + author="Sumin Byeon", + author_email="suminb@gmail.com", + url="http://github.com/suminb/base62", + packages=[], + classifiers=[ + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + ], +) diff --git a/sonar-project.properties b/sonar-project.properties new file mode 100644 index 0000000..39686ef --- /dev/null +++ b/sonar-project.properties @@ -0,0 +1,4 @@ +sonar.projectKey=base62 +sonar.sources=. +sonar.host.url=https://sonarcloud.io +sonar.login=travisci diff --git a/tests/requirements.txt b/tests/requirements.txt index c7c53a1..dac19b6 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,3 +1,3 @@ -pytest +pytest==3.6.1 pytest-cov coveralls diff --git a/tests/test_basic.py b/tests/test_basic.py index c852524..ba302fb 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -1,52 +1,111 @@ +import sys + import pytest import base62 bytes_int_pairs = [ - (b'\x00', 0), - (b'\x01', 1), - (b'\x01\x01', 0x0101), - (b'\xff\xff', 0xffff), - (b'\x01\x01\x01', 0x010101), - (b'\x01\x02\x03\x04\x05\x06\x07\x08', 0x0102030405060708), + (b"\x00", 0), + (b"\x01", 1), + (b"\x01\x01", 0x0101), + (b"\xff\xff", 0xFFFF), + (b"\x01\x01\x01", 0x010101), + (b"\x01\x02\x03\x04\x05\x06\x07\x08", 0x0102030405060708), ] def test_const(): - assert len(base62.CHARSET) == base62.BASE == 62 + assert len(base62.CHARSET_DEFAULT) == 62 + assert len(base62.CHARSET_INVERTED) == 62 def test_basic(): - assert base62.encode(0) == '0' - assert base62.encode(0, minlen=0) == '0' - assert base62.encode(0, minlen=1) == '0' - assert
base62.encode(0, minlen=5) == '00000' - assert base62.decode('0') == 0 - assert base62.decode('0000') == 0 - assert base62.decode('000001') == 1 + assert base62.encode(0) == "0" + assert base62.encode(0, minlen=0) == "0" + assert base62.encode(0, minlen=1) == "0" + assert base62.encode(0, minlen=5) == "00000" + assert base62.decode("0") == 0 + assert base62.decode("0000") == 0 + assert base62.decode("000001") == 1 + + assert base62.encode(34441886726) == "base62" + assert base62.decode("base62") == 34441886726 + + # NOTE: For backward compatibility. When I first wrote this module in PHP, + # I used to use the `0z` prefix to denote a base62 encoded string (similar + # to `0x` for hexadecimal strings). + assert base62.decode("0zbase62") == 34441886726 + + +def test_basic_inverted(): + kwargs = {"charset": base62.CHARSET_INVERTED} - assert base62.encode(34441886726) == 'base62' - assert base62.decode('base62') == 34441886726 + assert base62.encode(0, **kwargs) == "0" + assert base62.encode(0, minlen=0, **kwargs) == "0" + assert base62.encode(0, minlen=1, **kwargs) == "0" + assert base62.encode(0, minlen=5, **kwargs) == "00000" + assert base62.decode("0", **kwargs) == 0 + assert base62.decode("0000", **kwargs) == 0 + assert base62.decode("000001", **kwargs) == 1 + assert base62.encode(10231951886, **kwargs) == "base62" + assert base62.decode("base62", **kwargs) == 10231951886 -@pytest.mark.parametrize('b, i', bytes_int_pairs) + # NOTE: For backward compatibility. When I first wrote this module in PHP, + # I used to use the `0z` prefix to denote a base62 encoded string (similar + # to `0x` for hexadecimal strings). 
+ assert base62.decode("0zbase62", **kwargs) == 10231951886 + + +@pytest.mark.parametrize("b, i", bytes_int_pairs) def test_bytes_to_int(b, i): assert base62.bytes_to_int(b) == i -@pytest.mark.parametrize('b, i', bytes_int_pairs) +@pytest.mark.parametrize("b, i", bytes_int_pairs) def test_encodebytes(b, i): assert base62.encodebytes(b) == base62.encode(i) -@pytest.mark.parametrize('s', ['0', '1', 'a', 'z', 'ykzvd7ga']) +@pytest.mark.skipif( + sys.version_info < (3, 0), + reason="Python 2.x does not have clear distinction between str and bytes types", +) +def test_encodebytes_type(): + with pytest.raises(TypeError): + base62.encodebytes("1234") + + +def test_encodebytes_rtype(): + """Make sure the return type of encodebytes() is string.""" + encoded = base62.encodebytes(b"1234") + assert isinstance(encoded, str) + + +@pytest.mark.parametrize("s", ["0", "1", "a", "z", "ykzvd7ga", "0z1234"]) def test_decodebytes(s): assert base62.bytes_to_int(base62.decodebytes(s)) == base62.decode(s) -@pytest.mark.parametrize('input_bytes', [ - b'', b'0', b'bytes to encode', b'\x01\x00\x80']) +@pytest.mark.skipif( + sys.version_info < (3, 0), + reason="Python 2.x does not have clear distinction between str and bytes types", +) +def test_decodebytes_type(): + with pytest.raises(TypeError): + base62.decodebytes(b"1234") + + +def test_decodebytes_rtype(): + """Make sure the return type of decodebytes() is bytes.""" + decoded = base62.decodebytes("1234") + assert isinstance(decoded, bytes) + + +@pytest.mark.parametrize( + "input_bytes", [b"", b"0", b"bytes to encode", b"\x01\x00\x80"] +) def test_roundtrip(input_bytes): """Ensures type consistency. 
Suggested by @dhimmel""" base62_encoded = base62.encodebytes(input_bytes) @@ -54,3 +113,13 @@ def test_roundtrip(input_bytes): output_bytes = base62.decodebytes(base62_encoded) assert isinstance(output_bytes, bytes) assert input_bytes == output_bytes + + +def test_invalid_alphabet(): + with pytest.raises(ValueError): + base62.decode("+") + + +def test_invalid_string(): + with pytest.raises(TypeError): + base62.encodebytes({})