Source code for python_hll.util

# -*- coding: utf-8 -*-

from math import log
import numpy as np


[docs]class BitUtil:
    """
    A collection of bit utilities.
    """

    # The set of least-significant bits for a given ``byte``. ``-1``
    # is used if no bits are set (so as to not be confused with "index of zero"
    # meaning that the least significant bit is the 0th (1st) bit).
    LEAST_SIGNIFICANT_BIT = [
        -1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
        4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
    ]

[docs]    @classmethod
    def least_significant_bit(cls, value):
        """
        Computes the least-significant bit of the specified ``long``
        that is set to ``1``. Zero-indexed.

        See <http://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set>
        and <http://www-graphics.stanford.edu/~seander/bithacks.html>.

        :param long value: the ``long`` whose least-significant bit is desired.
        :returns: the least-significant bit of the specified ``long``.
                  ``-1`` is returned if there are no bits set.
        :rtype: int
        """

        if value == 0:
            # by contract
            return -1

        elif value & 0xFF != 0:
            index = int(cls.unsigned_right_shift_long(value, 0) & 0xFF)
            return cls.LEAST_SIGNIFICANT_BIT[index] + 0

        elif value & 0xFFFF != 0:
            index = int(cls.unsigned_right_shift_long(value, 8) & 0xFF)
            return cls.LEAST_SIGNIFICANT_BIT[index] + 8

        elif value & 0xFFFFFF != 0:
            index = int(cls.unsigned_right_shift_long(value, 16) & 0xFF)
            return cls.LEAST_SIGNIFICANT_BIT[index] + 16

        elif value & 0xFFFFFFFF != 0:
            index = int(cls.unsigned_right_shift_long(value, 24) & 0xFF)
            return cls.LEAST_SIGNIFICANT_BIT[index] + 24

        elif value & 0xFFFFFFFFFF != 0:
            index = int(cls.unsigned_right_shift_long(value, 32) & 0xFF)
            return cls.LEAST_SIGNIFICANT_BIT[index] + 32

        elif value & 0xFFFFFFFFFFFF != 0:
            index = int(cls.unsigned_right_shift_long(value, 40) & 0xFF)
            return cls.LEAST_SIGNIFICANT_BIT[index] + 40

        elif value & 0xFFFFFFFFFFFFFF != 0:
            index = int(cls.unsigned_right_shift_long(value, 48) & 0xFF)
            return cls.LEAST_SIGNIFICANT_BIT[index] + 48

        else:
            index = int(cls.unsigned_right_shift_long(value, 56) & 0xFF)
            return cls.LEAST_SIGNIFICANT_BIT[index] + 56

[docs]    @classmethod
    def unsigned_right_shift_long(cls, val, n):
        """
        Equivalent to Java >>> on a long value
        """
        return val if n == 0 else int(np.uint64(val) >> np.uint64(n))

[docs]    @classmethod
    def unsigned_right_shift_int(cls, val, n):
        """
        Equivalent to Java >>> on an int value
        """
        return val if n == 0 else int(np.uint32(val) >> np.uint32(n))

[docs]    @classmethod
    def unsigned_right_shift_byte(cls, val, n):
        """
        Equivalent to Java >>> on a byte value
        """
        return val if n == 0 else int(np.uint32(val) >> np.uint32(n))

[docs]    @classmethod
    def to_signed_byte(cls, i):
        """
        Converts a Python byte (unsigned integer from 0 to 255) to a Java byte
        (signed two's complement integer from -128 to 127).
        :type i: byte
        :rtype: byte
        """
        return i if i <= 127 else i - 256

[docs]    @classmethod
    def left_shift_long(cls, long_x, int_y):
        """
        Simulates a Java << for a long.

        :param long_x: expected long value in python code
        :param int_y: expected int value in python
        :returns: left shift result for, x << y
        :rtype: long
        """
        x = np.int64(long_x)
        y = np.int(int_y)
        z = np.left_shift(x, y)

        return np.int64(z.item())

[docs]    @classmethod
    def left_shift_int(cls, int_x, int_y):
        """
        Simulates a Java << for an integer.

        :param int_x: expected int value in python code
        :param int_y: expected int value in python
        :returns: left shift result for, x << y
        :rtype: int
        """
        x = np.int32(int_x)
        y = np.int(int_y)
        z = np.left_shift(x, y)

        return z.item()

[docs]    @classmethod
    def left_shift_byte(cls, byte_x, int_y):
        """
        Simulates a Java << for a byte.

        :param byte_x: expected byte value in python code
        :param int_y: expected int value in python
        :returns: left shift result for, x << y
        :rtype: int
        """
        x = np.int8(byte_x)  # converts to signed byte, since byte is signed in java
        y = np.int(int_y)
        z = np.left_shift(x, y)

        # In Java, (byte)128 << 3 produces an int.
        return z.item()


[docs]class LongIterator:
    """
    A ``long``-based iterator.
    """

    LOG2_BITS_PER_WORD = 6
    BITS_PER_WORD = BitUtil.left_shift_int(1, LOG2_BITS_PER_WORD)

    def __init__(self, register_width, words, register_mask, count):
        self._register_width = register_width
        self._words = words
        self._register_mask = register_mask
        self._count = count

        # register setup
        self._register_index = 0
        self._word_index = 0
        self._remaining_word_bits = self.BITS_PER_WORD
        self._word = self._words[self._word_index]

    def __iter__(self):
        return self

    def __next__(self):
        # Python 3 compatibility
        return self.next()

[docs]    def next(self):
        if self._register_index >= self._count:
            raise StopIteration

        if self._remaining_word_bits >= self._register_width:
            register = self._word & self._register_mask

            # shift to the next register
            self._word = BitUtil.unsigned_right_shift_long(self._word, self._register_width)
            self._remaining_word_bits -= self._register_width
        else:  # insufficient bits remaining in current word
            self._word_index += 1  # move to the next word

            register = (self._word | BitUtil.left_shift_long(self._words[self._word_index], self._remaining_word_bits)) & self._register_mask

            # shift to the next partial register (word)
            self._word = BitUtil.unsigned_right_shift_long(self._words[self._word_index], self._register_width - self._remaining_word_bits)
            self._remaining_word_bits += self.BITS_PER_WORD - self._register_width

        self._register_index += 1
        return register


[docs]class BitVector:
    """
    A vector (array) of bits that is accessed in units ("registers") of ``width``
    bits which are stored as 64bit "words" (``long``'s).  In this context
    a register is at most 64bits.
    """

    # NOTE:  in this context, a word is 64bits

    # rather than doing division to determine how a bit index fits into 64bit
    # words (i.e. longs), bit shifting is used
    LOG2_BITS_PER_WORD = 6  # =>64bits
    BITS_PER_WORD = BitUtil.left_shift_int(1, LOG2_BITS_PER_WORD)
    BITS_PER_WORD_MASK = BITS_PER_WORD - 1

    # ditto from above but for bytes (for output)
    LOG2_BITS_PER_BYTE = 3  # =>8bits
    BITS_PER_BYTE = BitUtil.left_shift_int(1, LOG2_BITS_PER_BYTE)

    BYTES_PER_WORD = 8  # 8 bytes in a long

    def __init__(self, width, count):
        """
        :param int width: the width of each register.  This cannot be negative or
               zero or greater than 63 (the signed word size).
        :param long count: the number of registers.  This cannot be negative or zero
        """
        # 64bit words
        # ceil((width * count)/BITS_PER_WORD)
        self._words = [0] * BitUtil.unsigned_right_shift_long((width * count) + self.BITS_PER_WORD_MASK, self.LOG2_BITS_PER_WORD)
        # the width of a register in bits (this cannot be more than 64 (the word size))
        self._register_width = width
        self._count = count
        self._register_mask = BitUtil.left_shift_long(1, width) - 1

[docs]    def get_register(self, register_index):
        """
        :param long register_index: the index of the register whose value is to be
               retrieved.  This cannot be negative.
        :returns: the value at the specified register index
        :rtype: long
        """
        # NOTE:  if this changes then setMaxRegister() must change
        bit_index = register_index * self._register_width
        first_word_index = BitUtil.unsigned_right_shift_long(bit_index, self.LOG2_BITS_PER_WORD)  # aka (bitIndex / BITS_PER_WORD)
        second_word_index = BitUtil.unsigned_right_shift_long(bit_index + self._register_width - 1, self.LOG2_BITS_PER_WORD)  # see above
        bit_remainder = bit_index & self.BITS_PER_WORD_MASK  # aka (bitIndex % BITS_PER_WORD)

        if first_word_index == second_word_index:
            return BitUtil.unsigned_right_shift_long(self._words[first_word_index], bit_remainder) & self._register_mask
        # else -- register spans words
        return BitUtil.unsigned_right_shift_long(self._words[first_word_index], bit_remainder) | \
            BitUtil.left_shift_long(self._words[second_word_index], self.BITS_PER_WORD - bit_remainder) & self._register_mask

[docs]    def set_register(self, register_index, value):
        """
        :param long register_index: the index of the register whose value is to be set.
               This cannot be negative
        :param long value: the value to set in the register
        :rtype: long
        """
        # NOTE:  if this changes then setMaxRegister() must change
        bit_index = register_index * self._register_width
        first_word_index = BitUtil.unsigned_right_shift_long(bit_index, self.LOG2_BITS_PER_WORD)  # aka (bitIndex / BITS_PER_WORD)
        second_word_index = BitUtil.unsigned_right_shift_long(bit_index + self._register_width - 1, self.LOG2_BITS_PER_WORD)  # see above
        bit_remainder = bit_index & self.BITS_PER_WORD_MASK  # aka (bitIndex % BITS_PER_WORD)

        if first_word_index == second_word_index:
            # clear then set
            self._words[first_word_index] &= ~BitUtil.left_shift_long(self._register_mask, bit_remainder)
            self._words[first_word_index] |= BitUtil.left_shift_long(value, bit_remainder)
        else:  # register spans words
            # clear then set each partial word
            self._words[first_word_index] &= BitUtil.left_shift_long(1, bit_remainder) - 1
            self._words[first_word_index] |= BitUtil.left_shift_long(value, bit_remainder)

            self._words[second_word_index] &= ~BitUtil.unsigned_right_shift_long(self._register_mask, self.BITS_PER_WORD - bit_remainder)
            self._words[second_word_index] |= BitUtil.unsigned_right_shift_long(value, self.BITS_PER_WORD - bit_remainder)

[docs]    def register_iterator(self):
        """
        :returns: a ``LongIterator`` for iterating starting at the register
                  with index zero. This will never be ``None``.
        :rtype: LongIterator
        """
        return LongIterator(self._register_width, self._words, self._register_mask, self._count)

[docs]    def set_max_register(self, register_index, value):
        """
        Sets the value of the specified index register if and only if the specified
        value is greater than the current value in the register.  This is equivalent
        to but much more performant than

        ``vector.setRegister(index, Math.max(vector.getRegister(index), value));``

        :param long register_index: the index of the register whose value is to be set.
               This cannot be negative
        :param long value: the value to set in the register if and only if this value
               is greater than the current value in the register
        :returns: True if and only if the specified value is greater
                  than or equal to the current register value. False
                  otherwise.
        :rtype: boolean
        """
        # NOTE:  if this changes then setRegister() must change
        bit_index = register_index * self._register_width
        first_word_index = BitUtil.unsigned_right_shift_long(bit_index, self.LOG2_BITS_PER_WORD)  # aka (bitIndex / BITS_PER_WORD)
        second_word_index = BitUtil.unsigned_right_shift_long(bit_index + self._register_width - 1, self.LOG2_BITS_PER_WORD)  # see above
        bit_remainder = bit_index & self.BITS_PER_WORD_MASK  # aka (bitIndex % BITS_PER_WORD)

        if first_word_index == second_word_index:
            register_value = BitUtil.unsigned_right_shift_long(self._words[first_word_index], bit_remainder) & self._register_mask
        else:  # register spans words
            # # no need to mask since at top of word
            register_value = BitUtil.unsigned_right_shift_long(self._words[first_word_index], bit_remainder) | \
               BitUtil.left_shift_long(self._words[second_word_index], self.BITS_PER_WORD - bit_remainder) & self._register_mask

        # determine which is the larger and update as necessary
        if value > register_value:
            # NOTE:  matches setRegister()
            if first_word_index == second_word_index:
                # clear then set
                self._words[first_word_index] &= ~BitUtil.left_shift_long(self._register_mask, bit_remainder)
                self._words[first_word_index] |= BitUtil.left_shift_long(value, bit_remainder)
            else:  # register spans words
                # clear then set each partial word
                self._words[first_word_index] &= BitUtil.left_shift_long(1, bit_remainder) - 1
                self._words[first_word_index] |= BitUtil.left_shift_long(value,  bit_remainder)

                self._words[second_word_index] &= ~BitUtil.unsigned_right_shift_long(self._register_mask, self.BITS_PER_WORD - bit_remainder)
                self._words[second_word_index] |= BitUtil.unsigned_right_shift_long(value, self.BITS_PER_WORD - bit_remainder)
        # else -- the register value is greater (or equal) so nothing needs to be done

        return value >= register_value

[docs]    def fill(self, value):
        """
        Fills this bit vector with the specified bit value.  This can be used to
        clear the vector by specifying ``0``.

        :param long value: the value to set all bits to (only the lowest bit is used)
        :rtype: void
        """
        for i in range(self._count):
            self.set_register(i, value)

[docs]    def get_register_contents(self, serializer):
        """
        Serializes the registers of the vector using the specified serializer.

        :param BigEndianAscendingWordSerializer serializer: the serializer to use. This cannot be ``None``.
        :rtype: void
        """
        iterator = self.register_iterator()

        for itr in iterator:
            serializer.write_word(itr)


[docs]class NumberUtil:
    """
    A collection of utilities to work with numbers.
    """

    # loge(2) (log-base e of 2)
    LOGE_2 = 0.6931471805599453

    # the hex characters
    HEX = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F']

[docs]    @classmethod
    def log2(cls, value):
        """
        Computes the ``log2`` (log-base-two) of the specified value.

        :param float value: the ``float`` for which the ``log2`` is
               desired.
        :returns: the ``log2`` of the specified value
        :rtype: float
        """
        # REF:  http://en.wikipedia.org/wiki/Logarithmic_scale (conversion of bases)
        return log(value) / cls.LOGE_2

[docs]    @classmethod
    def to_hex(cls, bytes, offset, count):
        """
        Converts the specified array of ``byte``'s into a string of
        hex characters (low ``byte`` first).

        :param list bytes: the array of ``byte``'s that are to be converted.
               This cannot be ``None`` though it may be empty.
        :param int offset: the offset in ``bytes`` at which the bytes will
               be taken.  This cannot be negative and must be less than
               ``bytes.length - 1``.
        :param int count: the number of bytes to be retrieved from the specified array.
               This cannot be negative.  If greater than ``bytes.length - offset``
               then that value is used.
        :returns: a string of at most ``count`` characters that represents
                  the specified byte array in hex.  This will never be ``None``
                  though it may be empty if ``bytes`` is empty or ``count``
                  is zero.
        :rtype: string
        """
        if offset >= len(bytes):  # by contract
            raise Exception("Offset is greater than the length, {offset} >= {byte_array_length}"
                            .format(offset=offset, byte_array_length=len(bytes)))
        byte_count = min(len(bytes) - offset, count)
        upper_bound = byte_count + offset

        chars = [None] * (byte_count * 2)   # two chars per byte
        char_index = 0
        for i in range(offset, upper_bound):
            value = bytes[i]
            chars[char_index] = cls.HEX[(BitUtil.unsigned_right_shift_byte(value, 4)) & 0x0F]
            char_index += 1
            chars[char_index] = cls.HEX[value & 0x0F]
            char_index += 1

        return ''.join(chars)

[docs]    @classmethod
    def from_hex(cls, string, offset, count):
        """
        Converts the specified array of hex characters into an array of ``byte``'s
        (low ``byte`` first).

        :param string string: the string of hex characters to be converted into ``byte``'s.
               This cannot be ``None`` though it may be blank.
        :param int offset: the offset in the string at which the characters will be
               taken.  This cannot be negative and must be less than ``string.length() - 1``.
        :param int count: the number of characters to be retrieved from the specified
               string.  This cannot be negative and must be divisible by two
               (since there are two characters per ``byte``).
        :returns: the array of ``byte``'s that were converted from the
                  specified string (in the specified range).  This will never be
                  ``None`` though it may be empty if ``string``
                  is empty or ``count`` is zero.
        :rtype: list
        """

        if offset >= len(string):  # by contract
            raise Exception("Offset is greater than the length, {offset} >= {string_length}"
                            .format(offset=offset, string_length=len(string)))
        if (count & 0x01) != 0:  # by contract
            raise Exception("Count is not divisible by two, ({})".format(count))

        char_count = min(len(string) - offset, count)
        upper_bound = offset + char_count

        byte_array = [0] * (BitUtil.unsigned_right_shift_int(char_count, 1))  # aka /2
        byte_index = 0  # beginning
        for i in range(0, upper_bound, 2):
            p1 = BitUtil.left_shift_int(cls._digit(string[i]), 4)
            p2 = cls._digit(string[i+1])
            p = (p1 | p2) & 0xFF

            byte_array[byte_index] = BitUtil.to_signed_byte(p)
            byte_index += 1
        return byte_array

    @classmethod
    def _digit(cls, character):
        """
        :param string character: a hex character to be converted to a ``byte``.
               This cannot be a character other than [a-fA-F0-9].
        :returns: the value of the specified character.  This will be a value ``0``
                  through ``15``.
        :rtype: int
        """
        if character == '0':
            return 0
        elif character == '1':
            return 1
        elif character == '2':
            return 2
        elif character == '3':
            return 3
        elif character == '4':
            return 4
        elif character == '5':
            return 5
        elif character == '6':
            return 6
        elif character == '7':
            return 7
        elif character == '8':
            return 8
        elif character == '9':
            return 9
        elif character in ['a', 'A']:
            return 10
        elif character in ['b', 'B']:
            return 11
        elif character in ['c', 'C']:
            return 12
        elif character in ['d', 'D']:
            return 13
        elif character in ['e', 'E']:
            return 14
        elif character in ['f', 'F']:
            return 15
        else:
            raise Exception("Character is not in [a-fA-F0-9]: ({})".format(character))
Source code for python_hll.util

python-hll

Navigation

Related Topics