"""Module with a simple buffer implementation using the memory manager"""
import sys

__all__ = ["SlidingWindowMapBuffer"]

# Python 2 compatibility: fall back to ``str`` where ``bytes`` is not a builtin.
try:
    bytes
except NameError:
    bytes = str


class SlidingWindowMapBuffer(object):

    """A buffer like object which allows direct byte-wise object and slicing into
    memory of a mapped file. The mapping is controlled by the provided cursor.

    The buffer is relative, that is if you map an offset, index 0 will map to the
    first byte at the offset you used during initialization or begin_access

    The cursor is only used through these methods: ``is_associated``,
    ``is_valid``, ``use_region``, ``unuse_region``, ``includes_ofs``,
    ``ofs_begin``, ``ofs_end``, ``file_size`` and ``buffer``.

    **Note:** Although this type effectively hides the fact that there are mapped
    windows underneath, it can unfortunately not be used in any non-pure python
    method which needs a buffer or string"""
    __slots__ = (
        '_c',           # our cursor
        '_size',        # our supposed size
    )

    def __init__(self, cursor=None, offset=0, size=sys.maxsize, flags=0):
        """Initialize the instance to operate on the given cursor.

        :param cursor: if not None, the associated cursor to the file you want to access
            If None, you have to call begin_access before using the buffer and provide a cursor
        :param offset: absolute offset in bytes
        :param size: the total size of the mapping. Defaults to the maximum possible size
            From that point on, the __len__ of the buffer will be the given size or the file size.
            If the size is larger than the mappable area, you can only access the actually available
            area, although the length of the buffer is reported to be your given size.
            Hence it is in your own interest to provide a proper size !
        :param flags: Additional flags to be passed to os.open
        :raise ValueError: if the buffer could not achieve a valid state"""
        self._c = cursor
        # Always give the slot a value so that len() works on a buffer that
        # has not been attached to a cursor yet.
        self._size = 0
        if cursor and not self.begin_access(cursor, offset, size, flags):
            raise ValueError("Failed to allocate the buffer - probably the given offset is out of bounds")
        # END handle offset

    def __del__(self):
        self.end_access()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.end_access()

    def __len__(self):
        """:return: the size recorded by the last successful begin_access, or 0"""
        return self._size

    def __getitem__(self, i):
        """Return a single item for an integer index, or a run of bytes for a slice.

        :param i: integer index (negative values count from the end of the buffer)
            or a slice object. The step of a slice is ignored."""
        if isinstance(i, slice):
            # Compare against None explicitly: an explicit stop of 0 must yield
            # an empty result rather than being mistaken for "until the end".
            start = 0 if i.start is None else i.start
            stop = self._size if i.stop is None else i.stop
            return self.__getslice__(start, stop)
        # END handle slices
        c = self._c
        assert c.is_valid()
        if i < 0:
            i = self._size + i
        if not c.includes_ofs(i):
            c.use_region(i, 1)
        # END handle region usage
        return c.buffer()[i - c.ofs_begin()]

    def __getslice__(self, i, j):
        """Return the bytes in the half-open range [i, j).

        :param i: start offset, negative values count from the end of the buffer
        :param j: stop offset, negative values count from the end of the buffer.
            sys.maxsize is interpreted as the end of the buffer
        :return: a slice of the cursor's current buffer if the range lies within
            the currently used region, otherwise a bytes object joined from
            several regions. A reversed range (j <= i) yields an empty result."""
        c = self._c
        assert c.is_valid()
        if i < 0:
            i = self._size + i
        if j == sys.maxsize:
            j = self._size
        if j < 0:
            j = self._size + j

        # fast path, slice fully included - saves a concatenate operation and
        # should be the default
        if (c.ofs_begin() <= i) and (j < c.ofs_end()):
            b = c.ofs_begin()
            return c.buffer()[i - b:j - b]
        # END fast path

        remaining = j - i       # total length still to read
        ofs = i
        # It's fastest to keep tokens and join later, especially in py3
        chunks = list()
        # '> 0' rather than truthiness: a reversed range gives a negative
        # length, which must not start the loop at all.
        while remaining > 0:
            c.use_region(ofs, remaining)
            assert c.is_valid()
            d = c.buffer()[:remaining]
            count = len(d)
            if count == 0:
                # Nothing more can be read; stop instead of spinning forever.
                break
            # END handle exhausted data
            ofs += count
            remaining -= count
            # Make sure we don't keep references, as c.use_region() might attempt to free resources, but
            # can't unless we use pure bytes
            if hasattr(d, 'tobytes'):
                d = d.tobytes()
            chunks.append(d)
        # END while there are bytes to read
        return bytes().join(chunks)

    #{ Interface

    def begin_access(self, cursor=None, offset=0, size=sys.maxsize, flags=0):
        """Call this before the first use of this instance. The method was already
        called by the constructor in case sufficient information was provided.

        For more information on the parameters, see the __init__ method

        :param cursor: if cursor is None the existing one will be used.
        :return: True if the buffer can be used"""
        if cursor:
            self._c = cursor
        # END update our cursor

        # reuse existing cursors if possible
        if self._c is not None and self._c.is_associated():
            res = self._c.use_region(offset, size, flags).is_valid()
            if res:
                # if given size is too large or default, we compute a proper size
                # If it's smaller, we assume the combination between offset and size
                # as chosen by the user is correct and use it !
                # If not, the user is in trouble.
                if size > self._c.file_size():
                    size = self._c.file_size() - offset
                # END handle size
                self._size = size
            # END set size
            return res
        # END use our cursor
        return False

    def end_access(self):
        """Call this method once you are done using the instance. It is automatically
        called on destruction, and should be called just in time to allow system
        resources to be freed.

        Once you called end_access, you must call begin access before reusing this instance!"""
        self._size = 0
        # getattr guards against an instance whose '_c' slot was never
        # assigned, so that destruction of such an object stays silent.
        c = getattr(self, '_c', None)
        if c is not None:
            c.unuse_region()
        # END unuse region

    def cursor(self):
        """:return: the currently set cursor which provides access to the data"""
        return self._c

    #}END interface