Source code for labblouin.memEfficient
from array import array
from collections import MutableMapping
from itertools import izip
# Constants
DICT_START_NUM = 8
DICT_FREE_SLOT = -1
DICT_DUMMY_SLOT = -2
DICT_SHIFT = 5
[docs]class memEfficientDictionary(MutableMapping):
''' A memory-efficient alternative to the built-in Python dictionary with a trade-off
for (insertion) performance. Based on recipe found by Raymond Hettinger
(http://code.activestate.com/recipes/578375-proof-of-concept-for-a-more-space-efficient
-faster/). '''
def __init__(self,*args,**kwds):
if not hasattr(self,'keylist'): self.clear()
self.update(*args,**kwds)
def __len__(self): return self.used
def __iter__(self): return iter(self.keylist)
[docs] def iterkeys(self): return __iter__()
[docs] def keys(self): return list(self.keylist)
[docs] def itervalues(self): return iter(self.valuelist)
[docs] def values(self): return list(self.valuelist)
[docs] def iteritems(self): return izip(self.keylist,self.valuelist)
[docs] def items(self): return zip(self.keylist,self.valuelist)
def __contains__(self,key):
index,i = self._access(key,hash(key))
return index >= 0
[docs] def get(self,key,default=None):
index,i = self._access(key,hash(key))
if index >= 0: return self.valuelist[index]
return default
[docs] def popitem(self):
assert self.keylist
key = self.keylist[-1]
val = self.valuelist[-1]
del self[key]
return key,val
[docs] def clear(self):
''' Clear the data structure. '''
self.indices = self._index(DICT_START_NUM)
self.hashlist = []
self.keylist = []
self.valuelist = []
self.used = 0
self.filled = 0 # Includes dummies.
def __getitem__(self,key):
''' Access an item through the [] notation. '''
hashv = hash(key)
index,i = self._access(key,hashv)
if index < 0: return None
return self.valuelist[index]
def __setitem__(self,key,value):
hashv = hash(key)
index,i = self._access(key,hashv)
if index < 0: # Does not exist.
self.indices[i] = self.used
self.hashlist.append(hashv)
self.keylist.append(key)
self.valuelist.append(value)
self.used += 1
if index == DICT_FREE_SLOT:
self.filled += 1
if self.filled * 3 > len(self.indices) * 2:
self._resize(4 * len(self))
else: self.valuelist[index] = value
def __delitem__(self,key):
hashv = hash(key)
index,i = self._access(key,hashv)
if index < 0: raise KeyError(key)
self.indices[i] = DICT_DUMMY_SLOT
self.used -= 1
# Swap with last entry to avoid leaving hole.
if index != self.used:
lasth = self.hashlist[-1]
lastk = self.hashlist[-1]
lastv = self.valuelist[-1]
lasti,j = self._access(lastk,lasth)
self.indices[j] = index
self.hashlist[index] = lasth
self.keylist[index] = lastk
self.valuelist[index] = lastv
self.hashlist.pop()
self.keylist.pop()
self.valuelist.pop()
@staticmethod
def _index(n):
if (n <= 2**7): return array('b',[DICT_FREE_SLOT])*n # Signed Character
elif (n <= 2**15): return array('h',[DICT_FREE_SLOT])*n # Signed Short
elif (n <= 2**31): return array('l',[DICT_FREE_SLOT])*n # Signed Long
return [DICT_FREE_SLOT] *n # Python Integers
@staticmethod
def _probe(hashv,mask):
''' Mimic current dictionary probing behavior. '''
if hashv < 0: hashv = -hashv
i = hashv & mask
yield i
probe = hashv
while 1:
yield ((5*i+probe+1) & 0xFFFFFFFFFFFFFFFF) & mask
probe >>= DICT_SHIFT
def _access(self,key,hashv):
''' Lookup logic. '''
freeSlot = None
for i in self._probe(hashv,len(self.indices)-1):
index = self.indices[i]
if index == DICT_FREE_SLOT:
if freeSlot is None: return (DICT_FREE_SLOT,i)
else: return (DICT_DUMMY_SLOT,freeSlot)
elif index == DICT_DUMMY_SLOT:
if freeSlot is None: freeSlot = i
elif (self.keylist[index] is key or
self.hashlist[index] == hashv and
self.keylist[index] == key): return (index,i)
def _resize(self,n):
''' Reindex all entries (hash, key, value) without moving
entries. '''
n = 2 ** n.bit_length()
self.indices = self._index(n)
for index,hashv in enumerate(self.hashlist):
for i in self._probe(hashv,n-1):
if self.indices[i] == DICT_FREE_SLOT: break
self.indices[i] = index
self.filled = self.used