| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -6,7 +6,7 @@ from contextlib import closing | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					from common.file_helpers import mkdirs_exists_ok | 
					 | 
					 | 
					 | 
					from common.file_helpers import mkdirs_exists_ok | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					class ColumnStoreReader(): | 
					 | 
					 | 
					 | 
					class ColumnStoreReader(): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  def __init__(self, path, mmap=False, allow_pickle=False, direct_io=False): | 
					 | 
					 | 
					 | 
					  def __init__(self, path, mmap=False, allow_pickle=False, direct_io=False, prefix="", np_data=None): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if not (path and os.path.isdir(path)): | 
					 | 
					 | 
					 | 
					    if not (path and os.path.isdir(path)): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					      raise ValueError("Not a column store: {}".format(path)) | 
					 | 
					 | 
					 | 
					      raise ValueError("Not a column store: {}".format(path)) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -15,6 +15,9 @@ class ColumnStoreReader(): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    self._mmap = mmap | 
					 | 
					 | 
					 | 
					    self._mmap = mmap | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    self._allow_pickle = allow_pickle | 
					 | 
					 | 
					 | 
					    self._allow_pickle = allow_pickle | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    self._direct_io = direct_io | 
					 | 
					 | 
					 | 
					    self._direct_io = direct_io | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    self._is_dict = 'columnstore' in self._keys | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    self._prefix = prefix | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    self._np_data = np_data | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  @property | 
					 | 
					 | 
					 | 
					  @property | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  def path(self): | 
					 | 
					 | 
					 | 
					  def path(self): | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -30,6 +33,11 @@ class ColumnStoreReader(): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					      return None | 
					 | 
					 | 
					 | 
					      return None | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  def keys(self): | 
					 | 
					 | 
					 | 
					  def keys(self): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    if self._is_dict: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      if not self._np_data: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        self._np_data = self._load(os.path.join(self._path, 'columnstore')) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      return self._np_data.keys() | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    return list(self._keys) | 
					 | 
					 | 
					 | 
					    return list(self._keys) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  def iteritems(self): | 
					 | 
					 | 
					 | 
					  def iteritems(self): | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -49,25 +57,42 @@ class ColumnStoreReader(): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    else: | 
					 | 
					 | 
					 | 
					    else: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					      return None | 
					 | 
					 | 
					 | 
					      return None | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  def _load(self, path): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    if self._mmap: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      # note that direct i/o does nothing for mmap since file read/write interface is not used | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      return np.load(path, mmap_mode='r', allow_pickle=self._allow_pickle, fix_imports=False) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    if self._direct_io: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      opener = lambda path, flags: os.open(path, os.O_RDONLY | os.O_DIRECT) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      with open(path, 'rb', buffering=0, opener=opener) as f: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        return np.load(f, allow_pickle=self._allow_pickle, fix_imports=False) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    return np.load(path, allow_pickle=self._allow_pickle, fix_imports=False) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  def __getitem__(self, key): | 
					 | 
					 | 
					 | 
					  def __getitem__(self, key): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    try: | 
					 | 
					 | 
					 | 
					    try: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      if self._is_dict: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        path = os.path.join(self._path, 'columnstore') | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      else: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        path = os.path.join(self._path, key) | 
					 | 
					 | 
					 | 
					        path = os.path.join(self._path, key) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					      # TODO(mgraczyk): This implementation will need to change for zip. | 
					 | 
					 | 
					 | 
					      # TODO(mgraczyk): This implementation will need to change for zip. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					      if os.path.isdir(path): | 
					 | 
					 | 
					 | 
					      if not self._is_dict and os.path.isdir(path): | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        return ColumnStoreReader(path) | 
					 | 
					 | 
					 | 
					        return ColumnStoreReader(path) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					      else: | 
					 | 
					 | 
					 | 
					      else: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        if self._mmap: | 
					 | 
					 | 
					 | 
					        if self._is_dict and self._np_data: | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					          # note that direct i/o does nothing for mmap since file read/write interface is not used | 
					 | 
					 | 
					 | 
					          ret = self._np_data | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					          ret = np.load(path, mmap_mode='r', allow_pickle=self._allow_pickle, fix_imports=False) | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        else: | 
					 | 
					 | 
					 | 
					        else: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					          if self._direct_io: | 
					 | 
					 | 
					 | 
					          ret = self._load(path) | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            opener = lambda path, flags: os.open(path, os.O_RDONLY | os.O_DIRECT) | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            with open(path, 'rb', buffering=0, opener=opener) as f: | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					              ret = np.load(f, allow_pickle=self._allow_pickle, fix_imports=False) | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					          else: | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					            ret = np.load(path, allow_pickle=self._allow_pickle, fix_imports=False) | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        if type(ret) == np.lib.npyio.NpzFile: | 
					 | 
					 | 
					 | 
					        if type(ret) == np.lib.npyio.NpzFile: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					          if self._is_dict: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					            if self._prefix+key in ret.keys(): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					              return ret[self._prefix+key] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					            if any(k.startswith(self._prefix+key+"/") for k in ret.keys()): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					              return ColumnStoreReader(self._path, mmap=self._mmap, allow_pickle=self._allow_pickle, direct_io=self._direct_io, prefix=self._prefix+key+"/", np_data=ret) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					            raise KeyError(self._prefix+key) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					          # if it's saved as compressed, it has arr_0 only in the file. deref this | 
					 | 
					 | 
					 | 
					          # if it's saved as compressed, it has arr_0 only in the file. deref this | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					          return ret['arr_0'] | 
					 | 
					 | 
					 | 
					          return ret['arr_0'] | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					        else: | 
					 | 
					 | 
					 | 
					        else: | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -128,6 +153,27 @@ class ColumnStoreWriter(): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    # TODO(mgraczyk): This implementation will need to change if we add zip or compression. | 
					 | 
					 | 
					 | 
					    # TODO(mgraczyk): This implementation will need to change if we add zip or compression. | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    return ColumnStoreWriter(os.path.join(self._path, group_name)) | 
					 | 
					 | 
					 | 
					    return ColumnStoreWriter(os.path.join(self._path, group_name)) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  def add_dict(self, data, dtype=None, compression=False, overwrite=False): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    # default name exists to have backward compatibility with equivalent directory structure | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    npy_path = os.path.join(self._path, "columnstore") | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    mkdirs_exists_ok(os.path.dirname(npy_path)) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    flat_dict = dict() | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    _flatten_dict(flat_dict, "", data) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    for k, v in flat_dict.items(): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      flat_dict[k] = np.array(v, copy=False, dtype=dtype) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    if overwrite: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      f = open(npy_path, "wb") | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    else: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      f = os.fdopen(os.open(npy_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL), "wb") | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    with closing(f) as f: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      if compression: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        np.savez_compressed(f, **flat_dict) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      else: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					        np.savez(f, **flat_dict) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  def close(self): | 
					 | 
					 | 
					 | 
					  def close(self): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    pass | 
					 | 
					 | 
					 | 
					    pass | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -135,6 +181,15 @@ class ColumnStoreWriter(): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  def __exit__(self, type, value, traceback): self.close() | 
					 | 
					 | 
					 | 
					  def __exit__(self, type, value, traceback): self.close() | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					def _flatten_dict(flat, ns, d): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					  for k, v in d.items(): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    p = (ns + "/" if len(ns) else "") + k | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    if isinstance(v, collections.Mapping): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      _flatten_dict(flat, p, v) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					    else: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					      flat[p] = v | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					def _save_dict_as_column_store(values, writer, compression): | 
					 | 
					 | 
					 | 
					def _save_dict_as_column_store(values, writer, compression): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  for k, v in values.items(): | 
					 | 
					 | 
					 | 
					  for k, v in values.items(): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    if isinstance(v, collections.Mapping): | 
					 | 
					 | 
					 | 
					    if isinstance(v, collections.Mapping): | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -147,4 +202,3 @@ def save_dict_as_column_store(values, output_path, compression=False): | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					  with ColumnStoreWriter(output_path) as writer: | 
					 | 
					 | 
					 | 
					  with ColumnStoreWriter(output_path) as writer: | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					    _save_dict_as_column_store(values, writer, compression) | 
					 | 
					 | 
					 | 
					    _save_dict_as_column_store(values, writer, compression) | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
						
					 | 
					 | 
					
  |