Source code for omegaml.backends.rawfiles

  1import io
  2import os
  3from os.path import dirname, basename
  4
  5import smart_open
  6
  7from omegaml.backends.basedata import BaseDataBackend
  8
  9try:
 10    from smart_open import open
 11except:
 12    pass
 13
 14
class PythonRawFileBackend(BaseDataBackend):
    """
    OmegaStore backend to support arbitrary files

    Objects are stored as their raw binary contents. Accepted inputs are
    file-like objects (anything with a ``read`` attribute), objects that
    can be opened for reading when ``kind='python.file'`` is requested, and
    whatever the inherited ``_is_path()`` helper accepts (presumably local
    paths -- the helper is defined outside this class).
    """
    KIND = 'python.file'

    @classmethod
    def supports(cls, obj, name, open_kwargs=None, **kwargs):
        """
        check whether this backend can store obj

        Args:
            obj (str|Path|filelike): the object to check
            name (str): the name of the object (not used for the check)
            open_kwargs (dict): optional, kwargs passed to open() when
                probing whether obj can be opened
            **kwargs: only ``kind`` is evaluated; obj is probed with open()
                only if ``kind`` equals ``KIND``

        Returns:
            truthy if obj is file-like, can be opened, or is a path
        """
        is_filelike = hasattr(obj, 'read')
        # copy, so the caller's dict is never modified by the probe
        open_kwargs = dict(open_kwargs or {})
        if kwargs.get('kind') == cls.KIND:
            # _is_openable and _is_path are plain methods; since we are in a
            # classmethod the class itself is passed explicitly as ``self``
            is_filelike |= cls._is_openable(cls, obj, **open_kwargs)
        return is_filelike or cls._is_path(cls, obj)

    def _is_openable(self, obj, **kwargs):
        """
        test whether obj is an open file or can be opened and read

        Args:
            obj: an already opened file object, or anything open() accepts
            **kwargs: kwargs passed to open(); ``mode`` defaults to ``'rb'``.
                Since the probe reads one byte, a read mode is expected.

        Returns:
            bool, True if obj is an open file object or could be opened
            and read, False otherwise
        """
        # default to binary read, but respect a mode given by the caller
        kwargs.setdefault('mode', 'rb')
        # already opened file
        if isinstance(obj, io.IOBase):
            return not obj.closed
        try:
            with open(obj, **kwargs) as fin:
                fin.read(1)
        except Exception:
            # any failure to open or read means obj is not openable; do not
            # swallow KeyboardInterrupt/SystemExit as a bare except would
            return False
        return True

    def get(self, name, local=None, mode='wb', open_kwargs=None, chunksize=None, uri=None, **kwargs):
        """
        get a stored file as a file-like object with binary contents or a local file

        Args:
            name (str): the name of the file
            local (str): if set the local path will be created and the file
                stored there. If local does not have an extension it is assumed
                to be a directory name, in which case the file is stored as the
                same name.
            mode (str): the mode to use on .open() for the local file
            open_kwargs (dict): the kwargs to use .open() for the local file
            chunksize (int): optional, the size of chunks read from the
                source when copying to the local file, defaults to 4 MiB
            uri (str): optional, the location to read the contents from. If
                not given, the uri stored in the metadata is used; if that is
                empty too, the metadata's gridfile is read.
            **kwargs: any kwargs passed to datasets.metadata()

        Returns:
            the file-like output handler (local is None)
            the path to the local file (local is given)

        See also:
            https://docs.python.org/3/glossary.html#term-file-object
            https://docs.python.org/3/glossary.html#term-binary-file
        """
        meta = self.data_store.metadata(name, **kwargs)
        chunksize = chunksize or 1024 * 1024 * 4
        uri = uri or meta.uri
        if uri:
            outf = open(uri, mode='rb')
        else:
            # reuse the metadata fetched above instead of querying it again
            outf = meta.gridfile
        if not local:
            return filelike(outf)
        try:
            # a name containing a dot is treated as a filename, else as a directory
            is_filename = '.' in basename(local)
            target_dir = dirname(local) if is_filename else local
            local = local if is_filename else f'{local}/{name}'
            if target_dir:
                # a bare filename has no directory part; makedirs('') would raise
                os.makedirs(target_dir, exist_ok=True)
            open_kwargs = open_kwargs or {}
            with smart_open.open(local, mode=mode, **open_kwargs) as flocal:
                while data := outf.read(chunksize):
                    flocal.write(data)
        finally:
            if uri:
                # we opened this handle ourselves, so release it once copied
                outf.close()
        return local

    def put(self, obj, name, attributes=None, encoding=None, uri=None, **kwargs):
        """
        store the binary contents of a file-like object

        Any object previously stored as name is dropped first.

        Args:
            obj (str|Path|filelike): the object to be stored
            name (str): the name for the object's metadata
            attributes (dict): optional, metadata attributes
            encoding (str): optional, a valid encoding, such as utf8
            uri (str): optional, the local or remote file url compatible with smart_open
            **kwargs: passed on to the inherited _store_to_file()

        Returns:
            Metadata
        """
        # replace semantics: remove an existing object of the same name
        self.data_store.drop(name, force=True)
        storekey = self.data_store.object_store_key(name, 'file', hashed=True)
        gridfile = self._store_to_file(self.data_store, obj, storekey, encoding=encoding, uri=uri,
                                       **kwargs)
        return self.data_store._make_metadata(
            name=name,
            prefix=self.data_store.prefix,
            bucket=self.data_store.bucket,
            kind=self.KIND,
            attributes=attributes,
            # the metadata always records a string, empty if no uri was given
            uri=str(uri or ''),
            gridfile=gridfile).save()


def filelike(obj):
    """
    return the actual file-like object behind obj

    Converts a GridFsProxy to a GridOut, a filelike object; for actual
    files, returns just the actual file.

    Args:
        obj: a proxy exposing a no-argument ``get()`` method, or any other
            object

    Returns:
        the result of ``obj.get()`` if obj has a ``get`` attribute,
        otherwise obj itself
    """
    return obj.get() if hasattr(obj, 'get') else obj