1import io
2import os
3from os.path import dirname, basename
4
5import smart_open
6
7from omegaml.backends.basedata import BaseDataBackend
8
9try:
10 from smart_open import open
11except:
12 pass
13
14
[docs]
class PythonRawFileBackend(BaseDataBackend):
    """
    OmegaStore backend to support arbitrary files

    Objects are stored as the binary contents of a file. On retrieval the
    contents are read either from the uri recorded in the metadata (or passed
    explicitly) or from the gridfile attached to the metadata.
    """
    KIND = 'python.file'

    @classmethod
    def supports(cls, obj, name, open_kwargs=None, **kwargs):
        """
        check whether obj can be handled by this backend

        Args:
            obj: the object to check
            name (str): the name of the object, not used by the check
            open_kwargs (dict): optional, kwargs passed to open() when obj
                is probed for readability
            **kwargs: if kwargs['kind'] equals KIND, obj is additionally
                probed by trying to open and read it

        Returns:
            bool, True if obj has a .read attribute, is openable (only
            checked if kind is given), or is accepted by ._is_path()
        """
        is_filelike = hasattr(obj, 'read')
        open_kwargs = dict(open_kwargs or {})
        if kwargs.get('kind') == cls.KIND:
            # the helpers are plain methods, so the class itself is passed
            # in place of an instance
            is_filelike |= cls._is_openable(cls, obj, **open_kwargs)
        # NOTE(review): _is_path is not defined in this class, presumably
        # it is inherited from BaseDataBackend -- confirm
        return is_filelike or cls._is_path(cls, obj)

    def _is_openable(self, obj, **kwargs):
        """
        check whether obj is an open file or can be opened and read

        Args:
            obj: an io.IOBase instance, or anything accepted by open()
            **kwargs: kwargs passed to open(), mode defaults to 'rb'.
                Since obj is probed by reading from it, a read mode is
                expected.

        Returns:
            bool, for io.IOBase instances True if the file is not closed,
            otherwise True if obj could be opened and read from
        """
        # already opened file
        if isinstance(obj, io.IOBase):
            return not obj.closed
        # default to binary reading, yet respect a mode given by the caller
        kwargs.setdefault('mode', 'rb')
        try:
            with open(obj, **kwargs) as fin:
                fin.read(1)
        except Exception:
            # any failure to open or read means obj is not openable, while
            # KeyboardInterrupt and SystemExit are left to propagate
            return False
        return True

    def get(self, name, local=None, mode='wb', open_kwargs=None, chunksize=None, uri=None, **kwargs):
        """
        get a stored file as a file-like object with binary contents or a local file

        Args:
            name (str): the name of the file
            local (str): if set the local path will be created and the file
               stored there. If local does not have an extension it is assumed
               to be a directory name, in which case the file is stored as the
               same name.
            mode (str): the mode to use on .open() for the local file
            open_kwargs (dict): the kwargs to use .open() for the local file
            chunksize (int): optional, the size of chunks to be read from the
               stored file when copying to local, defaults to 4MB
            uri (str): optional, the uri to read the contents from, defaults
               to the uri recorded in the metadata. If neither is set the
               metadata's gridfile is read.
            **kwargs: any kwargs passed to datasets.metadata()

        Returns:
            the file-like output handler (local is None)
            the path to the local file (local is given)

        See also:
            https://docs.python.org/3/glossary.html#term-file-object
            https://docs.python.org/3/glossary.html#term-binary-file
        """
        meta = self.data_store.metadata(name, **kwargs)
        chunksize = chunksize or 1024 * 1024 * 4
        uri = uri or meta.uri
        # a uri takes precedence over the stored gridfile
        outf = open(uri, mode='rb') if uri else meta.gridfile
        if not local:
            return filelike(outf)
        try:
            is_filename = '.' in basename(local)
            if is_filename:
                target_dir = dirname(local)
            else:
                target_dir = local
                local = f'{local}/{name}'
            # dirname() is empty for a bare filename, which os.makedirs()
            # rejects -- in that case there is no directory to create
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
            open_kwargs = open_kwargs or {}
            with smart_open.open(local, mode=mode, **open_kwargs) as flocal:
                while data := outf.read(chunksize):
                    flocal.write(data)
        finally:
            # only close the stream that was opened here, the gridfile
            # belongs to the metadata object
            if uri:
                outf.close()
        return local

    def put(self, obj, name, attributes=None, encoding=None, uri=None, **kwargs):
        """
        store the binary contents of a file-like object

        Any existing object of the same name is dropped before storing.

        Args:
            obj (str|Path|filelike): the object to be stored
            name (str): the name for the object's metadata
            attributes (dict): optional, metadata attributes
            encoding (str): optional, a valid encoding, such as utf8
            uri (str): optional, the local or remote file url compatible with smart_open
            **kwargs: passed on to ._store_to_file()

        Returns:
            Metadata
        """
        self.data_store.drop(name, force=True)
        storekey = self.data_store.object_store_key(name, 'file', hashed=True)
        gridfile = self._store_to_file(self.data_store, obj, storekey, encoding=encoding, uri=uri,
                                       **kwargs)
        return self.data_store._make_metadata(
            name=name,
            prefix=self.data_store.prefix,
            bucket=self.data_store.bucket,
            kind=self.KIND,
            attributes=attributes,
            # the uri is always recorded as a string, empty if not given
            uri=str(uri or ''),
            gridfile=gridfile).save()
111
112
def filelike(obj):
    """
    return the actual file-like object for obj

    Args:
        obj: an object with a .get() method, such as a GridFsProxy, or
            an actual file

    Returns:
        the result of obj.get() if obj has a .get attribute, else obj itself
    """
    # convert GridFsProxy to GridOut, a filelike object
    # -- for actual files, returns just the actual file
    return obj.get() if hasattr(obj, 'get') else obj