1import re
2
3
[docs]
class LazyGetMixin:
    """
    OmegaStore mixin to support chunked lazy get via name

    A name of the form ``<name>#<op>[:<key>=<value>[,<key>=<value>...]]``
    is resolved by calling the parent store's ``get(<name>, lazy=True)`` and
    then calling ``<op>(**kwargs)`` on the object it returns. Keyword values
    are passed on as strings, exactly as written in the name.

    Usage:

        call the default operation, which is ``iterchunks()`` when the lazy
        object is an ``MDataFrame`` (and ``__repr__()`` otherwise)::

            mdf = om.datasets.get('foo#')

        equivalent of ``om.datasets.get('foo', lazy=True).iterchunks()``::

            mdf = om.datasets.get('foo#iterchunks')

        equivalent of
        ``om.datasets.get('foo', lazy=True).iterchunks(chunksize='10')``::

            mdf = om.datasets.get('foo#iterchunks:chunksize=10')

        equivalent of
        ``om.datasets.get('foo', lazy=True).rows(start='1', end='10')``::

            mdf = om.datasets.get('foo#rows:start=1,end=10')
    """
    # requires a trailing ; to work in all cases, see https://regex101.com/r/lYeKAw/1
    ops_pattern = re.compile(r"(?P<name>.*)#(?P<opspec>.*?);(.*)$")

    def metadata(self, name, *args, **kwargs):
        """
        Return the parent store's metadata for ``name``, ignoring any opspec.

        Args:
            name: object name, optionally carrying a ``#<opspec>`` suffix;
                non-string names are passed through unchanged
            *args: forwarded to the parent ``metadata()``
            **kwargs: forwarded to the parent ``metadata()``

        Returns:
            whatever the parent ``metadata()`` returns for the bare name
        """
        if isinstance(name, str):
            name, _ = self._extract_opspec(name)
        return super().metadata(name, *args, **kwargs)

    def get(self, name, *args, **kwargs):
        """
        Get an object, applying the operation given in the name, if any.

        Without a ``#`` in the name this is a plain pass-through to the
        parent ``get()``. With an opspec, the parent ``get()`` is called with
        ``lazy=True`` and the requested operation is called on the result.

        Args:
            name: object name, optionally ``<name>#<op>[:k=v[,k=v...]]``;
                a falsy name is treated as ``''``
            *args: forwarded to the parent ``get()``
            **kwargs: forwarded to the parent ``get()``; ``lazy`` is forced
                to ``True`` when an opspec is present

        Returns:
            the parent's result if there is no opspec, else the result of
            calling the operation on the lazy object. If the operation is
            not an attribute of the lazy object, the lazy object itself is
            returned.

        Raises:
            ValueError: if a keyword item in the opspec has no ``=``
        """
        name, opspec = self._extract_opspec(name or '')
        if opspec is None:
            return super().get(name, *args, **kwargs)
        kwargs = {**kwargs, 'lazy': True}
        lazy = super().get(name, *args, **kwargs)
        op, has_kwargs, kwargs_spec = opspec.partition(':')
        if has_kwargs:
            op_kwargs = self._parse_op_kwargs(kwargs_spec)
        elif op and callable(getattr(lazy, op, None)):
            # an explicitly named operation takes precedence over the default
            op_kwargs = {}
        else:
            op, op_kwargs = self._default_op(name, lazy)
        # unknown operation: hand back the lazy object rather than failing
        meth = getattr(lazy, op, lambda *_args, **_kwargs: lazy)
        return meth(**op_kwargs)

    @staticmethod
    def _parse_op_kwargs(spec):
        """
        Parse ``k1=v1,k2=v2`` into a dict of string keys to string values.

        Blank items are skipped, so an empty spec yields ``{}``. Each item is
        split on its first ``=`` only, so values may themselves contain ``=``.

        Raises:
            ValueError: if a non-blank item contains no ``=``
        """
        op_kwargs = {}
        for item in spec.split(','):
            if not item.strip():
                continue
            key, sep, value = item.partition('=')
            if not sep:
                raise ValueError(
                    'invalid operation argument {!r}, expected key=value'.format(item))
            op_kwargs[key] = value
        return op_kwargs

    def _extract_opspec(self, name):
        """
        Split ``<name>#<opspec>`` into ``(name, opspec)``.

        The split happens at the last ``#``. ``opspec`` is ``None`` when the
        name is not a string, contains no ``#`` or does not match
        ``ops_pattern``; in those cases ``name`` is returned unchanged.
        """
        if not isinstance(name, str) or '#' not in name:
            return name, None
        # the pattern needs a terminating ';' to delimit the opspec
        match = self.ops_pattern.match(name + ';')
        if match is None:
            return name, None
        return match.group('name'), match.group('opspec')

    def _default_op(self, name, lazy):
        """
        Return the ``(op, kwargs)`` to apply when the name specifies none.

        Args:
            name: the bare object name (not used by this implementation)
            lazy: the object returned by the parent ``get(..., lazy=True)``

        Returns:
            ``('iterchunks', {})`` for an ``MDataFrame``, else
            ``('__repr__', {})``
        """
        # imported here rather than at module level, as in the original code
        from omegaml.mdataframe import MDataFrame

        op = 'iterchunks' if isinstance(lazy, MDataFrame) else '__repr__'
        return op, {}