1import mimetypes
2from base64 import encodebytes, decodebytes
3
4import json
5import nbformat
6import os
7from datetime import datetime
8from io import BytesIO
9from jupyter_server.services.contents.manager import ContentsManager
10from tornado import web
11from traitlets import default
12from urllib.parse import unquote
13
14from omegaml.notebook.checkpoints import NoOpCheckpoints
15
16
[docs]
17class OmegaStoreContentsManager(ContentsManager):
18 """
19 Jupyter notebook storage manager for omegaml
20
21 Adopted from notebook/services/contents/filemanager.py
22
23 This requires a properly configured omegaml instance.
24 see https://jupyter-server.readthedocs.io/en/latest/developers/contents.html
25 """
26
def __init__(self, **kwargs):
    """
    set up the contents manager

    An optional ``omega=`` keyword (meant for testing) is taken out of
    ``kwargs`` and kept as the omega instance to use. All remaining
    keyword arguments are passed on to the parent initializer.
    """
    injected_omega = kwargs.pop('omega', None)
    self._omega = injected_omega
    super(OmegaStoreContentsManager, self).__init__(**kwargs)
31
def _checkpoints_class_default(self):
    """
    return the checkpoints class to use, always NoOpCheckpoints

    NOTE(review): presumably picked up by traitlets as the dynamic default
    of the ``checkpoints_class`` trait -- confirm against ContentsManager.
    """
    checkpoints_class = NoOpCheckpoints
    return checkpoints_class
34
@property
def omega(self):
    """
    return the omega instance used by the contents manager

    If no instance was passed to the constructor, the ``omegaml`` module
    itself is imported on first access and remembered. On every access the
    jobs store is flagged to include directory placeholders.
    """
    instance = self._omega
    if instance is None:
        # deferred import, only needed when no instance was injected
        import omegaml as om
        instance = self._omega = om
    instance.jobs._include_dir_placeholder = True
    return instance
45
@property
def store(self):
    """
    return the OmegaStore for jobs (notebooks)
    """
    jobs = self.omega.jobs
    return jobs.store
52
@property
def _dir_placeholder(self):
    """
    return the name of the placeholder entry, as defined by the jobs store
    """
    jobs = self.omega.jobs
    return jobs._dir_placeholder
56
def get(self, path, content=True, type=None, format=None):
    """
    get an entry in the store

    this is called by the contents engine to get the contents of the jobs
    store.

    Args:
        path: (str) the API path of the entry, may be url-quoted
        content: (bool) if True include the content for notebooks and files,
            directories are always returned without content
        type: (str) 'notebook', 'file', 'directory' or None. With None,
            a path ending in .ipynb is a notebook, anything else a directory
        format: (str) accepted for interface compatibility, not used

    Returns:
        the model (dict) of the entry

    Raises:
        web.HTTPError: 404 if the type is not supported
    """
    entry_path = unquote(path).strip('/')
    implied_notebook = type is None and entry_path.endswith('.ipynb')
    if type == 'notebook' or implied_notebook:
        return self._notebook_model(entry_path, content=content)
    if type == 'file':
        return self._file_model(entry_path, content=content)
    if type is None or type == 'directory':
        # jupyterlab passes None to get a directory. The listing never
        # carries content to save time, the frontend requests the specific
        # contents afterwards
        return self._dir_model(entry_path, content=False)
    raise web.HTTPError(404, u'Type {} at {} is not supported'.format(type, entry_path))
77
def save(self, model, path):
    """
    save an entry in the store

    this is called by the contents engine to store a notebook, a generic
    file or a directory

    Args:
        model: (dict) the contents model. ``type`` is required and must be
            'notebook', 'file' or 'directory'. Notebooks carry their
            ``content`` as a dict, a JSON string, or (``format`` 'base64')
            base64 encoded JSON. Files carry ``content`` and ``format``
        path: (str) the API path of the entry, may be url-quoted

    Returns:
        the model (dict) of the saved entry without content, as built by
        get(). A validation message on a notebook model is carried over

    Raises:
        web.HTTPError: 400 if the type is missing or unsupported, or the
            notebook content is missing or malformed; 500 for any other
            error while saving
    """
    path = unquote(path).strip('/')
    kind = model.get('type')
    self.run_pre_save_hooks(model=model, path=path)
    if kind is None:
        raise web.HTTPError(400, u'No file type provided')
    try:
        if kind == 'notebook' or (kind == 'file' and path.endswith('.ipynb')):
            content = model.get('content')
            # parse input
            if model.get('format') == 'text' and isinstance(content, bytes):
                content = content.decode('utf8')
            elif model.get('format') == 'base64':
                if isinstance(content, str):
                    content = content.encode('ascii')
                content = decodebytes(content).decode('utf8')
            try:
                # content may be serialized more than once, unwrap until
                # we reach the actual notebook structure
                while isinstance(content, str):
                    content = json.loads(content)
            except ValueError as e:
                # malformed JSON is a client error, not a server failure
                raise web.HTTPError(
                    400, u'Invalid notebook content for %s: %s' % (path, e))
            model['content'] = content
            model['format'] = None
            if not isinstance(content, dict):
                raise web.HTTPError(400, u'No file content provided or wrong format')
            # create notebook
            nb = nbformat.from_dict(content)
            self.check_and_sign(nb, path)
            self.omega.jobs.put(nb, path)
            self.validate_notebook_model(model)
            validation_message = model.get('message', None)
            model = self.get(path, content=False, type=kind)
            if validation_message:
                model['message'] = validation_message
        elif kind == 'directory':
            # a directory exists by virtue of a placeholder entry inside it
            ph_name = '{}/{}'.format(path, self._dir_placeholder).strip('/')
            self.omega.jobs.create("#placeholder", ph_name)
            model = self.get(path, content=False, type=kind)
            model['content'] = None
            model['format'] = None
        elif kind == 'file':
            self._save_file(path, model.get('content'), model.get('format'))
            model = self.get(path, content=False, type=kind)
        else:
            raise web.HTTPError(
                400, "Unhandled contents type: %s" % kind)
    except web.HTTPError:
        raise
    except Exception as e:
        self.log.error(
            u'Error while saving file: %s %s', path, e, exc_info=True)
        raise web.HTTPError(
            500, u'Unexpected error while saving file: %s %s' % (path, e))
    return model
139
def delete_file(self, path):
    """
    delete an entry

    this is called by the contents engine to delete an entry

    The path is first dropped as a plain entry of the jobs store. If that
    fails for any reason the path is taken to be a directory and its
    placeholder entry is dropped instead. An error raised by that second
    attempt is propagated to the caller.

    Args:
        path: (str) the API path of the entry, may be url-quoted
    """
    path = unquote(path).strip('/')
    try:
        self.omega.jobs.drop(path)
    except Exception as e:
        # keep a trace of why the direct drop failed instead of silently
        # discarding the error, then fall back to the directory placeholder
        self.log.debug(
            u'Could not drop %s (%s), dropping directory placeholder instead', path, e)
        self.omega.jobs.drop(path + '/' + self._dir_placeholder)
151
def rename_file(self, old_path, new_path):
    """
    rename a file or directory

    this is called by the contents engine to rename an entry

    Renaming only changes the name in the entry's metadata, the stored
    gridfile stays the same. A directory has no entry of its own, it is
    the common name prefix of its entries (including the placeholder), so
    renaming a directory renames every entry below it.

    Args:
        old_path: (str) the current API path, may be url-quoted
        new_path: (str) the new API path, may be url-quoted

    Raises:
        web.HTTPError: 409 if new_path already exists as a file or
            directory, 404 if old_path does not exist
    """
    import re

    old_path = unquote(old_path).strip('/')
    new_path = unquote(new_path).strip('/')
    # check file or directory
    if self.file_exists(new_path):
        raise web.HTTPError(409, u'Notebook already exists: %s' % new_path)
    elif self.dir_exists(new_path):
        raise web.HTTPError(409, u'Directory already exists: %s' % new_path)
    # do the renaming
    if self.dir_exists(old_path):
        old_prefix = old_path + '/'
        pattern = r'^{}'.format(re.escape(old_prefix))
        entries = self.omega.jobs.list(regexp=pattern, raw=True,
                                       hidden=True, include_temp=True)
        for meta in entries:
            if not meta.name.startswith(old_prefix):
                continue
            meta.name = new_path + '/' + meta.name[len(old_prefix):]
            meta.save()
    elif self.file_exists(old_path):
        meta = self.omega.jobs.metadata(old_path)
        if meta is None:
            # file_exists also considers datasets, where generic files
            # are stored
            meta = self.omega.datasets.metadata(old_path)
        if meta is None:
            raise web.HTTPError(404, u'No such file: %s' % old_path)
        meta.name = new_path
        meta.save()
    else:
        raise web.HTTPError(404, u'No such file or directory: %s' % old_path)
176
def exists(self, path):
    """check if a file or directory exists at the given path

    Args:
        path: (str) the API path with '/' as separator, may be url-quoted

    Returns:
        the result of file_exists() if it is truthy, else the result of
        dir_exists()
    """
    target = unquote(path).strip('/')
    found = self.file_exists(target)
    if not found:
        found = self.dir_exists(target)
    return found
188
def dir_exists(self, path=''):
    """check if directory exists

    A directory exists if at least one entry of the jobs store, e.g. the
    directory placeholder, is named ``<path>/...``. The root directory
    always exists.

    Args:
        path: name of directory, may be url-quoted

    Returns:
        True if directory exists
    """
    import re

    path = unquote(path).strip('/')
    if path == '':
        return True
    # escape the path so that regex metacharacters in names are matched
    # literally, and anchor on the '/' right after it so that 'foo' does
    # not match entries of a sibling like 'foobar/'
    pattern = r'^{path}/({placeholder}|.+)'.format(
        path=re.escape(path), placeholder=re.escape(self._dir_placeholder))
    return len(self.omega.jobs.list(regexp=pattern)) > 0
203
def file_exists(self, path=""):
    """check if file exists

    A file exists if an entry of exactly this name is listed by the jobs
    store or by the datasets store.

    Args:
        path: name of file, may be url-quoted

    Returns:
        True if file exists
    """
    import re

    path = unquote(path).strip('/')
    if not path:
        return False
    # always check for an actual file, not some sub path. The name is
    # escaped so that characters like '.', '(' or '+' match literally
    pattern = r'^{}$'.format(re.escape(path))
    if len(self.omega.jobs.list(regexp=pattern)) > 0:
        return True
    return len(self.omega.datasets.list(regexp=pattern)) > 0
221
def is_hidden(self, path):
    """tell whether a path or file is hidden

    Args:
        path: name of file or path, not looked at

    Returns:
        False, no entry is ever reported as hidden
    """
    hidden = False
    return hidden
232
233 def _read_notebook(self, path, as_version=None):
234 path = unquote(path).strip('/')
235 return self.omega.jobs.get(path)
236
237 def _notebook_model(self, path, content=True, meta=None):
238 """
239 Build a notebook model
240 if content is requested, the notebook content will be populated
241 as a JSON structure (not double-serialized)
242 """
243 path = unquote(path).strip('/')
244 model = self._base_model(path)
245 model['type'] = 'notebook'
246 # always add accurate created and modified
247 meta = meta or self.omega.jobs.metadata(path)
248 if meta is not None:
249 model['created'] = meta.created
250 model['last_modified'] = meta.modified
251 if content:
252 nb = self._read_notebook(path, as_version=4)
253 if nb is None:
254 raise web.HTTPError(400, "Cannot read non-file {}".format(path))
255 self.mark_trusted_cells(nb, path)
256 model['content'] = nb
257 model['format'] = 'json'
258 self.validate_notebook_model(model)
259 return model
260
261 def _base_model(self, path, kind=None):
262 """Build the common base of a contents model"""
263 # http://jupyter-notebook.readthedocs.io/en/stable/extending/contents.html
264 path = unquote(path).strip('/')
265 last_modified = datetime.utcnow()
266 created = last_modified
267 # Create the base model.
268 model = {}
269 model['name'] = os.path.basename(path)
270 model['path'] = path
271 model['last_modified'] = last_modified
272 model['created'] = created
273 model['content'] = None
274 model['format'] = None
275 model['mimetype'] = None
276 model['writable'] = True
277 if kind:
278 model['type'] = kind
279 model['content'] = [] if kind == 'directory' else None
280 return model
281
282 def _dir_model(self, path, content=True):
283 """
284 Build a model to return all of the files in gridfs
285 if content is requested, will include a listing of the directory
286 """
287 # this looks like a seemingly simple task, it's carefully crafted
288 path = unquote(path).strip('/')
289 model = self._base_model(path, kind='directory')
290 model['format'] = 'json'
291 contents = model['content']
292 # get existing entries from a pattern that matches either
293 # top-level files: ([\w -]+\.[\w -]*)
294 # directories (files in): ([\w ]+/([\w ]+\.[\w]*))
295 # does not work:
296 # pattern = r'([\w ]+/_placeholder\.[\w]*)|([\w ]+\.[\w]*)$'
297 # it is too restrictive as entries can be generated without a placeholder
298 # so we get all, which may include sub/sub/file
299 # and we need to include them because we need to find sub/sub directories
300 # note \w is any word character (letter, digit, underscore)
301 # \s is any white space
302 # \d is any digit
303 # :_ match literally
304 # [^\/] matches any character except /
305 # pattern = r'([\w\s\-.\d:()+]+\/)?([\w\s\-.\d:()+]+\.[\w]*)$'
306 pattern = r'([^\/]+\/)?([^\/]+\.[^\/]*)$'
307 # if we're looking in an existing directory, prepend that
308 if path:
309 pattern = r'{path}/{pattern}'.format(path=path, pattern=pattern)
310 pattern = r'^{}'.format(pattern)
311 entries = self.omega.jobs.list(regexp=pattern, raw=True, hidden=True, include_temp=True)
312 if path and not entries:
313 raise web.HTTPError(400, "Directory not found {}".format(path))
314 # by default assume the current path is listed already
315 directories = [path]
316 for meta in entries:
317 # get path of entry, e.g. sub/foo.ipynb => sub
318 entry_path = os.path.dirname(meta.name)
319 # if not part of listed directories yet, include
320 if entry_path not in directories:
321 entry = self._base_model(entry_path, kind='directory')
322 contents.append(entry)
323 directories.append(entry_path)
324 # ignore placeholder files
325 if meta.name.endswith(self._dir_placeholder):
326 continue
327 # only include files that are in the path we're listing
328 if entry_path != path:
329 continue
330 # include the actual file
331 try:
332 entry = self._notebook_model(meta.name, content=content, meta=meta)
333 except Exception as e:
334 msg = ('_dir_model error, cannot get {}, '
335 'removing from list, exception {}'.format(meta.name, str(e)))
336 self.log.warning(msg)
337 else:
338 contents.append(entry)
339 return model
340
341 def _file_model(self, path, content=True, format=None):
342 model = self._base_model(path)
343 model['type'] = 'file'
344
345 model['mimetype'] = mimetypes.guess_type(path)[0]
346
347 if content:
348 content, format = self._read_file(path, format)
349 if model['mimetype'] is None:
350 default_mime = {
351 'text': 'text/plain',
352 'base64': 'application/octet-stream'
353 }[format]
354 model['mimetype'] = default_mime
355
356 model.update(
357 content=content,
358 format=format,
359 )
360
361 return model
362
363 def _read_file(self, os_path, format):
364 """Read a non-notebook file.
365
366 os_path: The path to be read.
367 format:
368 If 'text', the contents will be decoded as UTF-8.
369 If 'base64', the raw bytes contents will be encoded as base64.
370 If not specified, try to decode as UTF-8, and fall back to base64
371 """
372 if os_path.endswith('.ipynb'):
373 meta = self.omega.jobs.metadata(os_path)
374 else:
375 meta = self.omega.datasets.metadata(os_path)
376 if meta is None or meta.gridfile is None:
377 raise web.HTTPError(400, "Cannot read non-file %s" % os_path)
378
379 if meta.gridfile:
380 bcontent = meta.gridfile.read()
381 meta.gridfile.close()
382
383 if format is None or format == 'text':
384 # Try to interpret as unicode if format is unknown or if unicode
385 # was explicitly requested.
386 try:
387 return bcontent.decode('utf8'), 'text'
388 except UnicodeError:
389 if format == 'text':
390 raise web.HTTPError(
391 400,
392 "%s is not UTF-8 encoded" % os_path,
393 reason='bad format',
394 )
395 return encodebytes(bcontent).decode('ascii'), 'base64'
396
397 def _save_file(self, os_path, content, format):
398 """Save content of a generic file."""
399 if format not in {'text', 'base64'}:
400 raise web.HTTPError(
401 400,
402 "Must specify format of file contents as 'text' or 'base64'",
403 )
404 try:
405 if format == 'text':
406 bcontent = content.encode('utf8')
407 else:
408 b64_bytes = content.encode('ascii')
409 bcontent = decodebytes(b64_bytes)
410 except Exception as e:
411 raise web.HTTPError(
412 400, u'Encoding error saving %s: %s' % (os_path, e)
413 )
414
415 self.omega.datasets.put(BytesIO(bcontent), os_path)
416
@default('files_handler_params')
def _files_handler_params_default(self):
    """
    return the default files_handler_params, a dict with the root_dir as path
    """
    # Why this is needed: without a 'path' entry the files handler fails with
    #   TypeError: StaticFileHandler.initialize() missing 1 required positional argument: 'path'
    # see https://github.com/jupyter-server/jupyter_server/issues/1313
    # - the cause is that ContentsManager.files_handler_params is not set
    # - it only shows when dealing with local files, e.g. when downloading a notebook
    # - providing the path here ensures that ContentsManager.files_handler_class=FilesHandler
    #   gets a path on initialize()
    handler_params = {'path': self.root_dir}
    return handler_params