Source code for omegaml.notebook.omegacontentsmgr

import json
import mimetypes
import os
import re
from base64 import encodebytes, decodebytes
from datetime import datetime
from io import BytesIO
from urllib.parse import unquote

import nbformat
from jupyter_server.services.contents.manager import ContentsManager
from tornado import web
from traitlets import default

from omegaml.notebook.checkpoints import NoOpCheckpoints
 15
 16
class OmegaStoreContentsManager(ContentsManager):
    """
    Jupyter notebook storage manager for omegaml

    Adopted from notebook/services/contents/filemanager.py

    Notebooks are read from and written to ``omega.jobs``; generic files are
    read from and written to ``omega.datasets``.

    This requires a properly configured omegaml instance.
    see https://jupyter-server.readthedocs.io/en/latest/developers/contents.html
    """

    def __init__(self, **kwargs):
        # pass omega= for testing purpose
        self._omega = kwargs.pop('omega', None)
        super().__init__(**kwargs)

    def _checkpoints_class_default(self):
        """Return the checkpoints class to use (NoOpCheckpoints)."""
        return NoOpCheckpoints

    @property
    def omega(self):
        """
        return the omega instance used by the contents manager

        Falls back to the ``omegaml`` module itself when no instance was
        passed to the constructor.
        """
        if self._omega is None:
            import omegaml as om
            self._omega = om
        # directory placeholder entries must be included in jobs listings
        self._omega.jobs._include_dir_placeholder = True
        return self._omega

    @property
    def store(self):
        """
        return the OmegaStore for jobs (notebooks)
        """
        return self.omega.jobs.store

    @property
    def _dir_placeholder(self):
        """Return the name of the file that marks a directory in the jobs store."""
        return self.omega.jobs._dir_placeholder

    def get(self, path, content=True, type=None, format=None):
        """
        get an entry in the store

        this is called by the contents engine to get the contents of the jobs
        store.

        Args:
            path: the (possibly url-quoted) path of the entry
            content: if True include the content of notebooks and files
            type: 'notebook', 'file', 'directory' or None. With None, paths
                ending in .ipynb are read as notebooks, everything else is
                treated as a directory
            format: accepted for interface compatibility, not used here

        Returns:
            the contents model (dict)

        Raises:
            web.HTTPError: 404 if the type is not supported
        """
        path = unquote(path).strip('/')
        if type == 'notebook' or (type is None and path.endswith('.ipynb')):
            model = self._notebook_model(path, content=content)
        elif type == 'file':
            model = self._file_model(path, content=content)
        elif type in (None, 'directory'):
            # jupyterlab passes None to get directory
            # we never return content with a directory listing to save time
            # the frontend will request the specific contents
            model = self._dir_model(path, content=False)
        else:
            raise web.HTTPError(404, u'Type {} at {} is not supported'.format(type, path))
        return model

    def save(self, model, path):
        """
        save an entry in the store

        this is called by the contents engine to store a notebook, a
        directory or a generic file

        Args:
            model: the contents model (dict) with at least a 'type' and,
                for notebooks and files, 'content' and 'format'
            path: the (possibly url-quoted) path to save to

        Returns:
            the model of the saved entry, without content

        Raises:
            web.HTTPError: 400 on a missing or unhandled type or on invalid
                content, 500 on any unexpected error while saving
        """
        path = unquote(path).strip('/')
        kind = model.get('type')
        self.run_pre_save_hooks(model=model, path=path)
        if kind is None:
            raise web.HTTPError(400, u'No file type provided')
        try:
            if kind == 'notebook' or (kind == 'file' and path.endswith('.ipynb')):
                content = model.get('content')
                # parse input
                if model.get('format') == 'text' and isinstance(content, bytes):
                    content = content.decode('utf8')
                elif model.get('format') == 'base64':
                    if isinstance(content, str):
                        content = content.encode('ascii')
                    content = decodebytes(content).decode('utf8')
                # content may be serialized more than once, unwrap until
                # we get the actual JSON structure
                while isinstance(content, str):
                    content = json.loads(content)
                model['content'] = content
                model['format'] = None
                if content is None or not isinstance(content, dict):
                    raise web.HTTPError(400, u'No file content provided or wrong format')
                # create notebook
                nb = nbformat.from_dict(content)
                self.check_and_sign(nb, path)
                self.omega.jobs.put(nb, path)
                self.validate_notebook_model(model)
                validation_message = model.get('message', None)
                model = self.get(path, content=False, type=kind)
                if validation_message:
                    model['message'] = validation_message
            elif kind == 'directory':
                # a directory is represented by a placeholder entry
                ph_name = '{}/{}'.format(path, self._dir_placeholder).strip('/')
                self.omega.jobs.create("#placeholder", ph_name)
                model = self.get(path, content=False, type=kind)
                model['content'] = None
                model['format'] = None
            elif kind == 'file':
                content = model.get('content')
                fmt = model.get('format')
                self._save_file(path, content, fmt)
                model = self.get(path, content=False, type=kind)
            else:
                raise web.HTTPError(
                    400, "Unhandled contents type: %s" % model['type'])
        except web.HTTPError:
            raise
        except Exception as e:
            self.log.error(
                u'Error while saving file: %s %s', path, e, exc_info=True)
            raise web.HTTPError(
                500, u'Unexpected error while saving file: %s %s' % (path, e))
        return model

    def delete_file(self, path):
        """
        delete an entry

        this is called by the contents engine to delete an entry

        Args:
            path: the (possibly url-quoted) path of the entry to delete
        """
        path = unquote(path).strip('/')
        try:
            self.omega.jobs.drop(path)
        except Exception:
            # dropping the path itself failed, retry with the placeholder
            # entry that represents a directory at this path
            self.omega.jobs.drop(path + '/' + self._dir_placeholder)

    def rename_file(self, old_path, new_path):
        """
        rename a file

        this is called by the contents engine to rename an entry. For a
        directory the placeholder entry is renamed.

        Args:
            old_path: the (possibly url-quoted) current path
            new_path: the (possibly url-quoted) new path

        Raises:
            web.HTTPError: 409 if new_path already exists, 404 if old_path
                does not exist or has no metadata in the jobs store
        """
        old_path = unquote(old_path).strip('/')
        new_path = unquote(new_path).strip('/')
        # check file or directory
        if self.file_exists(new_path):
            raise web.HTTPError(409, u'Notebook already exists: %s' % new_path)
        elif self.dir_exists(new_path):
            raise web.HTTPError(409, u'Directory already exists: %s' % new_path)
        # do the renaming
        if self.dir_exists(old_path):
            old_name = old_path + '/' + self._dir_placeholder
            new_name = new_path + '/' + self._dir_placeholder
        elif self.file_exists(old_path):
            old_name = old_path
            new_name = new_path
        else:
            # previously this fell through with no metadata bound at all
            raise web.HTTPError(404, u'No such file or directory: %s' % old_path)
        meta = self.omega.jobs.metadata(old_name)
        if meta is None:
            # e.g. a directory without a placeholder entry, or an entry
            # that is not kept in the jobs store
            raise web.HTTPError(404, u'No such entry in jobs store: %s' % old_name)
        meta.name = new_name
        # rename on metadata. Note the gridfile instance stays the same
        meta.save()

    def exists(self, path):
        """
        check if a file or directory exists at the given path

        :param path: (str) The relative path of the file or directory
          (with '/' as separator)
        :returns exists: (bool) True if a file or a directory exists at path
        """
        path = unquote(path).strip('/')
        return self.file_exists(path) or self.dir_exists(path)

    def dir_exists(self, path=''):
        """check if directory exists

        The root directory ('') always exists. Any other directory exists
        if the jobs store lists at least one entry below it.

        Args:
            path: name of directory

        Returns:
            True if directory exists
        """
        path = unquote(path).strip('/')
        if path == '':
            return True
        # escape so that regex meta characters in names match literally
        pattern = r'^{path}.*/({placeholder}|.+)'.format(
            path=re.escape(path),
            placeholder=re.escape(self._dir_placeholder))
        return len(self.omega.jobs.list(regexp=pattern)) > 0

    def file_exists(self, path=""):
        """check if file exists

        Looks for an entry of exactly this name in the jobs store and in
        the datasets store.

        Args:
            path: name of file

        Returns:
            True if file exists
        """
        path = unquote(path).strip('/')
        if not path:
            return False
        # always check for an actual file, not some sub path
        # escape so that regex meta characters in names match literally
        pattern = r'^{}$'.format(re.escape(path))
        does_exist = len(self.omega.jobs.list(regexp=pattern)) > 0
        does_exist |= len(self.omega.datasets.list(regexp=pattern)) > 0
        return does_exist

    def is_hidden(self, path):
        """check if path or file is hidden

        Args:
            path: name of file or path

        Returns:
            False, currently always returns false
        """
        return False

    def _read_notebook(self, path, as_version=None):
        """Return the notebook stored at path in the jobs store.

        as_version is accepted for interface compatibility and not used.
        """
        path = unquote(path).strip('/')
        return self.omega.jobs.get(path)

    def _notebook_model(self, path, content=True, meta=None):
        """
        Build a notebook model
        if content is requested, the notebook content will be populated
        as a JSON structure (not double-serialized)

        Args:
            path: the path of the notebook
            content: if True read the notebook and include it in the model
            meta: the metadata of the notebook, if already known. It is
                looked up in the jobs store otherwise

        Returns:
            the notebook model (dict)

        Raises:
            web.HTTPError: 400 if content is requested but cannot be read
        """
        path = unquote(path).strip('/')
        model = self._base_model(path)
        model['type'] = 'notebook'
        # always add accurate created and modified
        meta = meta or self.omega.jobs.metadata(path)
        if meta is not None:
            model['created'] = meta.created
            model['last_modified'] = meta.modified
        if content:
            nb = self._read_notebook(path, as_version=4)
            if nb is None:
                raise web.HTTPError(400, "Cannot read non-file {}".format(path))
            self.mark_trusted_cells(nb, path)
            model['content'] = nb
            model['format'] = 'json'
            self.validate_notebook_model(model)
        return model

    def _base_model(self, path, kind=None):
        """Build the common base of a contents model

        Args:
            path: the path of the entry
            kind: optional type of the entry. If given, it is set as the
                model's type and a 'directory' gets an empty content list

        Returns:
            the base model (dict), created and last_modified are set to
            the current UTC time
        """
        # http://jupyter-notebook.readthedocs.io/en/stable/extending/contents.html
        path = unquote(path).strip('/')
        last_modified = datetime.utcnow()
        created = last_modified
        # Create the base model.
        model = {
            'name': os.path.basename(path),
            'path': path,
            'last_modified': last_modified,
            'created': created,
            'content': None,
            'format': None,
            'mimetype': None,
            'writable': True,
        }
        if kind:
            model['type'] = kind
            model['content'] = [] if kind == 'directory' else None
        return model

    def _dir_model(self, path, content=True):
        """
        Build a model to return all of the files in gridfs
        if content is requested, will include a listing of the directory

        Args:
            path: the path of the directory, '' for the top level
            content: passed on to the model of each listed entry

        Returns:
            the directory model (dict), its content lists the entries
            directly in path and the sub directories found

        Raises:
            web.HTTPError: 400 if path is given and has no entries
        """
        # this looks like a seemingly simple task, it's carefully crafted
        path = unquote(path).strip('/')
        model = self._base_model(path, kind='directory')
        model['format'] = 'json'
        contents = model['content']
        # get existing entries from a pattern that matches either
        #    top-level files: name.ext
        #    files in a directory: dir/name.ext
        # matching on the placeholder only is too restrictive as entries
        # can be generated without a placeholder. So we get all, which may
        # include sub/sub/file, and we need to include them because we
        # need to find sub/sub directories
        # note [^\/] matches any character except /
        pattern = r'([^\/]+\/)?([^\/]+\.[^\/]*)$'
        # if we're looking in an existing directory, prepend that
        if path:
            # escape so that regex meta characters in names match literally
            pattern = r'{path}/{pattern}'.format(path=re.escape(path), pattern=pattern)
        pattern = r'^{}'.format(pattern)
        entries = self.omega.jobs.list(regexp=pattern, raw=True, hidden=True, include_temp=True)
        if path and not entries:
            raise web.HTTPError(400, "Directory not found {}".format(path))
        # by default assume the current path is listed already
        directories = {path}
        for meta in entries:
            # get path of entry, e.g. sub/foo.ipynb => sub
            entry_path = os.path.dirname(meta.name)
            # if not part of listed directories yet, include
            if entry_path not in directories:
                entry = self._base_model(entry_path, kind='directory')
                contents.append(entry)
                directories.add(entry_path)
            # ignore placeholder files
            if meta.name.endswith(self._dir_placeholder):
                continue
            # only include files that are in the path we're listing
            if entry_path != path:
                continue
            # include the actual file
            try:
                entry = self._notebook_model(meta.name, content=content, meta=meta)
            except Exception as e:
                msg = ('_dir_model error, cannot get {}, '
                       'removing from list, exception {}'.format(meta.name, str(e)))
                self.log.warning(msg)
            else:
                contents.append(entry)
        return model

    def _file_model(self, path, content=True, format=None):
        """Build a model for a generic (non-notebook) file

        Args:
            path: the path of the file
            content: if True read the file and include its content
            format: 'text', 'base64' or None, see _read_file

        Returns:
            the file model (dict)
        """
        model = self._base_model(path)
        model['type'] = 'file'

        model['mimetype'] = mimetypes.guess_type(path)[0]

        if content:
            content, format = self._read_file(path, format)
            if model['mimetype'] is None:
                # fall back to a mimetype that matches the actual format
                default_mime = {
                    'text': 'text/plain',
                    'base64': 'application/octet-stream'
                }[format]
                model['mimetype'] = default_mime

            model.update(
                content=content,
                format=format,
            )

        return model

    def _read_file(self, os_path, format):
        """Read a non-notebook file.

        Paths ending in .ipynb are looked up in the jobs store, all others
        in the datasets store.

        os_path: The path to be read.
        format:
          If 'text', the contents will be decoded as UTF-8.
          If 'base64', the raw bytes contents will be encoded as base64.
          If not specified, try to decode as UTF-8, and fall back to base64

        Returns:
            tuple of (content, format), format is 'text' or 'base64'

        Raises:
            web.HTTPError: 400 if there is no file at os_path or if 'text'
                was requested and the content is not UTF-8 encoded
        """
        if os_path.endswith('.ipynb'):
            meta = self.omega.jobs.metadata(os_path)
        else:
            meta = self.omega.datasets.metadata(os_path)
        gridfile = meta.gridfile if meta is not None else None
        if not gridfile:
            # covers a missing entry as well as an entry without a file
            raise web.HTTPError(400, "Cannot read non-file %s" % os_path)

        try:
            bcontent = gridfile.read()
        finally:
            # always release the file, even if reading failed
            gridfile.close()

        if format is None or format == 'text':
            # Try to interpret as unicode if format is unknown or if unicode
            # was explicitly requested.
            try:
                return bcontent.decode('utf8'), 'text'
            except UnicodeError:
                if format == 'text':
                    raise web.HTTPError(
                        400,
                        "%s is not UTF-8 encoded" % os_path,
                        reason='bad format',
                    )
        return encodebytes(bcontent).decode('ascii'), 'base64'

    def _save_file(self, os_path, content, format):
        """Save content of a generic file to the datasets store.

        Args:
            os_path: the path to save to
            content: the content as a str, plain text or base64 encoded
            format: 'text' or 'base64'

        Raises:
            web.HTTPError: 400 if format is invalid or content cannot be
                encoded/decoded
        """
        if format not in {'text', 'base64'}:
            raise web.HTTPError(
                400,
                "Must specify format of file contents as 'text' or 'base64'",
            )
        try:
            if format == 'text':
                bcontent = content.encode('utf8')
            else:
                b64_bytes = content.encode('ascii')
                bcontent = decodebytes(b64_bytes)
        except Exception as e:
            raise web.HTTPError(
                400, u'Encoding error saving %s: %s' % (os_path, e)
            )

        self.omega.datasets.put(BytesIO(bcontent), os_path)

    @default('files_handler_params')
    def _files_handler_params_default(self):
        # avoid exception
        #   TypeError: StaticFileHandler.initialize() missing 1 required positional argument: 'path'
        # issue: https://github.com/jupyter-server/jupyter_server/issues/1313
        # What this does
        # - ensure that ContentsManager.files_handler_class=FilesHandler has a path attribute on initialize()
        # - this only happens when dealing with local files, e.g. when downloading a notebook
        # - source of the issue is that ContentsManager.files_handler_params is not set
        return {'path': self.root_dir}