# Source code for pymor.core.cache

# This file is part of the pyMOR project (http://www.pymor.org).
# Copyright 2013-2020 pyMOR developers and contributors. All rights reserved.
# License: BSD 2-Clause License (http://opensource.org/licenses/BSD-2-Clause)

"""This module provides the caching facilities of pyMOR.

Any class that wishes to provide cached method calls should derive from
:class:`CacheableObject`. Methods which are to be cached can then
be marked using the :class:`cached` decorator.

To ensure consistency, :class:`CacheableObject` derives from
|ImmutableObject|: The return value of a cached method call should
only depend on its arguments as well as the immutable state of the class
instance.

Making this assumption, the keys for cache lookup are created from
the following data:

    1. the instance's :attr:`~CacheableObject.cache_id` in case of a
       :attr:`~CacheRegion.persistent` :class:`CacheRegion`, else the instance's
       :attr:`~pymor.core.base.BasicObject.uid`,
    2. the method's `__name__`,
    3. the method's arguments.

Note that instances of |ImmutableObject| are allowed to have mutable
private attributes. It is the implementor's responsibility not to break things.
(See this :ref:`warning <ImmutableObjectWarning>`.)

Backends for storage of cached return values derive from :class:`CacheRegion`.
Currently two backends are provided for memory-based and disk-based caching
(:class:`MemoryRegion` and :class:`DiskRegion`). The available regions
are stored in the module level `cache_regions` dict. The user can add
additional regions (e.g. multiple disk cache regions) as required.
:attr:`CacheableObject.cache_region` specifies a key of the `cache_regions` dict
to select a cache region which should be used by the instance.
(Setting :attr:`~CacheableObject.cache_region` to `None` or `'none'` disables caching.)

By default, a 'memory', a 'disk' and a 'persistent' cache region are configured. The
paths and maximum sizes of the disk regions, as well as the maximum number of keys of
the memory cache region can be configured via the
`pymor.core.cache.default_regions.disk_path`,
`pymor.core.cache.default_regions.disk_max_size`,
`pymor.core.cache.default_regions.persistent_path`,
`pymor.core.cache.default_regions.persistent_max_size` and
`pymor.core.cache.default_regions.memory_max_keys` |defaults|.

There are two ways to disable and enable caching in pyMOR:

    1. Calling :func:`disable_caching` (:func:`enable_caching`), to disable
       (enable) caching globally.
    2. Calling :meth:`CacheableObject.disable_caching`
       (:meth:`CacheableObject.enable_caching`) to disable (enable) caching
       for a given instance.

Caching of a method is only active if caching has been enabled both globally
(enabled by default) and on instance level. For debugging purposes, it is moreover
possible to set the environment variable `PYMOR_CACHE_DISABLE=1` which overrides
any call to :func:`enable_caching`.

A cache region can be emptied using :meth:`CacheRegion.clear`. The function
:func:`clear_caches` clears each cache region registered in `cache_regions`.
"""

import atexit
from collections import OrderedDict
import functools
import getpass
import hashlib
import inspect
from numbers import Number
import os
import tempfile
from types import MethodType

import diskcache
import numpy as np

from pymor.core.base import ImmutableObject
from pymor.core.defaults import defaults, defaults_changes
from pymor.core.exceptions import CacheKeyGenerationError
from pymor.core.logger import getLogger
from pymor.core.pickle import dumps
from pymor.parameters.base import Mu, Parameters


@atexit.register
def cleanup_non_persistent_regions():
    """Empty every registered cache region that is not persistent.

    The function is registered with :mod:`atexit`, so it is run when the
    interpreter shuts down.
    """
    for region in cache_regions.values():
        if region.persistent:
            # persistent regions must survive the end of the program
            continue
        region.clear()
def _safe_filename(old_name):
    """Return `old_name` stripped of all characters which are neither
    alphanumeric nor one of ``.``, ``_``, ``-`` or the space character.
    """
    allowed_punctuation = '._- '
    kept = [char for char in old_name if char.isalnum() or char in allowed_punctuation]
    return ''.join(kept)
class CacheRegion:
    """Interface shared by all pyMOR cache regions.

    Concrete regions override :meth:`get`, :meth:`set` and :meth:`clear`;
    the implementations given here only raise `NotImplementedError`.

    Attributes
    ----------
    persistent
        If `True`, cache entries are kept between multiple
        program runs.
    """

    persistent = False

    def get(self, key):
        """Look up the cache entry stored for `key`.

        Parameters
        ----------
        key
            The key for the cache entry.

        Returns
        -------
        `(True, entry)`
            in case the `key` has been found in the cache region.
        `(False, None)`
            in case the `key` is not present in the cache region.
        """
        raise NotImplementedError()

    def set(self, key, value):
        """Store `value` as the cache entry for `key`.

        This method is usually called only once for any given `key`
        (with the exception of issues due to concurrency).
        """
        raise NotImplementedError()

    def clear(self):
        """Remove all entries from the cache region."""
        raise NotImplementedError()
class MemoryRegion(CacheRegion):
    """Cache region which keeps its entries in an in-memory `OrderedDict`.

    Entries are kept in insertion order. When the region already holds
    `max_keys` entries, the oldest entry is discarded before a new one
    is stored.

    Parameters
    ----------
    max_keys
        Maximum number of entries held by the region. If `0`, nothing
        is stored at all.
    """

    # Sentinel returned by the dict lookup for missing keys; compared by identity.
    NO_VALUE = {}

    def __init__(self, max_keys):
        self.max_keys = max_keys
        self._cache = OrderedDict()

    def get(self, key):
        """Return `(True, entry)` if `key` is present, else `(False, None)`."""
        value = self._cache.get(key, self.NO_VALUE)
        if value is self.NO_VALUE:
            return False, None

        # NOTE(review): imported lazily, presumably to avoid a circular
        # import at module load time -- confirm.
        from pymor.vectorarrays.interface import VectorArray
        if isinstance(value, VectorArray):
            # hand out a copy so that callers cannot modify the cached array
            value = value.copy()
        return True, value

    def set(self, key, value):
        """Store `value` under `key` unless `key` is already present.

        An already present `key` is left untouched and a warning is logged.
        """
        if key in self._cache:
            getLogger('pymor.core.cache.MemoryRegion').warning('Key already present in cache region, ignoring.')
            return

        if self.max_keys == 0:
            # A region without capacity cannot hold anything. Without this
            # guard the eviction below would pop from an empty dict and
            # raise a KeyError.
            return

        if len(self._cache) == self.max_keys:
            # region is full: drop the oldest entry
            self._cache.popitem(last=False)

        if isinstance(value, np.ndarray):
            # protect the cached array against accidental modification
            value.setflags(write=False)
        self._cache[key] = value

    def clear(self):
        """Remove all entries from the region."""
        self._cache = OrderedDict()
class DiskRegion(CacheRegion):
    """Cache region which stores its entries on disk via `diskcache.Cache`.

    Parameters
    ----------
    path
        Directory handed to `diskcache.Cache`.
    max_size
        Size limit of the cache; converted with `int` and applied as
        the `size_limit` setting of the underlying `diskcache.Cache`.
    persistent
        If `True`, existing entries found at `path` are kept. Otherwise the
        region is cleared on construction.
    """

    # Sentinel used to tell "key not found" apart from any stored value.
    _MISSING = object()

    def __init__(self, path, max_size, persistent):
        self.path = path
        self.max_size = max_size
        self.persistent = persistent
        self._cache = diskcache.Cache(path)
        self._cache.reset('size_limit', int(max_size))
        if not persistent:
            self.clear()

    def get(self, key):
        """Return `(True, entry)` if `key` is present, else `(False, None)`."""
        # A single lookup with a sentinel default: a separate membership test
        # followed by a second lookup could report `(True, None)` if the entry
        # is evicted (e.g. by another process) in between.
        value = self._cache.get(key, default=self._MISSING)
        if value is self._MISSING:
            return False, None
        return True, value

    def set(self, key, value):
        """Store `value` under `key` unless `key` is already present.

        An already present `key` is left untouched and a warning is logged.
        """
        # `add` only stores the value if the key is absent and reports whether
        # it did so, which avoids a separate check-then-write sequence.
        if not self._cache.add(key, value):
            getLogger('pymor.core.cache.DiskRegion').warning('Key already present in cache region, ignoring.')

    def clear(self):
        """Remove all entries from the region."""
        self._cache.clear()
@defaults('disk_path', 'disk_max_size', 'persistent_path', 'persistent_max_size', 'memory_max_keys')
def default_regions(disk_path=os.path.join(tempfile.gettempdir(), 'pymor.cache.' + getpass.getuser()),
                    disk_max_size=1024 ** 3,
                    persistent_path=os.path.join(tempfile.gettempdir(), 'pymor.persistent.cache.' + getpass.getuser()),
                    persistent_max_size=1024 ** 3,
                    memory_max_keys=1000):
    """Register the 'disk', 'persistent' and 'memory' cache regions.

    The regions are stored in the module level `cache_regions` dict,
    replacing any regions already registered under these names.

    Parameters
    ----------
    disk_path
        Directory of the non-persistent disk region.
    disk_max_size
        Maximum size of the non-persistent disk region in bytes. May also
        be given as a string with an optional `K`, `M` or `G` suffix
        (multiples of 1024), e.g. `'512M'`.
    persistent_path
        Directory of the persistent disk region.
    persistent_max_size
        Maximum size of the persistent disk region; same format as
        `disk_max_size`.
    memory_max_keys
        Maximum number of keys held by the memory region.
    """
    def parse_size_string(size):
        # translate e.g. '2K' -> 2048; a string without suffix is a byte count
        factors = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3}
        suffix = size[-1]
        if suffix in factors:
            return int(size[:-1]) * factors[suffix]
        return int(size)

    if isinstance(disk_max_size, str):
        disk_max_size = parse_size_string(disk_max_size)
    # The persistent region accepts the same size strings as the disk region;
    # without this conversion a value such as '1G' would fail in `int()`
    # inside `DiskRegion`.
    if isinstance(persistent_max_size, str):
        persistent_max_size = parse_size_string(persistent_max_size)

    cache_regions['disk'] = DiskRegion(path=disk_path, max_size=disk_max_size, persistent=False)
    cache_regions['persistent'] = DiskRegion(path=persistent_path, max_size=persistent_max_size, persistent=True)
    cache_regions['memory'] = MemoryRegion(memory_max_keys)
# Registry of the available cache regions, keyed by region name.
cache_regions = dict()

# Setting PYMOR_CACHE_DISABLE=1 in the environment switches caching off globally.
_env_cache_disable = os.environ.get('PYMOR_CACHE_DISABLE', 0)
_caching_disabled = (int(_env_cache_disable) == 1)
del _env_cache_disable

if _caching_disabled:
    getLogger('pymor.core.cache').warning('caching globally disabled by environment')
def enable_caching():
    """Globally enable caching.

    Caching stays disabled if the environment variable
    `PYMOR_CACHE_DISABLE` is set to `1`.
    """
    global _caching_disabled
    env_flag = os.environ.get('PYMOR_CACHE_DISABLE', 0)
    _caching_disabled = (int(env_flag) == 1)
def disable_caching():
    """Globally disable caching.

    Sets the module level `_caching_disabled` flag.
    """
    global _caching_disabled
    _caching_disabled = True
def clear_caches():
    """Clear all cache regions registered in `cache_regions`."""
    for region in cache_regions.values():
        region.clear()
class CacheableObject(ImmutableObject):
    """Base class for anything that wants to use our built-in caching.

    Attributes
    ----------
    cache_region
        Name of the :class:`CacheRegion` to use. Must correspond to a key in
        the :attr:`cache_regions` dict. If `None` or `'none'`, caching
        is disabled.
    cache_id
        Identifier for the object instance on which a cached method is called.
    """

    cache_region = None
    cache_id = None

    def disable_caching(self):
        """Disable caching for this instance."""
        # written to __dict__ directly, i.e. without going through __setattr__
        self.__dict__['cache_region'] = None
        self.__dict__['cache_id'] = None

    def enable_caching(self, region, cache_id=None):
        """Enable caching for this instance.

        .. warning::
            Note that using :meth:`~pymor.core.base.ImmutableObject.with_`
            will reset :attr:`cache_region` and :attr:`cache_id` to their class
            defaults.

        Parameters
        ----------
        region
            Name of the |CacheRegion| to use. Must correspond to a key in
            the :attr:`cache_regions` dict. If `None` or `'none'`, caching
            is disabled.
        cache_id
            Identifier for the object instance on which a cached method is called.
            Must be specified when `region` is :attr:`~CacheRegion.persistent`.
            When `region` is not :attr:`~CacheRegion.persistent` and no `cache_id`
            is given, the object's :attr:`~pymor.core.base.BasicObject.uid`
            is used instead.

        Raises
        ------
        ValueError
            If `region` names a persistent region already registered in
            `cache_regions` and no `cache_id` is given. The instance is
            left unmodified in this case.
        """
        if region in (None, 'none'):
            region = None
        else:
            # Validate before touching the instance, so that a failed call
            # does not leave a persistent region configured without cache_id.
            r = cache_regions.get(region)
            if r is not None and r.persistent and cache_id is None:
                raise ValueError('For persistent CacheRegions a cache_id has to be specified.')

        self.__dict__['cache_id'] = cache_id
        self.__dict__['cache_region'] = region

    def cached_method_call(self, method, *args, **kwargs):
        """Call a given `method` and cache the return value.

        This method can be used as an alternative to the :func:`cached`
        decorator.

        Parameters
        ----------
        method
            The method that is to be called. This has to be a method
            of `self`.
        args
            Positional arguments for `method`.
        kwargs
            Keyword arguments for `method`

        Returns
        -------
        The (possibly cached) return value of `method(*args, **kwargs)`.

        Raises
        ------
        NotImplementedError
            If `method` takes a variable number of positional arguments.
        """
        assert isinstance(method, MethodType)

        if _caching_disabled or self.cache_region is None:
            return method(*args, **kwargs)

        params = inspect.signature(method).parameters
        if any(v.kind == v.VAR_POSITIONAL for v in params.values()):
            raise NotImplementedError
        # `method` is a bound method, so its signature no longer contains
        # `self`. All listed parameters are real arguments and none of them
        # may be skipped when mapping positional arguments to names.
        argnames = list(params)
        defaults = {k: v.default for k, v in params.items() if v.default is not v.empty}
        return self._cached_method_call(method, False, argnames, defaults, args, kwargs)

    def _cached_method_call(self, method, pass_self, argnames, defaults, args, kwargs):
        """Look up or compute and store the result of a method call.

        Parameters
        ----------
        method
            The callable whose result is cached.
        pass_self
            If `True`, `method` is called with `self` as first argument
            (plain function). If `False`, `method` is called without it
            (bound method).
        argnames
            Names of the parameters of `method` (without `self`), used to
            turn positional into keyword arguments.
        defaults
            Dict of the default values of the parameters of `method`.
        args
            Positional arguments of the call.
        kwargs
            Keyword arguments of the call. This dict is modified in place.

        Returns
        -------
        The cached or freshly computed return value.

        Raises
        ------
        KeyError
            If `self.cache_region` is not a key of `cache_regions`.
        """
        if not cache_regions:
            # create the default regions on first use
            default_regions()
        try:
            region = cache_regions[self.cache_region]
        except KeyError:
            raise KeyError(f'No cache region "{self.cache_region}" found')

        # id for self
        assert self.cache_id or not region.persistent
        self_id = self.cache_id or self.uid

        # ensure that passing a value as positional or keyword argument does not matter
        kwargs.update(zip(argnames, args))

        # ensure the values of optional parameters enter the cache key
        if defaults:
            kwargs = dict(defaults, **kwargs)

        key = build_cache_key((method.__name__, self_id, kwargs))
        found, value = region.get(key)

        if found:
            # entries are stored together with the defaults changes at creation time
            value, cached_defaults_changes = value
            if cached_defaults_changes != defaults_changes():
                getLogger('pymor.core.cache').warning('pyMOR defaults have been changed. Cached result may be wrong.')
            return value
        else:
            self.logger.debug(f'creating new cache entry for {self.__class__.__name__}.{method.__name__}')
            value = method(self, **kwargs) if pass_self else method(**kwargs)
            region.set(key, (value, defaults_changes()))
            return value
def cached(function):
    """Decorator to make a method of `CacheableObject` actually cached.

    Methods taking a variable number of positional arguments are not
    supported; decorating one raises `NotImplementedError`.
    """
    signature_params = inspect.signature(function).parameters

    for param in signature_params.values():
        if param.kind == param.VAR_POSITIONAL:
            raise NotImplementedError

    # `function` is still unbound here, so the leading `self` has to be dropped
    argnames = list(signature_params)[1:]

    defaults = {}
    for name, param in signature_params.items():
        if param.default is not param.empty:
            defaults[name] = param.default

    @functools.wraps(function)
    def wrapper(self, *args, **kwargs):
        caching_active = not _caching_disabled and self.cache_region is not None
        if caching_active:
            return self._cached_method_call(function, True, argnames, defaults, args, kwargs)
        return function(self, *args, **kwargs)

    return wrapper
# The type of `None`; used by `build_cache_key` for its exact type checks.
NoneType = type(None)
def build_cache_key(obj):
    """Build a cache key for `obj`.

    `obj` is first converted recursively into a structure of tuples and
    plain values, which is then pickled and hashed.

    Supported are `None`, `bool`, `int`, `float`, `str`, `bytes`, other
    `Number` instances, NumPy arrays whose dtype is not `object`, as well
    as `list`, `tuple`, `set`, `frozenset`, `dict`, |Mu| and |Parameters|
    objects containing supported values. Types are matched exactly, so
    subclasses of the listed container types are not accepted.

    Parameters
    ----------
    obj
        The object for which to generate the key.

    Returns
    -------
    The SHA-256 hex digest of the pickled, transformed `obj`.

    Raises
    ------
    CacheKeyGenerationError
        If `obj` is of or contains an unsupported type.
    """

    def transform_obj(obj):
        t = type(obj)
        if t in (NoneType, bool, int, float, str, bytes):
            return obj
        elif t is np.ndarray:
            if obj.dtype == object:
                raise CacheKeyGenerationError('Cannot generate cache key for provided arguments')
            return obj
        elif t in (list, tuple):
            return tuple(transform_obj(o) for o in obj)
        elif t in (set, frozenset):
            # sets are unordered: sort to obtain a reproducible key
            return tuple(transform_obj(o) for o in sorted(obj))
        elif t in (Mu, Parameters):
            # NOTE(review): items are taken in iteration order without sorting;
            # presumably these types iterate deterministically -- confirm.
            return tuple((transform_obj(k), transform_obj(v)) for k, v in obj.items())
        elif t is dict:
            # sort the items so that insertion order does not enter the key
            return tuple((transform_obj(k), transform_obj(v)) for k, v in sorted(obj.items()))
        elif isinstance(obj, Number):  # handle numpy number objects
            return obj
        else:
            raise CacheKeyGenerationError('Cannot generate cache key for provided arguments')

    obj = transform_obj(obj)
    key = hashlib.sha256(dumps(obj, protocol=-1)).hexdigest()

    return key