# This file is part of the pyMOR project (http://www.pymor.org).
# Copyright 2013-2020 pyMOR developers and contributors. All rights reserved.
# License: BSD 2-Clause License (http://opensource.org/licenses/BSD-2-Clause)
"""This module provides the caching facilities of pyMOR.
Any class that wishes to provide cached method calls should derive from
:class:`CacheableObject`. Methods which are to be cached can then
be marked using the :class:`cached` decorator.
To ensure consistency, :class:`CacheableObject` derives from
|ImmutableObject|: The return value of a cached method call should
only depend on its arguments as well as the immutable state of the class
instance.
Making this assumption, the keys for cache lookup are created from
the following data:
1. the instance's :attr:`~CacheableObject.cache_id` in case of a
:attr:`~CacheRegion.persistent` :class:`CacheRegion`, else the instance's
:attr:`~pymor.core.base.BasicObject.uid`,
2. the method's `__name__`,
3. the method's arguments.
Note that instances of |ImmutableObject| are allowed to have mutable
private attributes. It is the implementor's responsibility not to break things.
(See this :ref:`warning <ImmutableObjectWarning>`.)
Backends for storage of cached return values derive from :class:`CacheRegion`.
Currently two backends are provided for memory-based and disk-based caching
(:class:`MemoryRegion` and :class:`DiskRegion`). The available regions
are stored in the module level `cache_regions` dict. The user can add
additional regions (e.g. multiple disk cache regions) as required.
:attr:`CacheableObject.cache_region` specifies a key of the `cache_regions` dict
to select a cache region which should be used by the instance.
(Setting :attr:`~CacheableObject.cache_region` to `None` or `'none'` disables caching.)
By default, a 'memory', a 'disk' and a 'persistent' cache region are configured. The
paths and maximum sizes of the disk regions, as well as the maximum number of keys of
the memory cache region can be configured via the
`pymor.core.cache.default_regions.disk_path`,
`pymor.core.cache.default_regions.disk_max_size`,
`pymor.core.cache.default_regions.persistent_path`,
`pymor.core.cache.default_regions.persistent_max_size` and
`pymor.core.cache.default_regions.memory_max_keys` |defaults|.
There are two ways to disable and enable caching in pyMOR:
1. Calling :func:`disable_caching` (:func:`enable_caching`), to disable
(enable) caching globally.
2. Calling :meth:`CacheableObject.disable_caching`
(:meth:`CacheableObject.enable_caching`) to disable (enable) caching
for a given instance.
Caching of a method is only active if caching has been enabled both globally
(enabled by default) and on instance level. For debugging purposes, it is moreover
possible to set the environment variable `PYMOR_CACHE_DISABLE=1` which overrides
any call to :func:`enable_caching`.
A cache region can be emptied using :meth:`CacheRegion.clear`. The function
:func:`clear_caches` clears each cache region registered in `cache_regions`.
"""
import atexit
from collections import OrderedDict
import functools
import getpass
import hashlib
import inspect
from numbers import Number
import os
import tempfile
from types import MethodType
import diskcache
import numpy as np
from pymor.core.base import ImmutableObject
from pymor.core.defaults import defaults, defaults_changes
from pymor.core.exceptions import CacheKeyGenerationError
from pymor.core.logger import getLogger
from pymor.core.pickle import dumps
from pymor.parameters.base import Mu, Parameters
@atexit.register
def cleanup_non_persistent_regions():
    """Empty every registered cache region that is not persistent.

    The function is registered with :mod:`atexit`, so it is also invoked
    automatically when the interpreter shuts down.
    """
    transient_regions = (r for r in cache_regions.values() if not r.persistent)
    for transient in transient_regions:
        transient.clear()
def _safe_filename(old_name):
    """Return `old_name` stripped of every character that is neither
    alphanumeric nor one of `.`, `_`, `-` or a space."""
    allowed_punctuation = '._- '
    kept = [char for char in old_name if char.isalnum() or char in allowed_punctuation]
    return ''.join(kept)
class CacheRegion:
    """Interface every pyMOR cache region has to implement.

    Subclasses provide the actual storage by overriding :meth:`get`,
    :meth:`set` and :meth:`clear`.

    Attributes
    ----------
    persistent
        If `True`, cache entries are kept between multiple
        program runs.
    """

    persistent = False

    def get(self, key):
        """Look up the cache entry stored under `key`.

        Parameters
        ----------
        key
            The key for the cache entry.

        Returns
        -------
        `(True, entry)`
            if `key` is present in the cache region.
        `(False, None)`
            if `key` is not present in the cache region.
        """
        raise NotImplementedError

    def set(self, key, value):
        """Store `value` as the cache entry for `key`.

        Usually this is called only once per `key`; repeated calls
        for the same `key` can occur due to concurrency.
        """
        raise NotImplementedError

    def clear(self):
        """Remove all entries from the cache region."""
        raise NotImplementedError
class MemoryRegion(CacheRegion):
    """Cache region which keeps its entries in an in-process dictionary.

    At most `max_keys` entries are held. When the region is full, the entry
    that was inserted first is dropped to make room for the new one.
    """

    # Unique sentinel marking "key not found"; compared by identity only.
    NO_VALUE = {}

    def __init__(self, max_keys):
        self.max_keys = max_keys
        self._cache = OrderedDict()

    def get(self, key):
        """Return `(True, entry)` if `key` is cached, else `(False, None)`.

        Entries which are |VectorArrays| are returned as copies.
        """
        entry = self._cache.get(key, self.NO_VALUE)
        if entry is self.NO_VALUE:
            return False, None
        # imported lazily, only once a hit actually has to be inspected
        from pymor.vectorarrays.interface import VectorArray
        if isinstance(entry, VectorArray):
            return True, entry.copy()
        return True, entry

    def set(self, key, value):
        """Store `value` under `key` unless `key` is already present.

        NumPy arrays are flagged read-only before they are stored.
        """
        store = self._cache
        if key in store:
            getLogger('pymor.core.cache.MemoryRegion').warning('Key already present in cache region, ignoring.')
            return
        if len(store) == self.max_keys:
            # region is full: evict the entry that was inserted first
            store.popitem(last=False)
        if isinstance(value, np.ndarray):
            value.setflags(write=False)
        store[key] = value

    def clear(self):
        """Drop all entries by replacing the backing dictionary."""
        self._cache = OrderedDict()
class DiskRegion(CacheRegion):
    """Cache region backed by a :class:`diskcache.Cache` directory.

    Parameters
    ----------
    path
        Directory handed to :class:`diskcache.Cache`.
    max_size
        Value for the `size_limit` setting of the underlying cache
        (converted with `int`).
    persistent
        If `False`, the region is cleared right after construction.
    """

    def __init__(self, path, max_size, persistent):
        self.path = path
        self.max_size = max_size
        self.persistent = persistent
        self._cache = diskcache.Cache(path)
        self._cache.reset('size_limit', int(max_size))
        if not persistent:
            # start from an empty region; leftovers of an earlier run are discarded
            self.clear()

    def get(self, key):
        """Return `(True, entry)` if `key` is cached, else `(False, None)`."""
        # A single lookup with a unique sentinel as default. Checking
        # `key in self._cache` first and fetching afterwards would report
        # `(True, None)` if the entry vanished between the two calls.
        missing = object()
        value = self._cache.get(key, default=missing)
        if value is missing:
            return False, None
        return True, value

    def set(self, key, value):
        """Store `value` under `key` unless `key` is already present."""
        if key in self._cache:
            getLogger('pymor.core.cache.DiskRegion').warning('Key already present in cache region, ignoring.')
            return
        self._cache.set(key, value)

    def clear(self):
        """Remove all entries from the underlying disk cache."""
        self._cache.clear()
@defaults('disk_path', 'disk_max_size', 'persistent_path', 'persistent_max_size', 'memory_max_keys')
def default_regions(disk_path=os.path.join(tempfile.gettempdir(), 'pymor.cache.' + getpass.getuser()),
                    disk_max_size=1024 ** 3,
                    persistent_path=os.path.join(tempfile.gettempdir(), 'pymor.persistent.cache.' + getpass.getuser()),
                    persistent_max_size=1024 ** 3,
                    memory_max_keys=1000):
    """Create the default cache regions and register them in `cache_regions`.

    Three regions are (re)created: `'disk'` (non-persistent |DiskRegion|),
    `'persistent'` (persistent |DiskRegion|) and `'memory'` (|MemoryRegion|).

    Parameters
    ----------
    disk_path
        Directory of the `'disk'` region.
    disk_max_size
        Size limit of the `'disk'` region, either as a number of bytes or as
        a string with an optional `K`, `M` or `G` suffix (powers of 1024).
    persistent_path
        Directory of the `'persistent'` region.
    persistent_max_size
        Size limit of the `'persistent'` region; same formats as `disk_max_size`.
    memory_max_keys
        Maximum number of entries of the `'memory'` region.
    """
    unit_factors = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3}

    def parse_size(size):
        # Non-string sizes are passed through untouched.
        if not isinstance(size, str):
            return size
        factor = unit_factors.get(size[-1:])
        if factor is None:
            return int(size)
        return int(size[:-1]) * factor

    # Both size limits accept the same string formats. Previously only
    # `disk_max_size` was parsed, so e.g. persistent_max_size='1G' failed.
    disk_max_size = parse_size(disk_max_size)
    persistent_max_size = parse_size(persistent_max_size)

    cache_regions['disk'] = DiskRegion(path=disk_path, max_size=disk_max_size, persistent=False)
    cache_regions['persistent'] = DiskRegion(path=persistent_path, max_size=persistent_max_size, persistent=True)
    cache_regions['memory'] = MemoryRegion(memory_max_keys)
# Registry of the available cache regions, keyed by region name. It starts out
# empty; `CacheableObject._cached_method_call` calls `default_regions()` when it
# finds the registry empty.
cache_regions = {}
# Global switch: True when the environment variable PYMOR_CACHE_DISABLE is 1.
_caching_disabled = int(os.environ.get('PYMOR_CACHE_DISABLE', 0)) == 1
if _caching_disabled:
    getLogger('pymor.core.cache').warning('caching globally disabled by environment')
def enable_caching():
    """Globally enable caching.

    Caching stays disabled if the environment variable
    `PYMOR_CACHE_DISABLE` is set to `1`.
    """
    global _caching_disabled
    env_setting = os.environ.get('PYMOR_CACHE_DISABLE', 0)
    _caching_disabled = (int(env_setting) == 1)
def disable_caching():
    """Switch caching off globally.

    Sets the module-level flag `_caching_disabled`, which the cached method
    wrappers check before consulting any cache region.
    """
    global _caching_disabled
    _caching_disabled = True
def clear_caches():
    """Empty every cache region registered in `cache_regions`."""
    for region_name in cache_regions:
        cache_regions[region_name].clear()
class CacheableObject(ImmutableObject):
    """Base class for anything that wants to use our built-in caching.

    Attributes
    ----------
    cache_region
        Name of the :class:`CacheRegion` to use. Must correspond to a key in
        the :attr:`cache_regions` dict. If `None` or `'none'`, caching
        is disabled.
    cache_id
        Identifier for the object instance on which a cached method is called.
    """

    cache_region = None
    cache_id = None

    def disable_caching(self):
        """Disable caching for this instance."""
        # The attributes are written directly into the instance `__dict__`
        # (presumably to bypass the attribute protection of ImmutableObject).
        self.__dict__['cache_region'] = None
        self.__dict__['cache_id'] = None

    def enable_caching(self, region, cache_id=None):
        """Enable caching for this instance.

        .. warning::
            Note that using :meth:`~pymor.core.base.ImmutableObject.with_`
            will reset :attr:`cache_region` and :attr:`cache_id` to their class
            defaults.

        Parameters
        ----------
        region
            Name of the |CacheRegion| to use. Must correspond to a key in
            the :attr:`cache_regions` dict. If `None` or `'none'`, caching
            is disabled.
        cache_id
            Identifier for the object instance on which a cached method is called.
            Must be specified when `region` is :attr:`~CacheRegion.persistent`.
            When `region` is not :attr:`~CacheRegion.persistent` and no `cache_id`
            is given, the object's :attr:`~pymor.core.base.BasicObject.uid`
            is used instead.

        Raises
        ------
        ValueError
            If `region` is a registered persistent region and no `cache_id`
            is given.
        """
        self.__dict__['cache_id'] = cache_id
        if region in (None, 'none'):
            self.__dict__['cache_region'] = None
        else:
            self.__dict__['cache_region'] = region
            # The region may not be registered (yet); then nothing is checked here.
            r = cache_regions.get(region, None)
            if r and r.persistent and cache_id is None:
                raise ValueError('For persistent CacheRegions a cache_id has to be specified.')

    def cached_method_call(self, method, *args, **kwargs):
        """Call a given `method` and cache the return value.

        This method can be used as an alternative to the :func:`cached`
        decorator.

        Parameters
        ----------
        method
            The method that is to be called. This has to be a method
            of `self`.
        args
            Positional arguments for `method`.
        kwargs
            Keyword arguments for `method`

        Returns
        -------
        The (possibly cached) return value of `method(*args, **kwargs)`.

        Raises
        ------
        NotImplementedError
            If `method` accepts variable positional arguments (`*args`).
        """
        assert isinstance(method, MethodType)

        if _caching_disabled or self.cache_region is None:
            return method(*args, **kwargs)

        params = inspect.signature(method).parameters
        if any(v.kind == v.VAR_POSITIONAL for v in params.values()):
            raise NotImplementedError
        # `method` is a bound method, so its signature already excludes `self`.
        # Dropping the first entry here would shift every positional argument
        # onto the name of the following parameter.
        argnames = list(params.keys())
        defaults = {k: v.default for k, v in params.items() if v.default is not v.empty}
        return self._cached_method_call(method, False, argnames, defaults, args, kwargs)

    def _cached_method_call(self, method, pass_self, argnames, defaults, args, kwargs):
        """Look up or compute-and-store the result of a method call.

        Parameters
        ----------
        method
            The callable to evaluate on a cache miss.
        pass_self
            If `True`, `method` is called as `method(self, **kwargs)`,
            otherwise as `method(**kwargs)`.
        argnames
            Names of the parameters of `method` (without `self`), used to
            convert positional into keyword arguments.
        defaults
            Dict of the default values of the optional parameters of `method`.
        args
            Positional arguments of the call.
        kwargs
            Keyword arguments of the call.

        Returns
        -------
        The cached or freshly computed return value.
        """
        if not cache_regions:
            default_regions()
        try:
            region = cache_regions[self.cache_region]
        except KeyError:
            raise KeyError(f'No cache region "{self.cache_region}" found') from None

        # id for self
        assert self.cache_id or not region.persistent
        self_id = self.cache_id or self.uid

        # ensure that passing a value as positional or keyword argument does not matter
        kwargs.update(zip(argnames, args))

        # ensure the values of optional parameters enter the cache key
        if defaults:
            kwargs = dict(defaults, **kwargs)

        key = build_cache_key((method.__name__, self_id, kwargs))
        found, value = region.get(key)

        if found:
            # entries are stored together with the defaults changes active at creation time
            value, cached_defaults_changes = value
            if cached_defaults_changes != defaults_changes():
                getLogger('pymor.core.cache').warning('pyMOR defaults have been changed. Cached result may be wrong.')
            return value
        else:
            self.logger.debug(f'creating new cache entry for {self.__class__.__name__}.{method.__name__}')
            value = method(self, **kwargs) if pass_self else method(**kwargs)
            region.set(key, (value, defaults_changes()))
            return value
def cached(function):
    """Decorator turning a method of a `CacheableObject` into a cached method.

    The signature of `function` is inspected once at decoration time.
    Functions taking variable positional arguments are rejected with
    `NotImplementedError`.
    """
    signature = inspect.signature(function)
    for param in signature.parameters.values():
        if param.kind == param.VAR_POSITIONAL:
            raise NotImplementedError

    # the leading parameter is `self` and does not take part in the cache key
    argnames = [name for position, name in enumerate(signature.parameters) if position > 0]

    defaults = {}
    for name, param in signature.parameters.items():
        if param.default is not param.empty:
            defaults[name] = param.default

    @functools.wraps(function)
    def wrapper(self, *args, **kwargs):
        caching_active = not _caching_disabled and self.cache_region is not None
        if caching_active:
            return self._cached_method_call(function, True, argnames, defaults, args, kwargs)
        return function(self, *args, **kwargs)

    return wrapper
# The type of `None`; `build_cache_key` compares argument types against it.
NoneType = type(None)
def build_cache_key(obj):
    """Compute a cache key for `obj`.

    `obj` is first converted recursively into a canonical structure of nested
    tuples, which is then pickled and hashed with SHA-256.

    Parameters
    ----------
    obj
        The object to build the key for. Supported are `None`, `bool`, `int`,
        `float`, `str`, `bytes`, other `Number` instances, NumPy arrays of
        non-object dtype, as well as `list`, `tuple`, `set`, `frozenset`,
        `dict`, |Mu| and |Parameters| containers of supported objects.

    Returns
    -------
    The hexadecimal SHA-256 digest as a string.

    Raises
    ------
    CacheKeyGenerationError
        If `obj` contains a value of an unsupported type.
    """

    def transform_obj(obj):
        # Dispatch on the exact type so that subclasses with additional
        # state are not silently treated like their base class.
        t = type(obj)
        if t in (NoneType, bool, int, float, str, bytes):
            return obj
        elif t is np.ndarray:
            if obj.dtype == object:
                raise CacheKeyGenerationError('Cannot generate cache key for provided arguments')
            return obj
        elif t in (list, tuple):
            return tuple(transform_obj(o) for o in obj)
        elif t in (set, frozenset):
            return tuple(transform_obj(o) for o in sorted(obj))
        elif t in (dict, Mu, Parameters):
            # Items are sorted so that the key does not depend on insertion
            # order. This has to hold for Mu and Parameters as well.
            return tuple((transform_obj(k), transform_obj(v)) for k, v in sorted(obj.items()))
        elif isinstance(obj, Number):
            # handle numpy number objects
            return obj
        else:
            raise CacheKeyGenerationError('Cannot generate cache key for provided arguments')

    obj = transform_obj(obj)
    key = hashlib.sha256(dumps(obj, protocol=-1)).hexdigest()
    return key