Cache#

class servicex.cache.Cache(cache_path: Path, ignore_cache: bool = False, analysis_query_key: str = 'default')[source]#

Bases: object

Caching for all data returned from the system. Provides both an in-memory and an on-disk cache.

TODO: Rename this to be an adaptor, unifying how we name things

__init__(cache_path: Path, ignore_cache: bool = False, analysis_query_key: str = 'default')[source]#

Create the cache object

Arguments:

cache_path (Path): The path to the cache directory. Only sub-directories will be created in this path.

ignore_cache (bool): If true, then always ignore the cache for any queries against this dataset.
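
A minimal usage sketch, assuming only the documented constructor arguments (the directory name is illustrative):

    from pathlib import Path
    from servicex.cache import Cache

    # Create a cache rooted at an illustrative directory; sub-directories
    # are created beneath this path as needed.
    cache = Cache(Path("/tmp/servicex-cache"), ignore_cache=False)
    print(cache.path)  # root path of the cache directory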

Attributes

path#

Return root path of cache directory

Methods

_files_cache_file(id: str) Path[source]#

Return the file that contains the list of files

_find_analysis_cached_query(query_hash: str, filename: Optional[str] = None, location: Optional[Path] = None) Optional[Dict[str, str]][source]#

Returns the contents of the analysis cache file that contains a given query hash

Args:

query_hash (str): The hash of the query we are to find

Returns:

Optional[Dict[str, str]]: The contents of the analysis cache file that contains the hash. None if the query was not found

_get_analysis_cache_file(filename: Optional[str] = None, location: Optional[Path] = None) Path[source]#

Get our best guess as to where the analysis cache file will be

It will use the globally set defaults if nothing is specified.

Args:

filename (Optional[str], optional): Cache filename to use. Defaults to None.

location (Optional[Path], optional): Cache location to use. Defaults to None.

_in_memory_cache()#

dict() -> new empty dictionary

dict(mapping) -> new dictionary initialized from a mapping object’s (key, value) pairs

dict(iterable) -> new dictionary initialized as if via:

    d = {}
    for k, v in iterable:
        d[k] = v

dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)

_load_analysis_query_cache() Optional[Dict[str, str]][source]#

Safely load the analysis query cache.

  • If there is no cache file, return None

  • If the file is empty, return an empty cache

  • Return the contents of the file.

Returns:

Optional[Dict[str, str]]: Returns the query cache if it exists

_load_full_analysis_query_cache() Optional[Dict[str, Dict[str, str]]][source]#

Safely load the analysis query cache, with all elements of the cache

  • If there is no cache file, return None

  • If the file is empty, return an empty cache

  • Return the contents of the file.

Returns:

Optional[Dict[str, Dict[str, str]]]: Returns the full query cache if it exists

_lookup_analysis_query_cache(query_hash: str, filename: Optional[str] = None, location: Optional[Path] = None) Optional[str][source]#

Look at all possible query caches for this query.

If location is None, start from the global location when searching for a query file. If location is specified, check that directory. In both cases, if the query hash isn’t found, move up one directory and try again.

filename is the name of the file we should be looking for. If None default to the global.

Args:

query_hash (str): The hash of the query we need to look up.

filename (Optional[str]): The name of the file that contains the cache. If not specified, defaults to the global.

location (Optional[Path]): Directory to start searching in. If not specified, defaults to the global. If that isn’t specified, defaults to the current directory.

Returns:

Optional[str]: The request-id associated with the query hash, or None if it was not found

_query_cache_file(json: Dict[str, str]) Path[source]#

Return the query cache file

_query_status_cache_file(request_id: str) Path[source]#

Return the query status cache file

_remove_from_analysis_cache(query_hash: str)[source]#

Remove an item from the analysis cache if we are writing to it!

Args:

query_hash (str): The hash we will remove

_save_analysis_query_cache(cache: Dict[str, str])[source]#
_write_analysis_query_cache(query_info: Dict[str, str], request_id: str)[source]#

Write out a local analysis query hash to request-id association.

Args:

query_info (Dict[str, str]): The JSON of the request

request_id (str): The request-id

data_file_location(request_id: str, data_name: str) Path[source]#

Return the path to the file that should be written out for this data_name. This is where the output file should be stored. The leftmost characters of long filenames are truncated to avoid an OSError: [Errno 63] File name too long, and a hash string is used to make sure the truncated file names remain unique.
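
For example (the request id and data name are illustrative):

    local_path = cache.data_file_location("request-id-1234", "my_data_file.root")
    # A unique, length-limited path under the cache directory where the
    # downloaded file should be written.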

ignore_cache()[source]#

Ignore the cache for as long as the returned object is held. Supports nesting.
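
A sketch, assuming the returned object is used as a context manager (as "held" in the docstring suggests):

    with cache.ignore_cache():
        # Queries issued while this block is held bypass the cache.
        ...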

lookup_files(id: str) Optional[List[Tuple[str, Path]]][source]#

Return a list of files in the cache for a request id.

  • Returns None if there is nothing in the cache

  • Returns None if any of the files are missing

Args:

id (str): Request-id we are looking up

Returns:

Optional[List[Tuple[str, Path]]]: List of minio-bucket to local file mappings
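
An illustrative lookup (the request id is hypothetical):

    files = cache.lookup_files("request-id-1234")
    if files is None:
        # Nothing cached for this request, or a cached file has gone missing.
        pass
    else:
        for minio_bucket, local_path in files:
            print(minio_bucket, local_path)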

lookup_inmem(id: str) Optional[Any][source]#
lookup_query(json: Dict[str, str]) Optional[str][source]#
lookup_query_status(request_id: str) Dict[str, str][source]#

Returns the info from the last time the query status was cached.

Args:

request_id (str): Request id we should look up.

query_status_exists(request_id: str) bool[source]#

Returns true if the query status file exists on the local machine.

Args:

request_id (str): The request-id to look up

Returns:

bool: True if present, false otherwise.

remove_query(json: Dict[str, Any])[source]#

Remove the query from our local and analysis caches

Args:

json (Dict[str, Any]): The query to remove

classmethod reset_cache()[source]#

Reset the internal cache, usually used for testing
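
For example, in a test fixture:

    from servicex.cache import Cache

    def setup_function():
        # Clear the internal cache so tests do not see each other's entries.
        Cache.reset_cache()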

set_files(id: str, files: List[Tuple[str, Path]])[source]#

Cache the files for this request

Note: We do check to make sure all the files exist

Args:

id (str): The request-id

files (List[Tuple[str, Path]]): The minio bucket name and local file paths
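
A sketch of caching files for a request (names are illustrative; per the note above, the local files must already exist on disk):

    from pathlib import Path

    cache.set_files(
        "request-id-1234",
        [("minio-bucket-name", Path("/tmp/servicex-cache/data/file-1.root"))],
    )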

set_inmem(id: str, v: Any)[source]#
set_query(json: Dict[str, str], v: str)[source]#

Associate a query with a request-id.

A hash is taken of the query.

Args:

json (Dict[str, str]): The query JSON

v (str): The request-id
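
A sketch pairing set_query, lookup_query, and remove_query (the query JSON fields and request-id are illustrative):

    query = {"did": "an-illustrative-dataset-id", "selection": "..."}
    cache.set_query(query, "request-id-1234")
    print(cache.lookup_query(query))  # -> "request-id-1234", looked up by query hash
    cache.remove_query(query)         # drop it from the local and analysis caches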

set_query_status(query_info: Dict[str, str])[source]#

Cache a query status (json dict)

Args:

query_info (Dict[str, str]): The info we should cache. Must contain request_id.
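
A sketch of caching and reading back a query status (fields other than request_id are illustrative):

    status = {"request_id": "request-id-1234", "files-remaining": "0"}
    cache.set_query_status(status)
    if cache.query_status_exists("request-id-1234"):
        print(cache.lookup_query_status("request-id-1234"))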