Module core.utils.hasher
Utilities to provide a 'standardized' way of producing hashes to identify uniqueness.
These hash functions are not cryptographically secure and should not be used for that. Instead, these functions should simply be used to create hashes that will be used to track changes throughout the system.
Expand source code
"""Utilities to provide a 'standardized' way of producing hashes to identify uniqueness.
These hash functions are **not**
cryptographically secure and should not be used for that. Instead, these functions should simply be used to create hashes
that will be used to track changes throughout the system.
"""
import hashlib
import os
from typing import List, Union
from pydantic.types import FilePath
from core.utils.exceptions import cdev_core_error
class FILE_CACHE_CLASS:
    """Holder for the file-hash cache used by `hash_file`."""

    # Maps a path (exactly as it was passed to `hash_file`) to the md5 hex
    # digest computed for it. This is a class-level attribute, so it is shared
    # through the class until an instance attribute named `cache` is assigned,
    # which is what `clear_file_cache` does.
    cache = dict()


# Module-wide cache object read and written by `hash_file` and reset by
# `clear_file_cache`.
FILE_CACHE = FILE_CACHE_CLASS()
def hash_list(val: List[str], deliminator: str = ";") -> str:
    """Produce a single hash for a list of values.

    Every element is converted with `str`, the pieces are joined with
    `deliminator`, and the joined text is handed to `hash_string`.

    The result is order sensitive: the same elements in a different order give
    a different hash. If the values come from an unordered collection, put them
    into a stable order (for example by sorting) before hashing.

    Args:
        val (List[str]): The values to hash.
        deliminator (str): Text placed between the elements before hashing.
            Defaults to ';'.

    Returns:
        str: hash of the joined values
    """
    joined = deliminator.join(map(str, val))
    return hash_string(joined)
def hash_string(val: str) -> str:
    """Return the md5 hex digest of a string.

    The string is converted to bytes with `str.encode()` (default encoding,
    UTF-8) before being hashed. md5 is used only as a change-tracking
    fingerprint here, not for anything security related.

    Args:
        val (str): value to hash

    Returns:
        str: hexadecimal md5 digest of the value
    """
    hasher = hashlib.md5()
    hasher.update(val.encode())
    return hasher.hexdigest()
def clear_file_cache() -> None:
    """Reset the cache consulted by the `hash_file` utility.

    The `cache` attribute of `FILE_CACHE` is rebound to a brand-new empty dict
    rather than emptied in place, so a reference to the previous dict obtained
    earlier is left untouched.
    """
    FILE_CACHE.cache = dict()
def hash_file(fp: Union[FilePath, str], bypass_cache: bool = False) -> str:
    """Hash a file given a path.

    The result is the md5 hex digest of the bytes in the file.

    Results are remembered in the module-level `FILE_CACHE`, keyed by `fp`
    exactly as it was passed in. Since this utility is primarily used as an
    internal tool within the framework, the cache speeds things up when the
    file is known not to have changed; the cache can be flushed with
    `clear_file_cache`. When using this function outside the confines of the
    framework, set `bypass_cache` to skip the cache lookup. The freshly
    computed hash is still written to the cache in that case.

    Args:
        fp (Union[FilePath, str]): Path to the file. Must be a resolvable path
            on the filesystem.
        bypass_cache (bool): Skip the cache lookup and always re-read the
            file. Defaults to False.

    Returns:
        str: hash of the file

    Raises:
        cdev_core_error: If `fp` does not refer to an existing file (built
            with `FileNotFoundError` as its second argument).
    """
    if not bypass_cache and fp in FILE_CACHE.cache:
        return FILE_CACHE.cache[fp]

    if not os.path.isfile(fp):
        raise cdev_core_error(f"Could not find file ({fp}) to hash", FileNotFoundError)

    # Feed the file to md5 in fixed-size chunks so that hashing a large file
    # does not require holding its entire contents in memory. The resulting
    # digest is identical to hashing the whole content at once.
    chunk_size = 1024 * 1024
    hasher = hashlib.md5()
    with open(fp, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            hasher.update(chunk)

    the_hash = hasher.hexdigest()
    FILE_CACHE.cache[fp] = the_hash
    return the_hash
Functions
def clear_file_cache() ‑> None
-
Clear the cache used by the
hash_file()
utility.
Expand source code
def clear_file_cache() -> None:
    """Reset the cache consulted by the `hash_file` utility.

    The `cache` attribute of `FILE_CACHE` is rebound to a brand-new empty dict
    rather than emptied in place, so a reference to the previous dict obtained
    earlier is left untouched.
    """
    FILE_CACHE.cache = dict()
def hash_file(fp: Union[pydantic.types.FilePath, str], bypass_cache: bool = False) ‑> str
-
Hash a file given a path
Note that the implementation contains a reference to a cache. Since this utility is primarily used as an internal tool within the framework, the cache helps speed things up when we know the file has not changed. Note that the framework periodically flushes this cache when needed within the context of the execution of the framework using the
clear_file_cache()
function. If using this outside the confines of the framework, you can bypass the cache by setting the
bypass_cache
flag. Note that the implementation returns an md5 hash of the bytes in the file.
Args
fp
:FilePath
- Path to the file. Must be a resolvable path on the filesystem.
bypass_cache
:Optional[bool]
- Bypass the internal cache. Defaults to False.
Returns
str
- hash of the file
Raises
Cdev_Error
Expand source code
def hash_file(fp: Union[FilePath, str], bypass_cache: bool = False) -> str:
    """Hash a file given a path.

    The result is the md5 hex digest of the bytes in the file.

    Results are remembered in the module-level `FILE_CACHE`, keyed by `fp`
    exactly as it was passed in. Since this utility is primarily used as an
    internal tool within the framework, the cache speeds things up when the
    file is known not to have changed; the cache can be flushed with
    `clear_file_cache`. When using this function outside the confines of the
    framework, set `bypass_cache` to skip the cache lookup. The freshly
    computed hash is still written to the cache in that case.

    Args:
        fp (Union[FilePath, str]): Path to the file. Must be a resolvable path
            on the filesystem.
        bypass_cache (bool): Skip the cache lookup and always re-read the
            file. Defaults to False.

    Returns:
        str: hash of the file

    Raises:
        cdev_core_error: If `fp` does not refer to an existing file (built
            with `FileNotFoundError` as its second argument).
    """
    if not bypass_cache and fp in FILE_CACHE.cache:
        return FILE_CACHE.cache[fp]

    if not os.path.isfile(fp):
        raise cdev_core_error(f"Could not find file ({fp}) to hash", FileNotFoundError)

    # Feed the file to md5 in fixed-size chunks so that hashing a large file
    # does not require holding its entire contents in memory. The resulting
    # digest is identical to hashing the whole content at once.
    chunk_size = 1024 * 1024
    hasher = hashlib.md5()
    with open(fp, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            hasher.update(chunk)

    the_hash = hasher.hexdigest()
    FILE_CACHE.cache[fp] = the_hash
    return the_hash
def hash_list(val: List[str], deliminator: str = ';') ‑> str
-
Hash a list of str values
Note that the order of the list also determines the output; therefore, if the input is stored in an unsorted collection, you should use
hash_set
instead.
Args
val
:List[str]
- The list of str to produce a hash of
deliminator
:Optional[str]
- A value used to separate the input. Defaults to ';'
Returns
str
- hash of the values
Expand source code
def hash_list(val: List[str], deliminator: str = ";") -> str:
    """Produce a single hash for a list of values.

    Every element is converted with `str`, the pieces are joined with
    `deliminator`, and the joined text is handed to `hash_string`.

    The result is order sensitive: the same elements in a different order give
    a different hash. If the values come from an unordered collection, put them
    into a stable order (for example by sorting) before hashing.

    Args:
        val (List[str]): The values to hash.
        deliminator (str): Text placed between the elements before hashing.
            Defaults to ';'.

    Returns:
        str: hash of the joined values
    """
    joined = deliminator.join(map(str, val))
    return hash_string(joined)
def hash_string(val: str) ‑> str
-
Hash a str value
Implemented using md5.
Args
val
:str
- value to hash
Returns
str
- hash of the value
Expand source code
def hash_string(val: str) -> str:
    """Return the md5 hex digest of a string.

    The string is converted to bytes with `str.encode()` (default encoding,
    UTF-8) before being hashed. md5 is used only as a change-tracking
    fingerprint here, not for anything security related.

    Args:
        val (str): value to hash

    Returns:
        str: hexadecimal md5 digest of the value
    """
    hasher = hashlib.md5()
    hasher.update(val.encode())
    return hasher.hexdigest()
Classes
class FILE_CACHE_CLASS
-
Expand source code
class FILE_CACHE_CLASS:
    """Holder for the file-hash cache used by `hash_file`."""

    # Maps a path (exactly as it was passed to `hash_file`) to the md5 hex
    # digest computed for it. This is a class-level attribute, so it is shared
    # through the class until an instance attribute named `cache` is assigned.
    cache = dict()
Class variables
var cache