QueryCache.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. #
  2. # Copyright (c) Daniel Sheffield 2023
  3. # All rights reserved
  4. #
  5. # THIS SOFTWARE IS PROVIDED AS IS WITHOUT WARRANTY
  6. import os
  7. from typing import Dict, Iterable, Tuple
  8. from urllib.parse import urlencode
  9. from bottle import FormsDict
  10. from ..data.filter import get_filter, get_query_param
  11. from . import BOOLEAN, PARAMS
  12. from .hash_util import base32_to_hash, blake, bytes_to_hash, hash_to_base32, normalize_base32
  13. def delete_query(name: str, root: str = 'app/rest/static/files'):
  14. directory = f'{root}/{name}'
  15. try:
  16. os.remove(f'{directory}/{name}.query')
  17. except FileNotFoundError:
  18. pass
  19. def save_query(name: str, content: bytes, tool: str, root='app/rest/static/files') -> str:
  20. directory = f'{root}/{name}'
  21. try:
  22. os.mkdir(directory, mode=0o700, dir_fd=None)
  23. except FileExistsError:
  24. pass
  25. fd = os.open(f'{directory}/{name}.query', os.O_WRONLY | os.O_TRUNC | os.O_CREAT, 0o600)
  26. with open(fd, "wb") as f:
  27. f.write(content)
  28. def get_query(name: str, root: str = 'app/rest/static/files') -> str:
  29. directory = f'{root}/{name}'
  30. try:
  31. mtime = os.stat(f'{directory}/{name}.query').st_mtime
  32. except:
  33. mtime = None
  34. # if mtime and time() - mtime > STALE:
  35. # delete_query(name)
  36. # return None
  37. # todo: store query hash and validate it
  38. fd = os.open(f'{directory}/{name}.query', os.O_RDONLY, 0o600)
  39. with open(fd, "rb") as f:
  40. f.seek(0)
  41. page = f.read()
  42. return page.decode('utf-8')
  43. def norm(key):
  44. if isinstance(key, tuple):
  45. query, _hash = key
  46. else:
  47. if isinstance(key, int):
  48. query, _hash = None, key
  49. else:
  50. query, _hash = key, None
  51. if _hash and not isinstance(_hash, int):
  52. _hash = base32_to_hash(normalize_base32(query.hash))
  53. # TODO: normalize should be implicit
  54. #_hash = base32_to_hash(query.hash)
  55. if None not in (query, _hash):
  56. if get_hash(query) != _hash:
  57. raise KeyError(f"Invalid key: {key}")
  58. return query, _hash
  59. if (_hash, query) is (None, None):
  60. raise KeyError(f"Invalid key: {key}")
  61. return query, _hash if _hash else get_hash(query)
  62. def get_hash(key):
  63. _bytes = blake(key.encode('utf-8'), person='grocery'.encode('utf-8'))
  64. return bytes_to_hash(_bytes)
  65. def normalize_query(query: FormsDict, allow: Iterable[str] = None) -> Tuple[str, str]:
  66. _hash = query.hash
  67. allow = allow or PARAMS
  68. param = get_filter(query, allow=allow)
  69. norm = urlencode(map(
  70. lambda k: (
  71. k, get_query_param(*param[k]) if k != 'organic' else BOOLEAN[
  72. BOOLEAN.get(query.organic, None)
  73. ]
  74. ),
  75. sorted(filter(bool, param))
  76. ))
  77. return norm, _hash
  78. class QueryCache:
  79. def __init__(self, limit) -> None:
  80. self._cache: Dict[int, str] = dict()
  81. self._limit = limit
  82. def __delitem__(self, key):
  83. return self.remove(key)
  84. def __getitem__(self, key):
  85. return self.get(key)
  86. def __setitem__(self, key, value):
  87. return self.add(key, value)
  88. def get(self, key: str) -> str:
  89. query, _hash = norm(key)
  90. if _hash not in self._cache:
  91. if query:
  92. return self.add(_hash, query)
  93. try:
  94. existing = get_query(hash_to_base32(_hash))
  95. except:
  96. existing = None
  97. if existing:
  98. return self.add(_hash, existing)
  99. return self.add(_hash, query)
  100. value = self._cache[_hash]
  101. # if value.stale:
  102. # del self._cache[key]
  103. # delete_query(hash_to_base32(key))
  104. # return None
  105. return value
  106. # def _enforce_limit(self, limit):
  107. # for idx, (_, k) in enumerate(sorted([
  108. # (v.age, k) for k, v in self._cache.items()
  109. # ])):
  110. # if idx >= limit: del self[k]
  111. # def _clear_stale(self):
  112. # for k in [k for k, v in self._cache.items() if v.stale]:
  113. # del self[k]
  114. def add(self, key: str, value: str) -> str:
  115. #self._clear_stale()
  116. #self._enforce_limit(self._limit)
  117. query, _hash = norm(key)
  118. value = value or query
  119. if not value:
  120. raise ValueError("Invalid query string: {value}")
  121. self._cache[_hash] = value
  122. save_query(hash_to_base32(_hash), value.encode("utf-8"), 'query')
  123. return value
  124. def remove(self, key: str):
  125. key = norm(key)
  126. self._cache.pop(key, None)
  127. delete_query(hash_to_base32(key))