Browse Source

wip - basically working. needs tests

Daniel Sheffield 1 year ago
parent
commit
a89cf72082
6 changed files with 187 additions and 23 deletions
  1. 32 2
      app/rest/Cache.py
  2. 110 0
      app/rest/hash_util.py
  3. 31 11
      app/rest/pyapi.py
  4. 1 1
      app/rest/query_to_xml.py
  5. 8 5
      app/rest/route_decorators.py
  6. 5 4
      app/rest/trend.py

+ 32 - 2
app/rest/Cache.py

@@ -4,15 +4,44 @@
 #
 # THIS SOFTWARE IS PROVIDED AS IS WITHOUT WARRANTY
 from typing import Dict
-
 from .CachedLoadingPage import CachedLoadingPage
 
+def key_to_hash(key):
+    """Reduce a cache key to the integer hash used as the dictionary key.
+
+    Accepted forms of ``key``:
+      * an ``(orig, _hash)`` tuple -- either element may be ``None``; when
+        both are given, ``hash(orig)`` must equal ``_hash``;
+      * an ``int`` -- treated as an already-computed hash, returned as is;
+      * anything else -- treated as the original key and hashed.
+
+    Returns:
+        The integer hash for the key.
+
+    Raises:
+        KeyError: if neither an original key nor a hash is supplied
+            (``None`` or ``(None, None)``), or if the supplied hash does
+            not match ``hash(orig)``.
+
+    Note: a tuple that is not a pair still fails in the unpacking below
+    with ValueError, as before.
+    """
+    if isinstance(key, tuple):
+        orig, _hash = key
+    elif isinstance(key, int):
+        orig, _hash = None, key
+    else:
+        orig, _hash = key, None
+
+    # Compare against None by identity; the previous tuple-identity test
+    # (`(_hash, orig) is (None, None)`) could never be true, so empty keys
+    # silently hashed to hash(None).
+    if orig is None and _hash is None:
+        raise KeyError(f"Invalid key: {key}")
+
+    if orig is None:
+        return _hash
+
+    if _hash is None:
+        return hash(orig)
+
+    # Both supplied: the hash must belong to the original key.
+    if hash(orig) != _hash:
+        raise KeyError(f"Invalid key: {key}")
+
+    return _hash
+
 class Cache:
     def __init__(self, limit) -> None:
         self._cache: Dict[str, CachedLoadingPage] = dict()
         self._limit = limit
 
+    def __delitem__(self, key):
+        self._cache.pop(key_to_hash(key), None)
+
+    def __getitem__(self, key):
+        return self.get(key_to_hash(key))
+
+    def __setitem__(self, key, value):
+        self._cache[key_to_hash(key)] = value
+
     def get(self, key: str) -> str:
+        key = key_to_hash(key)
         if key not in self._cache:
             return None
         
@@ -35,10 +64,11 @@ class Cache:
     def add(self, key: str, page: CachedLoadingPage) -> str:
         self._clear_stale()
         self._enforce_limit(self._limit)
-        self._cache[key] = page
+        self._cache[key_to_hash(key)] = page
         return page.value
     
     def remove(self, key: str):
+        key = key_to_hash(key)
         if key in self._cache:
             del self._cache[key]
 

+ 110 - 0
app/rest/hash_util.py

@@ -0,0 +1,110 @@
+def normalize_hash(_hash: int) -> int:
+    """Reduce *_hash* to its low 16 bits, giving a value in 0..0xffff."""
+    low_16_bits = 0xffff
+    return _hash & low_16_bits
+
+def normalize_bytes(_bytes: bytes) -> bytes:
+    """Return exactly two bytes: the tail of *_bytes*, zero-padded on the left."""
+    padded = b'\x00\x00' + _bytes
+    return padded[-2:]
+
+def normalize_hex(hex: str) -> str:
+    """Return exactly four characters: the tail of *hex*, left-padded with '0'."""
+    padded = hex.zfill(4)
+    return padded[-4:]
+
+def hex_to_bytes(hex: str) -> bytes:
+    """Convert a hex string to the two bytes encoded by its last four digits.
+
+    Shorter input is left-padded with zeros; longer input keeps only its
+    low four digits, matching how the other helpers in this module reduce
+    values to 16 bits.
+
+    Raises:
+        ValueError: if *hex* contains characters that are not hex digits.
+    """
+    # bytes.fromhex skips whitespace, so remove it before counting digits
+    # to keep spaced-out input (e.g. 'a2 49') working as it did before.
+    digits = "".join(hex.split())
+    # Truncate *before* decoding: decoding first made odd-length input longer
+    # than four digits (e.g. 'abcde') fail, although only the tail is used.
+    return bytes.fromhex(digits.zfill(4)[-4:])
+
+def bytes_to_hex(_bytes: bytes) -> str:
+    """Render the two-byte normalized form of *_bytes* as a hex string."""
+    two_bytes = normalize_bytes(_bytes)
+    return two_bytes.hex()
+
+def hash_to_bytes(_hash: int) -> bytes:
+    """Return the low 16 bits of *_hash* as two big-endian bytes.
+
+    Negative values are reduced the same way as by a two's-complement
+    encoding, so the result agrees with masking the hash with 0xffff.
+    """
+    # Mask first so any int is accepted; encoding the full value as a signed
+    # 8-byte integer raised OverflowError outside the 64-bit range.
+    return (_hash & 0xffff).to_bytes(2, byteorder='big')
+
+def bytes_to_hash(_bytes: bytes) -> int:
+    """Read the two-byte normalized form of *_bytes* as an unsigned big-endian int."""
+    return int.from_bytes(normalize_bytes(_bytes), 'big')
+
+def hash_to_hex(_hash: int) -> str:
+    """Format the low 16 bits of *_hash* as four lowercase hex digits."""
+    low_16_bits = _hash & 0xffff
+    return f"{low_16_bits:04x}"
+
+def hex_to_hash(hex: str) -> int:
+    """Parse *hex* as a base-16 integer and keep only its low 16 bits."""
+    parsed = int(hex, 16)
+    return parsed & 0xffff
+
+# Import-time self-checks for the helpers above. They execute every time the
+# module is imported (unless Python runs with -O, which strips asserts).
+# The commented-out asserts call the builtin hash() on a str, whose value
+# changes between interpreter runs; presumably they only hold when the
+# interpreter is started with the seed noted below -- TODO confirm.
+# PYTHONHASHSEED=0
+#assert hash_to_hex(10) == '000a', ":" + hash_to_hex(10)
+assert hash_to_hex(41545) == 'a249', hash_to_hex(41545)
+assert hash_to_hex(-7583489610679606711) == 'a249', hash_to_hex(-7583489610679606711)
+assert hash_to_hex(1421958803217889556) == normalize_hex('13bbcfff670ab914'), hash_to_hex(1421958803217889556)
+# normalize_bytes: keep the last two bytes, left-padding short input.
+#assert hash('a').to_bytes(8,'big', signed=True) == b'\x96\xc2\x08`\xcd\x93\xa2I'
+assert normalize_bytes(b'\x96\xc2\x08`\xcd\x93\xa2I') == b'\xa2I'
+assert normalize_bytes(b'\xa2I') == b'\xa2I'
+assert normalize_bytes(b'\x0a') == b'\x00\x0a'
+
+# normalize_hash: negative and already-normalized values map to the same 16 bits.
+#assert hash('a') == -7583489610679606711
+assert normalize_hash(-7583489610679606711) == 41545, normalize_hash(-7583489610679606711) 
+assert normalize_hash(41545) == 41545, normalize_hash(41545)
+
+# normalize_hex: keep the last four digits.
+#assert hash('a').to_bytes(8,'big', signed=True).hex() == '96c20860cd93a249'
+assert normalize_hex('96c20860cd93a249') == 'a249', normalize_hex('96c20860cd93a249')
+assert normalize_hex('a249') == 'a249'
+
+# normalize_hex: short input is zero-padded to four digits.
+assert normalize_hex('a') == '000a', normalize_hex('a')
+assert normalize_hex('0a') == '000a'
+assert normalize_hex('00a') == '000a'
+
+# hex_to_hash: padding does not change the value; long input is truncated.
+assert hex_to_hash('a') == 10
+assert hex_to_hash('0a') == 10
+assert hex_to_hash('00a') == 10
+assert hex_to_hash('000a') == 10
+assert hex_to_hash('a249') == 41545
+assert hex_to_hash('96c20860cd93a249') == 41545
+
+# bytes_to_hash: one, two and eight byte inputs.
+assert bytes_to_hash(b'\n') == 10
+assert bytes_to_hash(b'\x00\n') == 10
+assert bytes_to_hash(b'\xa2I') == 41545
+assert bytes_to_hash(b'\x96\xc2\x08`\xcd\x93\xa2I') == 41545
+
+# hex_to_bytes: same inputs as hex_to_hash, expressed as two bytes.
+assert hex_to_bytes('a') == b'\x00\n'
+assert hex_to_bytes('0a') == b'\x00\n'
+assert hex_to_bytes('00a') == b'\x00\n'
+assert hex_to_bytes('000a') == b'\x00\n'
+assert hex_to_bytes('a249') == b'\xa2I'
+assert hex_to_bytes('96c20860cd93a249') == b'\xa2I'
+
+# hash_to_hex: repeats the checks at the top of this section, plus the
+# zero-padded case for a small value.
+assert hash_to_hex(1421958803217889556) == normalize_hex('13bbcfff670ab914'), hash_to_hex(1421958803217889556)
+assert hash_to_hex(10) == '000a', hash_to_hex(10)
+assert hash_to_hex(41545) == 'a249', hash_to_hex(41545)
+assert hash_to_hex(-7583489610679606711) == 'a249', hash_to_hex(-7583489610679606711)
+
+
+if __name__ == '__main__':
+    # Micro-benchmark: time one million calls of each helper and print the
+    # total elapsed seconds per helper.
+    from timeit import timeit
+
+    # (function name, source text of the argument expression)
+    benchmarks = [
+        ('normalize_hash', "hash('a')"),
+        ('normalize_hex', "'a'"),
+        ('normalize_bytes', r"b'\x0a'"),
+        ('hex_to_hash', "'a'"),
+        ('hex_to_bytes', "'a'"),
+        ('bytes_to_hash', r"b'\n'"),
+        ('bytes_to_hex', r"b'\n'"),
+        ('hash_to_bytes', "10"),
+        ('hash_to_hex', "10"),
+    ]
+    for name, arg_src in benchmarks:
+        stmt = f"{name}({arg_src})"
+        # At module level locals() is the module namespace, so the timed
+        # statement can see the helper functions.
+        seconds = timeit(stmt, globals=locals(), number=1000000)
+        print(f"{name}: {seconds}")

+ 31 - 11
app/rest/pyapi.py

@@ -5,14 +5,17 @@
 # THIS SOFTWARE IS PROVIDED AS IS WITHOUT WARRANTY
 import os
 from threading import Thread
-from bottle import route, request, response, template, static_file
+from bottle import route, request, response, template, static_file, FormsDict
 from psycopg import connect
 
-from .route_decorators import normalize, poison, cursor
+from app.data.filter import get_filter, get_query_param
+
+from .hash_util import hex_to_hash
+from .route_decorators import normalize, normalize_query, poison, cursor
 from .query_to_xml import get_categories, get_groups, get_products, get_tags
 from .CachedLoadingPage import CachedLoadingPage
 from .Cache import Cache
-from . import trend as worker
+from . import BOOLEAN, PARAMS, trend as worker
 
 host = f"host={os.getenv('HOST')}"
 db = f"dbname={os.getenv('DB', 'grocery')}"
@@ -29,21 +32,38 @@ def send_static(filename):
     return static_file(filename, root='app/rest/static')
 
 
-@route('/grocery/trend')
+@route('/grocery/trend', method=['GET', 'POST'])
 @poison(cache=CACHE)
 @normalize
 def trend():
-    page = CACHE.get(request.query_string)
+    key = normalize_query(request.params)
+    parts = key.split('=')
+    if len(parts) == 2 and parts[0] == 'hash':
+        _, _hash = parts
+        key = hex_to_hash(_hash)
+
+    page = CACHE[key]
+    
     _, _, path, *_ = request.urlparts
-    return page if page else CACHE.add(request.query_string, CachedLoadingPage(
+    if page:
+        return page
+    
+    param = get_filter(request.params, allow=PARAMS)
+    params = FormsDict({
+        k: get_query_param(*param[k]) if k != 'organic' else BOOLEAN[
+            BOOLEAN.get(request.params.organic, None)
+        ] for k in sorted(param) if param[k]
+    })
+
+    return CACHE.add(key, CachedLoadingPage(
         template("loading", progress=[]),
         lambda queue: Thread(target=worker.trend, args=(
-            queue, conn, path, request.query
+            queue, conn, path, params
         )).start()
     ))
 
 
-@route('/grocery/groups')
+@route('/grocery/groups', method=['GET', 'POST'])
 @normalize
 @cursor(connection=conn)
 def groups(cur):
@@ -51,7 +71,7 @@ def groups(cur):
     return get_groups(cur, request.query)
 
 
-@route('/grocery/categories')
+@route('/grocery/categories', method=['GET', 'POST'])
 @normalize
 @cursor(connection=conn)
 def categories(cur):
@@ -59,7 +79,7 @@ def categories(cur):
     return get_categories(cur, request.query)
 
 
-@route('/grocery/products')
+@route('/grocery/products', method=['GET', 'POST'])
 @normalize
 @cursor(connection=conn)
 def products(cur):
@@ -67,7 +87,7 @@ def products(cur):
     return get_products(cur, request.query)
 
 
-@route('/grocery/tags')
+@route('/grocery/tags', method=['GET', 'POST'])
 @normalize
 @cursor(connection=conn)
 def tags(cur):

+ 1 - 1
app/rest/query_to_xml.py

@@ -46,7 +46,7 @@ def render_form(cur: Cursor, inner: str, query: FormsDict):
     data = DataFrame(get_data(cur, inner)).dropna()
     action = request.path.split('/')[-1]
     organic = BOOLEAN.get(query.organic, None)
-    return get_form(action, 'get', _filter, organic, data)
+    return get_form(action, 'post', _filter, organic, data)
 
 
 def get_xml(cur: Cursor, sql: str):

+ 8 - 5
app/rest/route_decorators.py

@@ -12,26 +12,29 @@ from psycopg.connection import TupleRow
 from ..data.filter import get_filter, get_query_param
 from . import BOOLEAN, PARAMS
 from .Cache import Cache
-
+from .hash_util import hash_to_hex, normalize_hex
 
 def normalize_query(query: FormsDict, allow: Iterable[str] = None) -> str:
+    allow = allow or (PARAMS | { 'hash'})
+    if 'hash' in query and query.hash:
+        _hex = normalize_hex(query.hash)
+        return f'hash={_hex}'
     param = get_filter(query, allow=allow)
-    return urlencode([
+    norm = urlencode([
         (
             k, get_query_param(*param[k])
         ) if k != 'organic' else (
             "organic", BOOLEAN[BOOLEAN.get(query.organic, None)]
         ) for k in sorted(param) if param[k]
     ])
-
+    return norm if len(norm) < 2000 else f'hash={hash_to_hex(hash(norm))}'
 
 def _normalize_decorator(func: Callable, allow=None):
-    allow = allow or PARAMS
     def wrap(*args, **kwargs):
         _, _, path, *_ = request.urlparts
         normalized = normalize_query(request.params, allow=allow)
         if request.query_string != normalized:
-            return redirect(f'{path}?{normalized}')
+            return redirect(f'{path}?{normalized}', 307)
         return func(*args, **kwargs)
     return wrap
 

+ 5 - 4
app/rest/trend.py

@@ -7,7 +7,7 @@
 from io import StringIO
 from queue import Queue
 from bottle import (
-    DictProperty,
+    FormsDict,
     HTTPError,
     template,
 )
@@ -37,12 +37,13 @@ matplotlib.use('agg')
 def abort(code, text):
     return HTTPError(code, text)
 
-def trend(queue: Queue, conn: Connection[TupleRow], path: str, query: DictProperty):
+def trend(queue: Queue, conn: Connection[TupleRow], path: str, query: FormsDict):
     for item in trend_internal(conn, path, query):
         queue.put(item, block=True)
     queue.put(None)
 
-def trend_internal(conn: Connection[TupleRow], path: str, query: DictProperty):
+def trend_internal(conn: Connection[TupleRow], path: str, query: FormsDict):
+    print({ k: query[k] for k in query })
     progress = []
     try:
         with conn.cursor() as cur:
@@ -121,7 +122,7 @@ def trend_internal(conn: Connection[TupleRow], path: str, query: DictProperty):
             _filter = get_filter(query, allow=PARAMS)
             organic = BOOLEAN.get(query.organic, None)
             action = path.split('/')[-1]
-            form = get_form(action, 'get', _filter, organic, data)
+            form = get_form(action, 'post', _filter, organic, data)
             
             progress[-1]["status"] = "done"
             yield template("loading", progress=progress)