test_hash_util.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. #
  2. # Copyright (c) Daniel Sheffield 2023
  3. # All rights reserved
  4. #
  5. # THIS SOFTWARE IS PROVIDED AS IS WITHOUT WARRANTY
  6. from pytest import fixture, mark
  7. import app.rest.hash_util as hash_util
  8. import base32_lib as b32
  9. from app.rest.hash_util import(
  10. hash_to_hex,
  11. hash_to_bytes,
  12. bytes_to_base32,
  13. bytes_to_base64,
  14. bytes_to_base85,
  15. bytes_to_hash,
  16. bytes_to_hex,
  17. normalize_bytes,
  18. normalize_hash,
  19. normalize_hex,
  20. base85_to_hash,
  21. base85_to_bytes,
  22. base85_to_hex,
  23. base32_to_bytes,
  24. base32_to_hash,
  25. base32_to_hex,
  26. base64_to_bytes,
  27. base64_to_hash,
  28. base64_to_hex,
  29. hash_to_base32,
  30. hash_to_base85,
  31. hash_to_base64,
  32. hex_to_base32,
  33. hex_to_base85,
  34. hex_to_base64,
  35. hex_to_bytes,
  36. hex_to_hash,
  37. )
  38. def to_unsigned(_hash: int) -> int:
  39. return _hash & hash_util.DIGEST_SIZE_BITMASK
  40. def digest_size():
  41. for g in ( two_bytes, three_bytes, four_bytes):
  42. yield g
  43. def two_bytes():
  44. hash_util.DIGEST_SIZE_BYTES = 2
  45. hash_util.DIGEST_SIZE_NIBBLES = 4
  46. hash_util.DIGEST_SIZE_BITMASK = 0xffff
  47. hash_util.DIGEST_SIZE_SIGNED_TO_UNSIGNED_BIT = 0x10000
  48. hash_util.DIGEST_SIZE_SIGNED_TO_UNSIGNED_BITMASK = 0x1ffff
  49. return hash_util.DIGEST_SIZE_BYTES
  50. def three_bytes():
  51. hash_util.DIGEST_SIZE_BYTES = 3
  52. hash_util.DIGEST_SIZE_NIBBLES = 6
  53. hash_util.DIGEST_SIZE_BITMASK = 0xffffff
  54. hash_util.DIGEST_SIZE_SIGNED_TO_UNSIGNED_BIT = 0x1000000
  55. hash_util.DIGEST_SIZE_SIGNED_TO_UNSIGNED_BITMASK = 0x1ffffff
  56. return hash_util.DIGEST_SIZE_BYTES
  57. def four_bytes():
  58. hash_util.DIGEST_SIZE_BYTES = 4
  59. hash_util.DIGEST_SIZE_NIBBLES = 8
  60. hash_util.DIGEST_SIZE_BITMASK = 0xffffffff
  61. hash_util.DIGEST_SIZE_SIGNED_TO_UNSIGNED_BIT = 0x100000000
  62. hash_util.DIGEST_SIZE_SIGNED_TO_UNSIGNED_BITMASK = 0x1ffffffff
  63. return hash_util.DIGEST_SIZE_BYTES
  64. @fixture
  65. def two_byte_bitmask():
  66. return 0xffff
  67. def hash_codes():
  68. for _hash in [
  69. -7583489610679606711,
  70. 1421958803217889556,
  71. 10,
  72. ]:
  73. _bytes = b'\00' * 8 + _hash.to_bytes(8, byteorder='big', signed=True)[-8:]
  74. _hex = _bytes.hex()
  75. print(_hash, _hex, _bytes)
  76. yield _hash, _hex, _bytes
  77. # PYTHONHASHSEED=0
  78. def digest_size_bitmask(digest_size_bits):
  79. mask = 0x0
  80. for _ in range(digest_size_bits):
  81. mask = mask << 1 | 0x1
  82. return mask
  83. def digest_size_force_unsigned_bitmask(digest_size):
  84. mask = 0x1
  85. for _ in range(8*digest_size):
  86. mask = mask << 1 | 0x1
  87. return mask
  88. def test_three_byte_assertions():
  89. digest_size = three_bytes()
  90. assert hash_to_base64(-7583489610679606711) == 'k6JJ'
  91. assert bytes_to_base64(b'\x96\xc2\x08`\xcd\x93\xa2I') == 'k6JJ'
  92. assert hex_to_base64('96c20860cd93a249') == 'k6JJ'
  93. assert base64_to_hex('k6JJ') == '93a249'
  94. assert base64_to_bytes('aa') == b'\x00\x00i'
  95. assert base64_to_hex('aa') == '000069'
  96. assert base64_to_hash('aa') == 105
  97. @mark.parametrize('_hash, _hex', [
  98. (_hash, _hex) for _hash, _hex, *_ in hash_codes()
  99. ])
  100. @mark.parametrize('digest_size_gen', digest_size())
  101. def test_hash_to_hex(_hash, _hex, digest_size_gen):
  102. digest_size = digest_size_gen()
  103. assert hash_to_hex(_hash) == _hex[-digest_size*2:]
  104. for i in range(1, digest_size*2+1):
  105. # test hex is zero padded
  106. assert hash_to_hex(_hash & digest_size_bitmask(i*4)) == _hex[-i:].zfill(digest_size*2)
  107. @mark.parametrize('_hash, _bytes', [
  108. (_hash, _bytes) for _hash, _, _bytes, *_ in hash_codes()
  109. ])
  110. @mark.parametrize('digest_size_gen', digest_size())
  111. def test_hash_to_base32(_hash, _bytes, digest_size_gen):
  112. digest_size = digest_size_gen()
  113. norm = hash_to_base32(_hash)
  114. assert norm == b32.encode(
  115. to_unsigned(int.from_bytes(
  116. _bytes[-digest_size:], byteorder='big', signed=False)
  117. )
  118. ).upper().zfill(digest_size*8//5+1)
  119. assert len(norm) == digest_size*8//5 +1
  120. for i in range(1, digest_size+1):
  121. norm = hash_to_base32(_hash & digest_size_bitmask(i*8))
  122. # test base32 is zero padded
  123. assert norm == b32.encode(
  124. to_unsigned(int.from_bytes(
  125. (b'\x00'*digest_size + _bytes[-i:])[-digest_size:], byteorder='big', signed=False)
  126. )
  127. ).upper().zfill(digest_size*8//5+1)
  128. assert len(norm) == digest_size*8//5 +1
  129. @mark.parametrize('_hash, _hex', [
  130. (_hash, _hex) for _hash, _hex, *_ in hash_codes()
  131. ])
  132. @mark.parametrize('digest_size_gen', digest_size())
  133. def test_hex_to_hash(_hash, _hex, digest_size_gen):
  134. digest_size = digest_size_gen()
  135. assert hex_to_hash(_hex) == _hash & digest_size_bitmask(digest_size*8)
  136. for i in range(1, digest_size*2+1):
  137. # test non-zero padded hex
  138. _input = _hex[-i:]
  139. assert digest_size*2 == i or len(_input) < digest_size*2
  140. assert hex_to_hash(_input) == _hash & digest_size_bitmask(i*4)
  141. # test zero padded hex
  142. _input = _hex[-i:].zfill(digest_size*2)
  143. assert len(_input) == digest_size*2
  144. assert hex_to_hash(_input) == _hash & digest_size_bitmask(i*4)
  145. @mark.parametrize('_hex, _bytes', [
  146. (_hex, _bytes) for _, _hex, _bytes, *_ in hash_codes()
  147. ])
  148. @mark.parametrize('digest_size_gen', digest_size())
  149. def test_hex_to_bytes(_hex, _bytes, digest_size_gen):
  150. digest_size = digest_size_gen()
  151. assert hex_to_bytes(_hex) == _bytes[-digest_size:]
  152. for i in range(1, digest_size*2+1):
  153. # test non-zero padded hex
  154. _input = _hex[-i:]
  155. assert digest_size*2 == i or len(_input) < digest_size*2
  156. if i % 2:
  157. if i > 2:
  158. assert hex_to_bytes(_input) == (b'\x00'*digest_size + bytes([
  159. _bytes[-i//2] & 0x0f, *_bytes[-i//2+1:]
  160. ]))[-digest_size:]
  161. else:
  162. assert hex_to_bytes(_input) == (b'\x00'*digest_size + bytes([
  163. _bytes[-i//2] & 0x0f
  164. ]))[-digest_size:]
  165. else:
  166. assert hex_to_bytes(_input) == (b'\x00'*digest_size + _bytes[-i//2:])[-digest_size:]
  167. # test zero padded hex
  168. _input = _hex[-i:].zfill(digest_size*2)
  169. assert len(_input) == digest_size*2
  170. if i % 2:
  171. if i > 2:
  172. assert hex_to_bytes(_input) == (b'\x00'*digest_size + bytes([
  173. _bytes[-i//2] & 0x0f, *_bytes[-i//2+1:]
  174. ]))[-digest_size:]
  175. else:
  176. assert hex_to_bytes(_input) == (b'\x00'*digest_size + bytes([
  177. _bytes[-i//2] & 0x0f
  178. ]))[-digest_size:]
  179. else:
  180. assert hex_to_bytes(_input) == (b'\x00'*digest_size + _bytes[-i//2:])[-digest_size:]
  181. @mark.parametrize('_hash, _bytes', [
  182. (_hash, _bytes) for _hash, _, _bytes, *_ in hash_codes()
  183. ])
  184. @mark.parametrize('digest_size_gen', digest_size())
  185. def test_bytes_to_hash(_hash, _bytes, digest_size_gen):
  186. digest_size = digest_size_gen()
  187. assert bytes_to_hash(_bytes) == _hash & digest_size_bitmask(digest_size*8)
  188. for i in range(1, digest_size+1):
  189. # test non-zero padded bytes
  190. _input = _bytes[-i:]
  191. assert digest_size == i or len(_input) < digest_size
  192. assert bytes_to_hash(_input) == _hash & digest_size_bitmask(i*8)
  193. # test zero padded bytes
  194. _input = (b'\x00'*digest_size + _bytes[-i:])[-digest_size:]
  195. assert len(_input) == digest_size
  196. assert bytes_to_hash(_input) == _hash & digest_size_bitmask(i*8)
  197. @mark.parametrize('_hash, _bytes', [
  198. (_hash, _bytes) for _hash, _, _bytes, *_ in hash_codes()
  199. ])
  200. @mark.parametrize('digest_size_gen', digest_size())
  201. def test_base32_to_hash(_hash, _bytes, digest_size_gen):
  202. digest_size = digest_size_gen()
  203. _base32 = b32.encode(
  204. to_unsigned(int.from_bytes(
  205. _bytes[-digest_size:], byteorder='big', signed=False)
  206. )
  207. ).upper().zfill(digest_size*8//5+1)
  208. norm = base32_to_hash(_base32)
  209. assert norm == _hash & digest_size_bitmask(digest_size*8)
  210. for i in range(1, digest_size+1):
  211. # test non-zero padded base32
  212. _base32 = b32.encode(
  213. to_unsigned(int.from_bytes(
  214. (b'\x00'*digest_size + _bytes[-i:])[-digest_size:], byteorder='big', signed=False)
  215. )
  216. ).upper()
  217. norm = base32_to_hash(_base32)
  218. assert norm == _hash & digest_size_bitmask(i*8)
  219. # test zero padded base32
  220. _base32 = b32.encode(
  221. to_unsigned(int.from_bytes(
  222. (b'\x00'*digest_size + _bytes[-i:])[-digest_size:], byteorder='big', signed=False)
  223. )
  224. ).upper().zfill(digest_size*8//5+1)
  225. norm = base32_to_hash(_base32)
  226. assert norm == _hash & digest_size_bitmask(i*8)
  227. @mark.parametrize('_hex', [
  228. _hex for _, _hex, *_ in hash_codes()
  229. ])
  230. @mark.parametrize('digest_size_gen', digest_size())
  231. def test_normalize_hex(_hex, digest_size_gen):
  232. digest_size = digest_size_gen()
  233. assert normalize_hex(_hex) == _hex[-digest_size*2:]
  234. assert len(normalize_hex(_hex)) == digest_size*2
  235. # test commutivity
  236. for i in range(1, digest_size*2+1):
  237. assert i == digest_size*2 or _hex[-i:] != _hex[-i:].zfill(digest_size*2)
  238. assert normalize_hex(_hex[-i:]) == _hex[-i:].zfill(digest_size*2)
  239. assert len(normalize_hex(_hex[-i:])) == digest_size*2
  240. @mark.parametrize('_bytes', [
  241. _bytes for _, _, _bytes, *_ in hash_codes()
  242. ])
  243. @mark.parametrize('digest_size_gen', digest_size())
  244. def test_normalize_bytes(_bytes, digest_size_gen):
  245. digest_size = digest_size_gen()
  246. assert normalize_bytes(_bytes) == _bytes[-digest_size:]
  247. assert len(normalize_bytes(_bytes)) == digest_size
  248. # test commutivity
  249. for i in range(1, digest_size+1):
  250. assert i == digest_size or _bytes[-i:] != (b'\x00'*digest_size + _bytes[-i:])[-digest_size:]
  251. assert normalize_bytes(_bytes[-i:]) == (b'\x00'*digest_size + _bytes[-i:])[-digest_size:]
  252. assert len(normalize_bytes(_bytes[-i:])) == digest_size
  253. @mark.parametrize('_hash', [
  254. _hash for _hash, *_ in hash_codes()
  255. ])
  256. @mark.parametrize('digest_size_gen', digest_size())
  257. def test_normalize_hash(_hash, digest_size_gen):
  258. digest_size = digest_size_gen()
  259. norm = normalize_hash(_hash)
  260. assert norm == _hash & digest_size_bitmask(digest_size*8)
  261. # test commutivity
  262. for i in range(1, digest_size+1):
  263. norm = normalize_hash(_hash & digest_size_bitmask(i*8))
  264. assert norm == _hash & digest_size_bitmask(i*8)
  265. assert norm == norm & digest_size_bitmask(i*8)