Skip to content

Commit

Permalink
improve test coverage to 100% (#5)
Browse files Browse the repository at this point in the history
* improve test coverage to 100%

* compatible with python 3.7 and 3.8
  • Loading branch information
xyb authored Nov 13, 2022
1 parent 24fd991 commit 56a7bfc
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 22 deletions.
19 changes: 9 additions & 10 deletions chunksum/cdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,6 @@ def chunk_func(stream):
return chunk_func


def default_chunker() -> "Chunker":
    """Return the shared module-level chunker, building it on first use.

    The instance is stored in the module global ``_default_chunker``.
    When that global is falsy, a new ``Chunker`` is created with
    ``avg_chunk_size=CHUNKER_AVG_CHUNK_SIZE`` and stored there before
    being returned.
    """
    global _default_chunker

    # Fast path: an instance has already been stored in the module global.
    if _default_chunker:
        return _default_chunker

    _default_chunker = Chunker(avg_chunk_size=CHUNKER_AVG_CHUNK_SIZE)
    return _default_chunker


class Chunker:
"""Chunker of CDC(Content-defined Chunking).
Expand Down Expand Up @@ -66,6 +56,8 @@ def __init__(
max_chunk_size: int = None,
):
self.avg_chunk_size = avg_chunk_size
self.min_chunk_size = min_chunk_size
self.max_chunk_size = max_chunk_size
self._chunker = get_chunk_func(
avg_chunk_size=self.avg_chunk_size,
min_chunk_size=min_chunk_size,
Expand All @@ -74,6 +66,13 @@ def __init__(
self._iter = None
self._tail = b""

def __repr__(self):
    """Return a debug string showing the avg, min and max chunk sizes."""
    return (
        f"<Chunker avg={self.avg_chunk_size}, "
        f"min={self.min_chunk_size}, "
        f"max={self.max_chunk_size}>"
    )

def update(self, message: bytes) -> "Chunker":
self.message = message
self._iter = self._chunker(io.BytesIO(self._tail + message))
Expand Down
98 changes: 88 additions & 10 deletions chunksum/chunksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,15 @@ class ChunkSize:
"""
>>> ChunkSize(1024)
ChunkSize<1024>
"""
>>> ChunkSize(1) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
chunksum.chunksum.ChunkSizeError: chunk size too small: 1
>>> ChunkSize(1024 + 1) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
chunksum.chunksum.ChunkSizeError: chunk size should be a multiple of 4, but 1025 % 5 = 1
""" # noqa: E501

def __init__(self, avg_bytes=AVERAGE_MIN):
if (avg_bytes) < AVERAGE_MIN:
Expand Down Expand Up @@ -103,6 +111,24 @@ def __rmul__(self, x):


def get_chunker(size_name="", avg=1024, min=256, max=4096):
"""
>>> get_chunker('k0')
<Chunker avg=1024, min=256.0, max=4096>
>>> get_chunker('K9')
<Chunker avg=524288, min=131072.0, max=2097152>
>>> get_chunker('m2')
<Chunker avg=4194304, min=1048576.0, max=16777216>
>>> get_chunker('g1')
<Chunker avg=2147483648, min=536870912.0, max=8589934592>
>>> get_chunker('x1') # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
Exception: wrong unit of chunk size: x
>>> get_chunker('ka') # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
Exception: chunk size is not a number: a
"""
if size_name and len(size_name) == 2:
unit, power = size_name
coefficient = {"k": KILO, "m": MEGA, "g": GIGA}.get(unit.lower())
Expand All @@ -117,6 +143,28 @@ def get_chunker(size_name="", avg=1024, min=256, max=4096):


def get_hasher(name):
"""
>>> get_hasher('sha2')
<sha256 ...>
>>> get_hasher('blake2b')
<_blake2.blake2b ...>
>>> get_hasher('blake2b32')
<_blake2.blake2b ...>
>>> get_hasher('blake2s')
<_blake2.blake2s ...>
>>> get_hasher('badname') # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
Exception: unsupported hash: badname
>>> get_hasher('blake2x') # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
Exception: unsupported blake2 hash: blake2x
>>> get_hasher('blake2') # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Traceback (most recent call last):
...
Exception: unsupported blake2 hash: blake2
"""
name = name.lower()
if name == "sha2":
return sha256()
Expand All @@ -137,7 +185,7 @@ def get_hasher(name):
raise Exception(f"unsupported hash: {name}")


def compute_file(file, alg_name="fck4sha2", avg=0, min=0, max=0):
def compute_file(file, alg_name="fck4sha2", avg=0, min=0, max=0, hash="sha2"):
"""
>>> import io
Expand All @@ -148,12 +196,20 @@ def compute_file(file, alg_name="fck4sha2", avg=0, min=0, max=0):
(b'\\xfb...\\xd3', 65536)
(b'\\xfb...\\xd3', 65536)
(b'tG...\\xfe', 28928)
>>> stream = io.BytesIO(b'abcdefgh' * 2000)
>>> result = compute_file(stream, alg_name='', avg=1024, min=256, max=4096)
>>> for i in result:
... print(i)
(b'\\xbfb...\\x10T', 4096)
(b'\\xbfb...\\x10T', 4096)
(b'\\xbfb...\\x10T', 4096)
(b't\\x87...\\xcft', 3712)
"""
if alg_name:
chunk_size_name = alg_name[2:4]
chunker = get_chunker(chunk_size_name)
else:
chunker = Chunker(avg=avg, min=min, max=max)
chunker = get_chunker(avg=avg, min=min, max=max)
result = []
buffer_size = 4 * 1024 * 1024
if hasattr(file, "name"):
Expand All @@ -167,7 +223,7 @@ def gen_item(data, hasher_name):
h.update(data)
return (h.digest(), size)

hasher_name = alg_name[len("fck0") :]
hasher_name = alg_name[len("fck0") :] or hash
for data in iter_:
chunker.update(data)
for chunk in chunker.chunks:
Expand Down Expand Up @@ -250,15 +306,37 @@ def help():


def main():
"""
>>> sys.argv = ['chunksup']
>>> main() # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Print ...
Usage: ...
...
>>> import tempfile
>>> import os.path
>>> dir = tempfile.TemporaryDirectory()
>>> path = os.path.join(dir.name, 'testfile')
>>> _ = open(path, 'wb').write(b'hello')
>>> sys.argv = ['chunksum', dir.name]
>>> main()
9595...3d50 .../testfile fck4sha2!2cf2...9824:5
>>> sys.argv = ['chunksum', dir.name, 'fcm0blake2b32']
>>> main()
901c...ce59 .../testfile fcm0blake2b32!324d...72cf:5
>>> sys.argv = ['chunksum', dir.name, 'fcm0blake2s']
>>> main()
8d95...5ee5 .../testfile fcm0blake2s!1921...ca25:5
"""
if len(sys.argv) == 1:
help()
sys.exit()
if len(sys.argv) > 2:
path, alg_name = sys.argv[1:3]
else:
path, alg_name = sys.argv[1], "fck4sha2"
walk(path, sys.stdout, alg_name)
if len(sys.argv) > 2:
path, alg_name = sys.argv[1:3]
else:
path, alg_name = sys.argv[1], "fck4sha2"
walk(path, sys.stdout, alg_name)


if __name__ == "__main__":
main()
main() # pragma: no cover
4 changes: 2 additions & 2 deletions chunksum/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ def parse_line(line):
'checksum': 'sum2',
'chunks': [('abcd', 10)],
'path': './file1'}
>>> pprint(parse_line('sum3 ./file2 fck0sha2!bad:20,beef:30'))
>>> pprint(parse_line('sum3 ./long file name fck0sha2!bad:20,beef:30'))
{'alg_name': 'fck0sha2',
'checksum': 'sum3',
'chunks': [('bad', 20), ('beef', 30)],
'path': './file2'}
'path': './long file name'}
"""
items = line.split(" ")
checksum = items[0]
Expand Down

0 comments on commit 56a7bfc

Please sign in to comment.