Skip to content

Commit

Permalink
formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidBuchanan314 committed Nov 15, 2024
1 parent ad56c83 commit f02446c
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 114 deletions.
10 changes: 7 additions & 3 deletions atheris_fuzz.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@

ATJSON_MODE = True


def TestOneInput(data):
	"""Fuzz entry point: decode `data` as DAG-CBOR, then re-encode the result.

	Inputs for which decoding or re-encoding raises are ignored. When
	ATJSON_MODE is disabled, the re-encoded bytes must equal the input.
	"""
	try:
		roundtrip = cbrrr.encode_dag_cbor(
			cbrrr.decode_dag_cbor(data, ATJSON_MODE), ATJSON_MODE
		)
	except Exception:
		# A rejected input is not a finding. Unlike a bare `except:`, this
		# does not swallow KeyboardInterrupt/SystemExit.
		return
	if not ATJSON_MODE:  # atjson does not roundtrip consistently!!!
		assert roundtrip == data


atheris.Setup(sys.argv, TestOneInput)
atheris.Fuzz()
58 changes: 33 additions & 25 deletions car_parse_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,56 +7,64 @@

sys.setrecursionlimit(99999999)


# LEB128 (has not been strictly tested!)
def parse_varint(stream):
	"""Read one unsigned LEB128 varint from `stream` and return it as an int.

	`stream` must provide `read(n)` returning bytes. Each byte contributes
	its low 7 bits, least-significant group first; a byte with the high bit
	clear terminates the value.

	Raises:
		EOFError: the stream ended before the terminating byte was read.
	"""
	n = 0
	shift = 0
	while True:
		chunk = stream.read(1)
		if not chunk:
			# Without this check, truncated input surfaced as an opaque
			# IndexError from indexing an empty bytes object.
			raise EOFError("unexpected end of stream while reading varint")
		val = chunk[0]
		n |= (val & 0x7F) << shift
		if not val & 0x80:
			return n
		shift += 7


# Running totals of time (in seconds) spent encoding and decoding blocks.
# parse_car declares both as `global` and adds to them for every block.
enctime = 00
dectime = 0


def parse_car(stream, length):
	"""Parse a CAR archive from `stream`, round-tripping every block.

	The header must be DAG-CBOR with "version" == 1 and exactly one entry in
	"roots". Each following block is decoded and then re-encoded; the
	re-encoded bytes must match the original block bytes. Time spent in the
	decoder and encoder is added to the module-level `dectime` and `enctime`
	counters.

	Args:
		stream: seekable binary stream providing `read(n)` and `tell()`.
		length: total byte length of the archive; parsing stops once
			`stream.tell()` reaches it.

	Returns:
		A `(root, nodes)` tuple: the single root from the header, and a dict
		mapping each block's CID to its decoded value.

	Raises:
		AssertionError: a structural check failed (note that asserts are
			skipped when Python runs with -O).
	"""
	global enctime
	global dectime

	header_len = parse_varint(stream)
	header_bytes = stream.read(header_len)
	assert len(header_bytes) == header_len
	car_header = decode_dag_cbor(header_bytes)
	assert car_header.get("version") == 1
	assert len(car_header.get("roots", [])) == 1

	root = car_header["roots"][0]
	nodes = {}

	while stream.tell() != length:
		block_len = parse_varint(stream)
		# XXX: this needs to be parsed properly, length might not be 36
		cid = CID(stream.read(36))
		# this is enough to validate atproto-flavoured CIDs
		assert cid.is_cidv1_dag_cbor_sha256_32()

		block_data = stream.read(block_len - 36)
		assert len(block_data) == block_len - 36
		# Block contents are not checked against the CID's hash here:
		# content_hash = hashlib.sha256(block_data).digest()
		# assert(cid_raw.endswith(content_hash))

		# perf_counter() is monotonic and high-resolution, so the intervals
		# cannot be skewed by wall-clock adjustments the way time.time() can.
		start = time.perf_counter()
		block = decode_dag_cbor(block_data, atjson_mode=ATJSON_MODE)
		# block = libipld.decode_dag_cbor(block_data)
		dectime += time.perf_counter() - start

		start = time.perf_counter()
		roundtrip = encode_dag_cbor(block, atjson_mode=ATJSON_MODE)
		enctime += time.perf_counter() - start
		assert block_data == roundtrip
		# print(block)
		nodes[cid] = block

	return root, nodes


if __name__ == "__main__":
import sys

Expand All @@ -65,16 +73,16 @@ def parse_car(stream, length):

root, nodes = parse_car(io.BytesIO(car), len(car))

dec_speed = (len(car)/(1024*1024))/dectime
dec_speed = (len(car) / (1024 * 1024)) / dectime
print(f"Parsed {len(car)} bytes at {dec_speed:.2f}MB/s")

enc_speed = (len(car)/(1024*1024))/enctime
enc_speed = (len(car) / (1024 * 1024)) / enctime
print(f"Encoded {len(car)} bytes at {enc_speed:.2f}MB/s")

#start = time.time()
#libipld.decode_car(car)
#duration = time.time()-start
#car_speed = (len(car)/(1024*1024))/duration
#print(f"libipld.decode_car {len(car)} bytes at {car_speed:.2f}MB/s")
# start = time.time()
# libipld.decode_car(car)
# duration = time.time()-start
# car_speed = (len(car)/(1024*1024))/duration
# print(f"libipld.decode_car {len(car)} bytes at {car_speed:.2f}MB/s")

#print(nodes[root])
# print(nodes[root])
24 changes: 12 additions & 12 deletions fuzz.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
import cbrrr
import os
import gc
#import sys
# import sys

#gc.collect()
#print(len(gc.get_objects()))
# gc.collect()
# print(len(gc.get_objects()))
gc.collect()
prev_heap = {}

for i in range(9999999999):
try:
#print(sys.getrefcount(cbrrr.CID))
res = cbrrr.decode_dag_cbor(os.urandom(1024), atjson_mode=i&1)
#print(res)
except Exception as e:
#print(e)
# print(sys.getrefcount(cbrrr.CID))
res = cbrrr.decode_dag_cbor(os.urandom(1024), atjson_mode=i & 1)
# print(res)
except Exception:# as e:
# print(e)
pass
if i%100000 == 0:
if i % 100000 == 0:
gc.collect()
print(len(gc.get_objects()))
if 1:
print("="*128)
this_heap = {id(x):x for x in gc.get_objects()}
print("=" * 128)
this_heap = {id(x): x for x in gc.get_objects()}
if id(prev_heap) in this_heap:
del this_heap[id(prev_heap)]
if prev_heap:
Expand All @@ -30,4 +30,4 @@
continue
print(this_heap[obj])
prev_heap = this_heap
print("="*128)
print("=" * 128)
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,7 @@ fuzz = [
[project.urls]
Homepage = "https://github.com/DavidBuchanan314/dag-cbrrr"
Issues = "https://github.com/DavidBuchanan314/dag-cbrrr/issues"


[tool.ruff.format]
indent-style = "tab"
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Extension(
"cbrrr._cbrrr",
sources=["src/cbrrr/_cbrrr.c"],
extra_compile_args=["-O3", "-Wall", "-Wextra", "-Wpedantic", "-std=c99"]
extra_compile_args=["-O3", "-Wall", "-Wextra", "-Wpedantic", "-std=c99"],
),
]
],
)
51 changes: 23 additions & 28 deletions src/cbrrr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@

CbrrrDecodeError = _cbrrr.CbrrrDecodeError


class CID:
"""
This class is very minimal, intended to support atproto use cases and not
much else.
"""

# fmt: off
CIDV1_DAG_CBOR_SHA256_32_PFX = b"\x01\x71\x12\x20"
CIDV1_RAW_SHA256_32_PFX = b"\x01\x55\x12\x20"
# fmt: on

__slots__ = ("cid_bytes",)

Expand All @@ -27,35 +30,29 @@ def __init__(self, cid_bytes: bytes) -> None:
and is_cidv1_raw_sha256_32() methods may be useful for this.
"""
self.cid_bytes = cid_bytes

@classmethod
def cidv1_dag_cbor_sha256_32_from(cls, data: bytes) -> "CID":
return cls(
cls.CIDV1_DAG_CBOR_SHA256_32_PFX
+ hashlib.sha256(data).digest()
)
return cls(cls.CIDV1_DAG_CBOR_SHA256_32_PFX + hashlib.sha256(data).digest())

@classmethod
def cidv1_raw_sha256_32_from(cls, data: bytes) -> "CID":
return cls(
cls.CIDV1_RAW_SHA256_32_PFX
+ hashlib.sha256(data).digest()
)

return cls(cls.CIDV1_RAW_SHA256_32_PFX + hashlib.sha256(data).digest())

@classmethod
def decode(cls, data: Union[bytes, str]) -> "CID":
"""
Currently supported codecs: identity/raw, base32
"""

if type(data) is str:
if isinstance(data, str):
data = data.encode()

if data.startswith(b"\x00"): # identity multibase codec
if data.startswith(b"\x00"): # identity multibase codec
return cls(data[1:])

if data.startswith(b"b"): # base32 multibase codec
data = data[1:] # strip prefix
if data.startswith(b"b"): # base32 multibase codec
data = data[1:] # strip prefix
if data.endswith(b"="):
raise ValueError("unexpected base32 padding")
# add back correct amount of padding (python is fussy)
Expand All @@ -67,8 +64,7 @@ def decode(cls, data: Union[bytes, str]) -> "CID":

def encode(self, base="base32") -> str:
if base == "base32":
return "b" + base64.b32encode(self.cid_bytes) \
.decode().lower().rstrip("=")
return "b" + base64.b32encode(self.cid_bytes).decode().lower().rstrip("=")
# this function might support other encodings in the future
raise ValueError("unsupported base encoding")

Expand All @@ -89,22 +85,22 @@ def __bytes__(self):

def __repr__(self):
return f"CID({self.encode()})"

def __hash__(self) -> int:
return self.cid_bytes.__hash__()

def __eq__(self, __value: object) -> bool:
if not isinstance(__value, CID):
return False
return self.cid_bytes == __value.cid_bytes


# nb: | syntax not supported in <=py3.9
DagCborTypes = Union[str, bytes, int, bool, float, CID, list, dict, None]


def decode_dag_cbor(
data: bytes,
atjson_mode: bool=False,
cid_ctor: Callable[[bytes], Any]=CID
data: bytes, atjson_mode: bool = False, cid_ctor: Callable[[bytes], Any] = CID
) -> DagCborTypes:
"""
Decode DAG-CBOR bytes into python objects.
Expand All @@ -119,10 +115,9 @@ def decode_dag_cbor(
raise ValueError("did not parse to end of buffer")
return parsed


def decode_multi_dag_cbor_in_violation_of_the_spec(
data: bytes,
atjson_mode: bool=False,
cid_ctor: Callable[[bytes], Any]=CID
data: bytes, atjson_mode: bool = False, cid_ctor: Callable[[bytes], Any] = CID
) -> Iterator[DagCborTypes]:
"""
https://ipld.io/specs/codecs/dag-cbor/spec/#strictness
Expand All @@ -137,12 +132,11 @@ def decode_multi_dag_cbor_in_violation_of_the_spec(
parsed, length = _cbrrr.decode_dag_cbor(view[offset:], cid_ctor, atjson_mode)
yield parsed
offset += length
assert(offset == len(data)) # should never fail!
assert offset == len(data) # should never fail!


def encode_dag_cbor(
obj: DagCborTypes,
atjson_mode: bool=False,
cid_type: Type=CID
obj: DagCborTypes, atjson_mode: bool = False, cid_type: Type = CID
) -> bytes:
"""
Encode python objects to DAG-CBOR bytes.
Expand All @@ -153,6 +147,7 @@ def encode_dag_cbor(
"""
return _cbrrr.encode_dag_cbor(obj, cid_type, atjson_mode)


__all__ = [
"CbrrrDecodeError",
"CID",
Expand Down
9 changes: 4 additions & 5 deletions src/cbrrr/_cbrrr.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ from typing import Type, TypeVar, Tuple, Callable, Any
CbrrrDecodeErrorType = TypeVar("CbrrrDecodeErrorType", bound=ValueError)
CbrrrDecodeError: CbrrrDecodeErrorType

def decode_dag_cbor(buf: bytes, cid_ctor: Callable[[bytes], Any], atjson_mode: bool) -> Tuple[Any, int]:
...

def encode_dag_cbor(obj: Any, cid_type: Type, atjson_mode: bool) -> bytes:
...
# Low-level decoder exposed by the C extension. Returns a 2-tuple of the
# decoded object and an int; the Python wrappers in cbrrr/__init__.py advance
# their read offset by that int, i.e. they treat it as the number of bytes
# consumed from `buf`. `cid_ctor` is a callable that takes bytes
# (presumably used to build CID values; confirm against _cbrrr.c).
def decode_dag_cbor(
buf: bytes, cid_ctor: Callable[[bytes], Any], atjson_mode: bool
) -> Tuple[Any, int]: ...
# Low-level encoder exposed by the C extension: turns `obj` into bytes.
# `cid_type` is the class the caller uses for CIDs (presumably how the
# encoder recognises CID values; confirm against _cbrrr.c).
def encode_dag_cbor(obj: Any, cid_type: Type, atjson_mode: bool) -> bytes: ...
Loading

0 comments on commit f02446c

Please sign in to comment.