Skip to content

Commit bdfe4e2

Browse files
committed
feat!: switch CBOR library to cborg
BREAKING CHANGE!

* Will be much more strict with data types on encode and decode: only IPLD Data Model values will be accepted; all others will be rejected (including `undefined`). No other tags than 42 will be involved in encode or decode.
* Will strictly reject decode of data where integers are encoded using more bytes than are required, as per deterministic DAG-CBOR rules.
* Strictly only tag 42 and no others. Also no exotic types like Simple Values.
* Indefinite length items will not be accepted for decode.
* BigInt will be used for integers decoded outside of the safe integer range. They are also accepted for encodes, within the 64-bit range. The exotic bignumber type from borc is no longer supported.
* Floats are now always encoded as 64-bit, to match the updated DAG-CBOR spec and the Go implementation.
* Uint8Arrays are native through the stack now; `Buffer` isn't required for browser bundling.
1 parent d498de9 commit bdfe4e2

File tree

3 files changed

+80
-181
lines changed

3 files changed

+80
-181
lines changed

index.js

Lines changed: 45 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -1,141 +1,70 @@
11
// @ts-check
22

3-
import cbor from 'borc'
4-
import isCircular from '@ipld/is-circular'
3+
import * as cborg from 'cborg'
54
// @ts-ignore
6-
import { bytes, CID } from 'multiformats'
7-
8-
const { asCID } = CID
9-
const decodeCID = CID.decode
5+
import { CID } from 'multiformats'
106

117
// https://github.com/ipfs/go-ipfs/issues/3570#issuecomment-273931692
128
const CID_CBOR_TAG = 42
13-
149
const code = 0x71
1510
const name = 'dag-cbor'
1611

17-
function tagCID (cid) {
18-
const tag = bytes.fromHex('00')
19-
const buffer = new Uint8Array(tag.byteLength + cid.bytes.byteLength)
20-
buffer.set(tag)
21-
buffer.set(cid.bytes, tag.byteLength)
22-
const tagged = new cbor.Tagged(CID_CBOR_TAG, buffer, null)
23-
return tagged
24-
}
25-
26-
function replaceCIDbyTAG (dagNode, config) {
27-
if (dagNode && typeof dagNode === 'object' && isCircular(dagNode, { asCID: true })) {
28-
throw new Error('The object passed has circular references')
12+
// this will receive all Objects, we need to filter out anything that's not
13+
// a CID and return `null` for that so it's encoded as normal
14+
function cidEncoder (obj) {
15+
if (obj.asCID !== obj) {
16+
return null // any other kind of object
2917
}
30-
31-
function transform (obj) {
32-
if (bytes.isBinary(obj)) return bytes.coerce(obj)
33-
if (!obj || typeof obj === 'string') {
34-
return obj
35-
}
36-
37-
if (Array.isArray(obj)) {
38-
return obj.map(transform)
39-
}
40-
41-
const cid = asCID(obj, config)
42-
if (cid) {
43-
return tagCID(cid)
44-
}
45-
46-
const keys = Object.keys(obj)
47-
48-
if (keys.length > 0) {
49-
// Recursive transform
50-
const out = {}
51-
keys.forEach((key) => {
52-
if (typeof obj[key] === 'object') {
53-
out[key] = transform(obj[key])
54-
} else {
55-
out[key] = obj[key]
56-
}
57-
})
58-
return out
59-
} else {
60-
return obj
61-
}
18+
const cid = CID.asCID(obj)
19+
/* c8 ignore next 4 */
20+
// very unlikely case, and it'll probably throw a recursion error in cborg
21+
if (!cid) {
22+
return null
6223
}
63-
64-
return transform(dagNode)
24+
const bytes = new Uint8Array(cid.bytes.byteLength + 1)
25+
bytes.set(cid.bytes, 1) // prefix is 0x00, for historical reasons
26+
return [
27+
new cborg.Token(cborg.Type.tag, CID_CBOR_TAG),
28+
new cborg.Token(cborg.Type.bytes, bytes)
29+
]
6530
}
6631

67-
const defaultTags = {
68-
[CID_CBOR_TAG]: (val) => {
69-
return decodeCID(val.subarray(1), cidConfig)
70-
}
32+
function undefinedEncoder () {
33+
throw new Error('`undefined` is not supported by the IPLD Data Model and cannot be encoded')
7134
}
7235

73-
const defaultSize = 64 * 1024 // current decoder heap size, 64 Kb
74-
const defaultMaxSize = 64 * 1024 * 1024 // max heap size when auto-growing, 64 Mb
75-
76-
let currentSize = defaultSize
77-
let maxSize = defaultMaxSize
78-
let decoder = null
79-
let cidConfig = null
80-
81-
/**
82-
* Configure the underlying CBOR decoder.
83-
*
84-
* @param {Object} [options] - The options the decoder takes. The decoder will reset to the default values if no options are given.
85-
* @param {number} [options.size=65536] - The current heap size used in CBOR parsing, this may grow automatically as larger blocks are encountered up to `maxSize`
86-
* @param {number} [options.maxSize=67108864] - The maximum size the CBOR parsing heap is allowed to grow to before `dagCBOR.util.deserialize()` returns an error
87-
* @param {Object} [options.tags] - An object whose keys are CBOR tag numbers and values are transform functions that accept a `value` and return a decoded representation of that `value`
88-
*/
89-
const configureDecoder = (options) => {
90-
const tags = defaultTags
91-
92-
if (options) {
93-
if (typeof options.size === 'number') {
94-
currentSize = options.size
95-
}
96-
if (typeof options.maxSize === 'number') {
97-
maxSize = options.maxSize
98-
}
99-
} else {
100-
// no options, reset to defaults
101-
currentSize = defaultSize
102-
maxSize = defaultMaxSize
103-
}
104-
105-
const decoderOptions = {
106-
tags,
107-
size: currentSize
36+
const encodeOptions = {
37+
float64: true,
38+
typeEncoders: {
39+
Object: cidEncoder,
40+
undefined: undefinedEncoder
10841
}
109-
110-
decoder = new cbor.Decoder(decoderOptions)
111-
// borc edits opts.size in-place so we can capture _actual_ size
112-
currentSize = decoderOptions.size
11342
}
114-
configureDecoder()
11543

116-
const encode = (node, config) => {
117-
const nodeTagged = replaceCIDbyTAG(node, config)
118-
const serialized = cbor.encode(nodeTagged)
119-
return bytes.coerce(serialized)
44+
function encode (node) {
45+
return cborg.encode(node, encodeOptions)
12046
}
12147

122-
const decode = (data, config) => {
123-
cidConfig = config
124-
if (data.length > currentSize && data.length <= maxSize) {
125-
configureDecoder({ size: data.length })
48+
function cidDecoder (bytes) {
49+
if (bytes[0] !== 0) {
50+
throw new Error('Invalid CID for CBOR tag 42; expected leading 0x00')
12651
}
52+
return CID.decode(bytes.subarray(1)) // ignore leading 0x00
53+
}
12754

128-
if (data.length > currentSize) {
129-
throw new Error('Data is too large to deserialize with current decoder')
130-
}
55+
const decodeOptions = {
56+
allowIndefinite: false,
57+
allowUndefined: false,
58+
allowBigInt: true, // this will lead to BigInt for ints outside of
59+
// safe-integer range, which may surprise users
60+
strict: true,
61+
useMaps: false,
62+
tags: []
63+
}
64+
decodeOptions.tags[CID_CBOR_TAG] = cidDecoder
13165

132-
// borc will decode back-to-back objects into an implicit top-level array, we
133-
// strictly want to only see a single explicit top-level object
134-
const all = decoder.decodeAll(data)
135-
if (all.length !== 1) {
136-
throw new Error('Extraneous CBOR data found beyond initial top-level object')
137-
}
138-
return all[0]
66+
function decode (data) {
67+
return cborg.decode(data, decodeOptions)
13968
}
14069

141-
export { name, code, encode, decode, configureDecoder }
70+
export { name, code, encode, decode }

package.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,16 @@
2929
},
3030
"homepage": "https://github.com/ipld/js-dag-cbor",
3131
"dependencies": {
32-
"@ipld/is-circular": "^2.0.0",
33-
"borc": "^2.1.2",
32+
"cborg": "^1.0.1",
3433
"multiformats": "^4.0.0"
3534
},
3635
"devDependencies": {
37-
"garbage": "0.0.0",
36+
"chai": "^4.2.0",
3837
"hundreds": "0.0.8",
39-
"mocha": "^8.1.3",
38+
"ipld-garbage": "^1.0.3",
39+
"mocha": "^8.2.1",
4040
"polendina": "^1.1.0",
41-
"standard": "^14.3.4"
41+
"standard": "^16.0.3"
4242
},
4343
"directories": {
4444
"test": "test"

test/test-basics.js

Lines changed: 30 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,13 @@
11
/* eslint-env mocha */
22
'use strict'
3-
import garbage from 'garbage'
4-
import assert from 'assert'
5-
import { encode, decode, configureDecoder } from '../index.js'
3+
import garbage from 'ipld-garbage'
4+
import chai from 'chai'
5+
import { encode, decode } from '../index.js'
66
import { bytes, CID } from 'multiformats'
77

8+
const { assert } = chai
89
const test = it
9-
const _same = assert.deepStrictEqual
10-
11-
const same = (x, y) => {
12-
if (typeof x !== 'object') return _same(x, y)
13-
const skip = { nested: null, bytes: null, multihash: null, digest: null, link: null }
14-
for (const prop of Object.keys(skip)) {
15-
if (x[prop]) same(x[prop], y[prop])
16-
}
17-
if (x.links) {
18-
same(x.links.length, y.links.length)
19-
for (let i = 0; i < x.links.length; i++) {
20-
same(x[i], y[i])
21-
}
22-
}
23-
skip.links = null
24-
_same({ ...x, ...skip }, { ...y, ...skip })
25-
}
10+
const same = assert.deepStrictEqual
2611

2712
describe('dag-cbor', () => {
2813
const obj = {
@@ -36,7 +21,7 @@ describe('dag-cbor', () => {
3621
hello: 'world',
3722
link: CID.parse('QmRgutAxd8t7oGkSm4wmeuByG6M51wcTso6cubDdQtuEfL')
3823
},
39-
bytes: Buffer.from('asdf')
24+
bytes: new TextEncoder().encode('asdf')
4025
}
4126
const serializedObj = encode(obj)
4227

@@ -48,7 +33,7 @@ describe('dag-cbor', () => {
4833
same(bytes.toHex(serializedObj).match(/d82a/g).length, 4)
4934

5035
const deserializedObj = decode(serializedObj)
51-
same(obj, deserializedObj)
36+
same(deserializedObj, obj)
5237
})
5338

5439
test('.serialize and .deserialize large objects', () => {
@@ -61,36 +46,6 @@ describe('dag-cbor', () => {
6146

6247
const deserialized = decode(serialized)
6348
same(largeObj, deserialized)
64-
// reset decoder to default
65-
configureDecoder()
66-
})
67-
68-
test('.deserialize fail on large objects beyond maxSize', () => {
69-
// larger than the default borc heap size, should bust the heap if we turn off auto-grow
70-
const dataSize = (128 * 1024) + 1
71-
const largeObj = { someKey: [].slice.call(new Uint8Array(dataSize)) }
72-
73-
configureDecoder({ size: 64 * 1024, maxSize: 128 * 1024 }) // 64 Kb start, 128 Kb max
74-
const serialized = encode(largeObj)
75-
same(bytes.isBinary(serialized), true)
76-
77-
assert.throws(() => decode(serialized), /^Error: Data is too large to deserialize with current decoder$/)
78-
// reset decoder to default
79-
configureDecoder()
80-
})
81-
82-
test('.deserialize fail on large objects beyond maxSize - omit size', () => {
83-
// larger than the default borc heap size, should bust the heap if we turn off auto-grow
84-
const dataSize = (128 * 1024) + 1
85-
const largeObj = { someKey: [].slice.call(new Uint8Array(dataSize)) }
86-
87-
configureDecoder({ maxSize: 128 * 1024 }) // 64 Kb start, 128 Kb max
88-
const serialized = encode(largeObj)
89-
same(bytes.isBinary(serialized), true)
90-
91-
assert.throws(() => decode(serialized), /^Error: Data is too large to deserialize with current decoder$/)
92-
// reset decoder to default
93-
configureDecoder()
9449
})
9550

9651
test('.serialize and .deserialize object with slash as property', () => {
@@ -108,15 +63,24 @@ describe('dag-cbor', () => {
10863
same(actual, expected)
10964
})
11065

111-
test('error catching', () => {
112-
const circlarObj = {}
113-
circlarObj.a = circlarObj
114-
assert.throws(() => encode(circlarObj), /^Error: The object passed has circular references$/)
66+
test('error on circular references', () => {
67+
const circularObj = {}
68+
circularObj.a = circularObj
69+
assert.throws(() => encode(circularObj), /object contains circular references/)
70+
const circularArr = [circularObj]
71+
circularObj.a = circularArr
72+
assert.throws(() => encode(circularArr), /object contains circular references/)
73+
})
74+
75+
test('error on encoding undefined', () => {
76+
assert.throws(() => encode(undefined), /\Wundefined\W.*not supported/)
77+
const objWithUndefined = { a: 'a', b: undefined }
78+
assert.throws(() => encode(objWithUndefined), /\Wundefined\W.*not supported/)
11579
})
11680

11781
test('fuzz serialize and deserialize with garbage', () => {
11882
for (let ii = 0; ii < 1000; ii++) {
119-
const original = { in: garbage(100) }
83+
const original = garbage(100)
12084
const encoded = encode(original)
12185
const decoded = decode(encoded)
12286
same(decoded, original)
@@ -125,13 +89,12 @@ describe('dag-cbor', () => {
12589

12690
test('CIDv1', () => {
12791
const link = CID.parse('zdj7Wd8AMwqnhJGQCbFxBVodGSBG84TM7Hs1rcJuQMwTyfEDS')
128-
12992
const encoded = encode({ link })
13093
const decoded = decode(encoded)
13194
same(decoded, { link })
13295
})
13396

134-
test('encode and decode consistency with Uint8Array and Buffer fields', () => {
97+
test('encode and decode consistency with Uint8Array and Buffer fields', () => {
13598
const buffer = Buffer.from('some data')
13699
const bytes = Uint8Array.from(buffer)
137100

@@ -155,6 +118,13 @@ describe('dag-cbor', () => {
155118
// two top-level CBOR objects, the original and a single uint=0, valid if using
156119
// CBOR in streaming mode, not valid here
157120
decode(Buffer.concat([Buffer.from(serializedObj), Buffer.alloc(1)]))
158-
}, /^Error: Extraneous CBOR data found beyond initial top-level object/)
121+
}, /too many terminals/)
122+
})
123+
124+
test('reject bad CID lead-in', () => {
125+
// this is the same data as the CIDv1 produces but has the lead-in to the
126+
// CID replaced with 0x01 ....................... ↓↓ here
127+
const encoded = bytes.fromHex('a1646c696e6bd82a582501017012207252523e6591fb8fe553d67ff55a86f84044b46a3e4176e10c58fa529a4aabd5')
128+
assert.throws(() => decode(encoded), /Invalid CID for CBOR tag 42; expected leading 0x00/)
159129
})
160130
})

0 commit comments

Comments
 (0)