Skip to content

Commit

Permalink
gguf: calculate tensor data offset (#1076)
Browse files Browse the repository at this point in the history
Add the offset of the tensor data section within the file. The per-tensor
offsets stored in the metadata are relative to this offset, but the offset
itself is absent from the metadata. With this field it is possible to actually
access layers in a GGUF file; without it, the offsets shown are useless.

---------

Co-authored-by: Xuan Son Nguyen <[email protected]>
Co-authored-by: Julien Chaumond <[email protected]>
  • Loading branch information
3 people authored Dec 29, 2024
1 parent 11274e4 commit 9706030
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 4 deletions.
5 changes: 5 additions & 0 deletions packages/gguf/src/gguf.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -283,4 +283,9 @@ describe("gguf", () => {
expect(parseGGUFQuantLabel("Codestral-22B-v0.1-IQ3_XS.gguf")).toEqual(undefined); // TODO: investigate IQ3_XS
expect(parseGGUFQuantLabel("Codestral-22B-v0.1-Q4_0_4_4.gguf")).toEqual("Q4_0"); // TODO: investigate Q4_0_4_4
});

it("calculate tensor data offset", async () => {
	// Parse the URL_LLAMA fixture and check where its tensor data section is reported to start.
	const parsed = await gguf(URL_LLAMA);
	const expectedOffset = 741056n;
	expect(parsed.tensorDataOffset).toEqual(expectedOffset);
});
});
17 changes: 13 additions & 4 deletions packages/gguf/src/gguf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ export { parseGGUFQuantLabel, GGUF_QUANT_RE, GGUF_QUANT_RE_GLOBAL } from "@huggi

/** Matches any name ending in `.gguf`. */
export const RE_GGUF_FILE = /\.gguf$/;
/**
 * Matches sharded file names of the form `<prefix>-<shard>-of-<total>.gguf`,
 * where `shard` and `total` are exactly five digits each. Exposes the named
 * capture groups `prefix`, `shard` and `total`.
 */
export const RE_GGUF_SHARD_FILE = /^(?<prefix>.*?)-(?<shard>\d{5})-of-(?<total>\d{5})\.gguf$/;
/** Alignment used when the metadata does not provide `general.alignment`. */
const GGUF_DEFAULT_ALIGNMENT = 32; // defined in ggml.h
/**
 * Rounds `x` up to the nearest multiple of `n` (counterpart of GGML_PAD in ggml.h).
 *
 * Implemented with plain arithmetic instead of `(x + n - 1) & ~(n - 1)`:
 * JavaScript bitwise operators coerce their operands to 32-bit signed integers,
 * so the masked form returns truncated or negative values once `x` reaches 2^31,
 * and it is only correct when `n` is a power of two.
 *
 * @param x - Value to pad (an integer offset).
 * @param n - Alignment to pad to.
 * @returns The smallest multiple of `n` that is greater than or equal to `x`.
 *   If `n` is not a positive number, `x` is returned unchanged.
 */
const GGML_PAD = (x: number, n: number): number => {
	// A non-positive or NaN alignment cannot be padded to; leave the value untouched.
	if (!(n > 0)) {
		return x;
	}
	const remainder = x % n;
	if (remainder === 0) {
		return x;
	}
	// `%` keeps the sign of the dividend, so negative inputs round towards zero here.
	return remainder > 0 ? x + (n - remainder) : x - remainder;
}; // defined in ggml.h
// NOTE(review): presumably the cap on concurrent downloads — usage is outside this view, confirm.
const PARALLEL_DOWNLOADS = 20;

export interface GgufShardFileInfo {
Expand Down Expand Up @@ -384,14 +386,18 @@ export async function gguf(
});
}

// calculate absolute offset of tensor data
const alignment: number = Number(metadata["general.alignment"] ?? GGUF_DEFAULT_ALIGNMENT);
const tensorDataOffset = BigInt(GGML_PAD(offset, alignment));

if (params?.computeParametersCount) {
const parameterCount = tensorInfos
.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
.reduce((acc, val) => acc + val, 0);

return { metadata, tensorInfos, parameterCount };
return { metadata, tensorInfos, tensorDataOffset, parameterCount };
} else {
return { metadata, tensorInfos };
return { metadata, tensorInfos, tensorDataOffset };
}
}

Expand Down Expand Up @@ -429,7 +435,10 @@ export async function ggufAllShards(
parameterCount: shards.map(({ parameterCount }) => parameterCount).reduce((acc, val) => acc + val, 0),
};
} else {
const { metadata, tensorInfos, parameterCount } = await gguf(url, { ...params, computeParametersCount: true });
return { shards: [{ metadata, tensorInfos }], parameterCount };
const { metadata, tensorInfos, tensorDataOffset, parameterCount } = await gguf(url, {
...params,
computeParametersCount: true,
});
return { shards: [{ metadata, tensorInfos, tensorDataOffset }], parameterCount };
}
}
1 change: 1 addition & 0 deletions packages/gguf/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -155,4 +155,5 @@ export interface GGUFTensorInfo {
/**
 * Shape of a parsed GGUF result: the metadata, the tensor descriptors and the
 * location of the tensor data.
 */
export interface GGUFParseOutput<Options extends GGUFMetadataOptions = { strict: true }> {
/** Parsed metadata, typed according to `Options`. */
metadata: GGUFMetadata<Options>;
/** Descriptors of the tensors listed in the file. */
tensorInfos: GGUFTensorInfo[];
/**
 * Absolute offset of the tensor data within the file. Per-tensor offsets are
 * relative to this value, which is not stored in the metadata itself.
 */
tensorDataOffset: bigint;
}

0 comments on commit 9706030

Please sign in to comment.