Skip to content

Commit

Permalink
Updating the example.
Browse files Browse the repository at this point in the history
  • Loading branch information
codetalker7 committed May 30, 2024
1 parent 8503c7a commit 66642b3
Showing 1 changed file with 31 additions and 1 deletion.
32 changes: 31 additions & 1 deletion examples/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,37 @@ using ColBERT
# Example script: load a collection from disk and assemble a `ColBERTConfig`
# that points at it, together with the index location and indexing options.

# Pieces of the on-disk path to the collection file.
dataroot = "downloads/lotte"
dataset = "lifestyle"
datasplit = "dev"

# This example reads `short_collection.tsv`. `path` is assigned exactly once;
# an earlier assignment pointing at `collection.tsv` was immediately
# overwritten and has been dropped as dead code.
path = joinpath(dataroot, dataset, datasplit, "short_collection.tsv")

collection = Collection(path)
# The result of this expression is discarded when the file is run as a script;
# it is only shown when the line is evaluated interactively (e.g. in the REPL).
length(collection.data)

nbits = 2   # encode each dimension with 2 bits
doc_maxlen = 300   # truncate passages at 300 tokens

checkpoint = "colbert-ir/colbertv2.0"    # the HF checkpoint
index_root = "experiments/notebook/indexes"
# With the values above this evaluates to "short_lifestyle.dev.2bits".
index_name = "short_$(dataset).$(datasplit).$(nbits)bits"
index_path = joinpath(index_root, index_name)

# Settings groups are passed positionally; groups constructed with no
# arguments use whatever defaults the package defines for them.
config = ColBERTConfig(
    RunSettings(
        experiment="notebook",
    ),
    TokenizerSettings(),
    ResourceSettings(
        checkpoint=checkpoint,
        collection=collection,
        index_name=index_name,
    ),
    DocSettings(
        doc_maxlen=doc_maxlen,
    ),
    QuerySettings(),
    IndexingSettings(
        index_path=index_path,
        nbits=nbits,
        kmeans_niters=20,
    ),
    SearchSettings(),
)

0 comments on commit 66642b3

Please sign in to comment.