Skip to content

Commit

Permalink
Adding the DocTokenizer type, and loading it inside Checkpoint.
Browse files Browse the repository at this point in the history
  • Loading branch information
codetalker7 committed May 30, 2024
1 parent b28af5c commit 8a6ac59
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 5 deletions.
5 changes: 3 additions & 2 deletions src/ColBERT.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ export RunSettings, TokenizerSettings, ResourceSettings,
DocSettings, QuerySettings, IndexingSettings,
SearchSettings, ColBERTConfig

# models
# models, document/query tokenizers
include("modelling/tokenization/doc_tokenization.jl")
include("modelling/checkpoint.jl")
export BaseColBERT, Checkpoint
export BaseColBERT, Checkpoint, DocTokenizer

# indexer
include("indexing.jl")
Expand Down
9 changes: 9 additions & 0 deletions src/indexing/collection_encoder.jl
Original file line number Diff line number Diff line change
@@ -1,2 +1,11 @@
using ..ColBERT: ColBERTConfig

"""
    CollectionEncoder(config::ColBERTConfig)

Encoder object passed to `encode_passages`; it stores the `ColBERTConfig` it was
constructed with in its `config` field.
"""
struct CollectionEncoder
    config::ColBERTConfig
end

"""
    encode_passages(encoder::CollectionEncoder, passages::Vector{String})

Log how many `passages` were handed over for encoding.

The encoding step itself is not implemented yet: the function currently only emits
the `@info` record and returns `nothing`.
"""
function encode_passages(encoder::CollectionEncoder, passages::Vector{String})
    @info "Encoding $(length(passages)) passages."

    # TODO: complete this implementation!
    return nothing
end
7 changes: 4 additions & 3 deletions src/modelling/checkpoint.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using ..ColBERT: DocTokenizer, ColBERTConfig

struct BaseColBERT
bert::Any
linear::Any
Expand All @@ -18,8 +20,7 @@ function BaseColBERT(checkpoint::String, config::ColBERTConfig)
end

# Bundles a `BaseColBERT` model with a document tokenizer and a ColBERT config.
# NOTE(review): this hunk appears to show removed and added diff lines interleaved
# without +/- markers — the `::Any` fields look like the old declarations and the
# `::DocTokenizer` / `::ColBERTConfig` fields their typed replacements. Confirm against
# the repository; as written here the field names are duplicated.
struct Checkpoint

model::BaseColBERT
doc_tokenizer::Any
colbert_config::Any
doc_tokenizer::DocTokenizer
colbert_config::ColBERTConfig
end
5 changes: 5 additions & 0 deletions src/modelling/tokenization/doc_tokenization.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
using ...ColBERT: ColBERTConfig

"""
    DocTokenizer(config::ColBERTConfig)

Document tokenizer type; at present it stores only the `ColBERTConfig` it is
constructed with, in its `config` field.
"""
struct DocTokenizer
    config::ColBERTConfig
end
Empty file.

0 comments on commit 8a6ac59

Please sign in to comment.