Skip to content

Commit

Permalink
Adding function to compute codes for embeddings.
Browse files Browse the repository at this point in the history
  • Loading branch information
codetalker7 committed Jun 15, 2024
1 parent beb65be commit 76a68ef
Showing 1 changed file with 18 additions and 3 deletions.
21 changes: 18 additions & 3 deletions src/indexing/codecs/residual.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
using ..ColBERT: ColBERTConfig
using ProtoStructs

# NOTE(review): this listing is a rendered diff without +/- markers. The two
# struct headers below look like the removed/added pair of a single line --
# confirm against the repository before treating this span as compilable code.
@proto mutable struct ResidualCodec
mutable struct ResidualCodec
# Configuration object; the type is imported from the parent ColBERT module.
config::ColBERTConfig
# Centroid matrix. compress_into_codes multiplies transposed embeddings by it
# and returns column indices into it, so each column acts as one centroid.
centroids::Matrix{Float64}
# NOTE(review): the same field appears twice with different spacing --
# presumably the removed/added diff pair; confirm.
avg_residual:: Float64
avg_residual::Float64
# NOTE(review): the two bucket vectors are not read by any code visible here;
# their meaning and expected lengths need to be confirmed elsewhere.
bucket_cutoffs::Vector{Float64}
bucket_weights::Vector{Float64}
end

"""
    compress_into_codes(codec::ResidualCodec, embs::Matrix{Float64})

Return, for every column of `embs`, the index of the column of
`codec.centroids` that has the largest dot product with it.

The embeddings are processed in batches along the second dimension so that the
intermediate score matrix (batch size x number of centroids) holds at most
about `2^29` entries.

# Arguments
- `codec`: codec whose `centroids` matrix stores one centroid per column.
- `embs`: matrix with one embedding per column; its first dimension must match
  the first dimension of `codec.centroids`.

# Returns
A `Vector{Int}` with one entry per column of `embs`, in column order. On ties
the first maximal centroid index is chosen (the behaviour of `argmax`).

# Throws
- `ArgumentError` if `codec.centroids` has no columns.
- `DimensionMismatch` (from the matrix product) if the first dimensions of
  `embs` and `codec.centroids` differ.
"""
function compress_into_codes(codec::ResidualCodec, embs::Matrix{Float64})
    centroids = codec.centroids
    num_centroids = size(centroids, 2)
    num_embs = size(embs, 2)

    num_centroids > 0 ||
        throw(ArgumentError("codec.centroids must contain at least one centroid"))

    # Concretely typed output; an untyped `[]` would be a Vector{Any}.
    codes = Int[]
    sizehint!(codes, num_embs)

    # Clamp to at least 1: with more than 2^29 centroids the quotient is 0 and
    # the offset below would never advance.
    bsize = max(1, div(1 << 29, num_centroids))

    offset = 1
    while offset <= num_embs  # batch on the second dimension
        stop = min(num_embs, offset + bsize - 1)
        # A view avoids copying the batch before the product.
        dot_products = transpose(view(embs, :, offset:stop)) * centroids
        # One CartesianIndex per row; its second component is the centroid index.
        for best in argmax(dot_products; dims = 2)
            push!(codes, best[2])
        end
        offset += bsize
    end

    return codes
end

0 comments on commit 76a68ef

Please sign in to comment.