Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python binding for the training #118

Open
wants to merge 30 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 86 additions & 1 deletion python/_kenlm.pxd
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from libcpp.string cimport string
from libcpp.vector cimport vector
from libc.stdint cimport uint64_t
from libcpp cimport bool

# Exposes the vocabulary index type declared in lm/word_index.hh (namespace lm).
cdef extern from "lm/word_index.hh" namespace "lm":
# Declared to Cython as a plain `unsigned`; the authoritative typedef is in the header.
ctypedef unsigned WordIndex

Expand Down Expand Up @@ -39,7 +44,7 @@ cdef extern from "util/mmap.hh" namespace "util":

# Model-loading configuration from lm/config.hh (namespace lm::ngram).
# Only the members listed here are visible to Cython code.
cdef extern from "lm/config.hh" namespace "lm::ngram":
    cdef cppclass Config:
        # A single constructor declaration: `except +` makes Cython convert a
        # C++ exception thrown during construction into a Python exception.
        Config() except +
        float probing_multiplier
        LoadMethod load_method

Expand All @@ -48,3 +53,83 @@ cdef extern from "lm/model.hh" namespace "lm::ngram":
#default constructor
cdef Model *LoadVirtual(char *) except +

# File-descriptor helpers from util/file.hh (namespace util).
# Every declaration carries `except +` so that C++ exceptions surface as
# Python exceptions instead of terminating the interpreter.
cdef extern from "util/file.hh" namespace "util":
    # Wrapper around an int file descriptor: it can be built empty or from an
    # fd, pointed at another fd with reset(), and asked to hand the fd back
    # with release().  NOTE(review): ownership/close semantics are defined in
    # the C++ header - confirm there before relying on them.
    cdef cppclass scoped_fd:
        scoped_fd() except +
        scoped_fd(int) except +
        void reset(int) except +
        int release() except +

    # The C++ prototypes take `const char *`; declaring them that way also
    # lets Cython callers pass a `const char*` (e.g. from string.c_str()).
    int OpenReadOrThrow(const char*) except +
    int CreateOrThrow(const char*) except +
    string DefaultTempDirectory() except +
    # Declared by reference because the C++ function rewrites its argument in
    # place; a by-value declaration hides that the caller's string changes.
    void NormalizeTempPrefix(string&) except +

# Memory-size helpers declared in util/usage.hh (namespace util).
# `except +` makes Cython translate C++ exceptions into Python exceptions.
cdef extern from "util/usage.hh" namespace "util":
# Returns a 64-bit unsigned value; presumably a byte count - confirm in the header.
uint64_t GuessPhysicalMemory() except +
# Converts a string to a 64-bit unsigned size; accepted formats are defined by the C++ side.
uint64_t ParseSize(string) except +

# Stream configuration structs from util/stream/config.hh (namespace util::stream).
# Only field names and types are declared here; defaults and units come from the C++ header.
cdef extern from "util/stream/config.hh" namespace "util::stream":
# Sorting settings: a temp-file prefix string plus two size_t memory/buffer sizes.
cdef struct SortConfig:
string temp_prefix
size_t buffer_size
size_t total_memory

# Chain settings: three size_t fields (entry size, block count, total memory).
cdef struct ChainConfig:
size_t entry_size
size_t block_count
size_t total_memory

# Configuration struct from lm/builder/initial_probabilities.hh (namespace lm::builder).
cdef extern from "lm/builder/initial_probabilities.hh" namespace "lm::builder":
# Holds two ChainConfig members and one boolean switch for unigram interpolation.
cdef struct InitialProbabilitiesConfig:
ChainConfig adder_in
ChainConfig adder_out
bool interpolate_unigrams

# Enum from lm/lm_exception.hh (namespace lm) with three enumerators.
# NOTE(review): presumably selects how a warning condition is handled
# (raise / report / ignore) - confirm against the C++ header.
cdef extern from "lm/lm_exception.hh" namespace "lm":
ctypedef enum WarningAction:
THROW_UP
COMPLAIN
SILENT

# Discount struct from lm/builder/discount.hh (namespace lm::builder).
cdef extern from "lm/builder/discount.hh" namespace "lm::builder":
cdef struct Discount:
# Fixed-size array of four floats; what each slot means is defined by the C++ side.
float amount[4]

# Discount configuration from lm/builder/adjust_counts.hh (namespace lm::builder).
cdef extern from "lm/builder/adjust_counts.hh" namespace "lm::builder":
# Groups a vector of Discount values, one additional Discount, and a WarningAction.
# NOTE(review): names suggest per-order overrides plus a fallback - confirm in the header.
cdef struct DiscountConfig:
vector[Discount] overwrite
Discount fallback
WarningAction bad_action

# Output-side classes from lm/builder/output.hh (namespace lm::builder).
# Constructors and methods use `except +` so C++ exceptions become Python exceptions.
cdef extern from "lm/builder/output.hh" namespace "lm::builder":

# Opaque base class: no members are exposed, it exists so hooks can be passed by pointer.
cdef cppclass OutputHook:
pass

# Constructed from a C string and two booleans; hooks are attached through Add().
# NOTE(review): whether Add() takes ownership of the pointer is not visible here - confirm.
cdef cppclass Output:
Output(char*, bool, bool) except +
void Add(OutputHook*) except +

# OutputHook subclass constructed from an int and a boolean.
cdef cppclass PrintHook(OutputHook):
PrintHook(int, bool) except +

# Training pipeline entry point and its configuration, from
# lm/builder/pipeline.hh (namespace lm::builder).
cdef extern from "lm/builder/pipeline.hh" namespace "lm::builder":
    # Aggregates every setting the pipeline reads.  Only names and types are
    # declared here; defaults and meanings come from the C++ header.
    # Declared with `cdef` like the other structs in this file.
    cdef struct PipelineConfig:
        size_t order
        SortConfig sort
        InitialProbabilitiesConfig initial_probs
        ChainConfig read_backoffs
        WordIndex vocab_estimate
        size_t minimum_block
        size_t block_count
        vector[uint64_t] prune_thresholds
        bool prune_vocab
        string prune_vocab_file
        bool renumber_vocabulary
        DiscountConfig discount
        bool output_q
        uint64_t vocab_size_for_unk
        WarningAction disallowed_symbol_action

    # Runs the pipeline on a config, an int (presumably the input text file
    # descriptor - confirm in the header) and an Output.  The config and the
    # Output are declared by reference to match the C++ prototype, so Cython
    # never treats them as by-value copies.  `except +` converts C++
    # exceptions into Python exceptions.
    void Pipeline(PipelineConfig&, int, Output&) except +
Loading