forked from skeskinen/bert.cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbert.h
91 lines (70 loc) · 2.12 KB
/
bert.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#ifndef BERT_H
#define BERT_H
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
// BERT_API is prefixed to every public function declared in this header so
// that the symbol is exported from a shared library build.
#if defined(_WIN32)
// Windows: always expands to dllexport; this header has no dllimport branch.
#define BERT_API __declspec(dllexport)
#else
// Other platforms: GNU-style attribute giving the symbol default visibility.
#define BERT_API __attribute__ ((visibility ("default")))
#endif
// Magic constant (per its name, a file magic).
// NOTE(review): the bytes 0x67 0x67 0x6d 0x6c are ASCII "ggml", not "ggsn" as
// the macro name suggests — confirm which tag the model files actually carry.
#define BERT_FILE_MAGIC_GGSN 0x67676d6c // ASCII 'ggml'
#ifdef __cplusplus
extern "C" {
#endif
// Run-time options; passed by reference to bert_params_parse().
// Every field carries a default value.
// NOTE: default member initializers are C++11 syntax, so this struct cannot be
// compiled as C even though the header is wrapped in extern "C" guards.
struct bert_params
{
int32_t n_threads = 6; // thread count; presumably forwarded as n_threads to the encode/eval calls — confirm
int32_t port = 8080; // server mode port to bind
const char* model = "models/all-MiniLM-L6-v2/ggml-model-q4_0.bin"; // model path
const char* prompt = "test prompt"; // default prompt text
};
// Parses the command line (argc/argv) into `params`, which is taken by
// non-const reference.
// Returns a bool; presumably true on success — confirm against the implementation.
BERT_API bool bert_params_parse(int argc, char **argv, bert_params &params);
// Context type; only forward-declared in this header, so callers handle it
// exclusively through pointers.
struct bert_ctx;
// Integer type used for vocabulary (token) ids, e.g. by bert_tokenize and
// bert_vocab_id_to_token.
typedef int32_t bert_vocab_id;
// Loads a model from the file `fname` and returns a pointer to its context.
// gpu == -1 uses the CPU; otherwise the model is loaded on the GPU whose index
// is given by the `gpu` parameter.
// NOTE(review): the value returned on failure is not visible here (presumably
// NULL) — confirm against the implementation.
BERT_API struct bert_ctx * bert_load_model_from_file(const char * fname, int gpu);
// Releases a context.
// Presumably `ctx` must be a pointer returned by bert_load_model_from_file and
// must not be used afterwards — confirm against the implementation.
BERT_API void bert_free(struct bert_ctx * ctx);
// Main API: performs both tokenization and evaluation in one call.
//   ctx        - model context
//   n_threads  - thread count
//   texts      - the input; despite the plural name this is a single
//                `const char *`
//   embeddings - output float buffer; presumably must hold at least
//                bert_n_embd(ctx) floats — confirm
BERT_API void bert_encode(
struct bert_ctx * ctx,
int32_t n_threads,
const char * texts,
float * embeddings);
// Batched counterpart of bert_encode, taking arrays of inputs and outputs.
//   ctx          - model context
//   n_threads    - thread count
//   n_batch_size - how many inputs to process at a time
//   n_inputs     - total size of the `texts` and `embeddings` arrays
//   texts        - array of input strings
//   embeddings   - array of output float buffers; presumably one per input,
//                  each holding bert_n_embd(ctx) floats — confirm
BERT_API void bert_encode_batch(
struct bert_ctx * ctx,
int32_t n_threads,
int32_t n_batch_size,
int32_t n_inputs,
const char ** texts,
float ** embeddings);
// API for running tokenization and evaluation as separate steps.
// Tokenization step:
//   ctx          - model context
//   text         - input string
//   tokens       - output buffer for token ids
//   n_tokens     - out-parameter; presumably receives the number of ids
//                  written to `tokens` — confirm
//   n_max_tokens - presumably the capacity of `tokens` — confirm
BERT_API void bert_tokenize(
struct bert_ctx * ctx,
const char * text,
bert_vocab_id * tokens,
int32_t * n_tokens,
int32_t n_max_tokens);
// Evaluation step for a single, already tokenized input.
//   ctx        - model context
//   n_threads  - thread count
//   tokens     - token ids (e.g. as produced by bert_tokenize); declared
//                non-const
//   n_tokens   - number of ids in `tokens`
//   embeddings - output float buffer; presumably must hold at least
//                bert_n_embd(ctx) floats — confirm
BERT_API void bert_eval(
struct bert_ctx * ctx,
int32_t n_threads,
bert_vocab_id * tokens,
int32_t n_tokens,
float * embeddings);
// Evaluation step for a batch of already tokenized inputs.
// NOTE: for batch processing the longest input must be first
//   ctx              - model context
//   n_threads        - thread count
//   n_batch_size     - presumably the number of entries in `batch_tokens`,
//                      `n_tokens` and `batch_embeddings` — confirm
//   batch_tokens     - array of token-id arrays
//   n_tokens         - array of token counts; presumably one per batch entry
//                      — confirm
//   batch_embeddings - array of output float buffers
BERT_API void bert_eval_batch(
struct bert_ctx * ctx,
int32_t n_threads,
int32_t n_batch_size,
bert_vocab_id ** batch_tokens,
int32_t * n_tokens,
float ** batch_embeddings);
// Accessors on a context. Meanings below are taken from the function names;
// confirm against the implementation.
// Presumably the embedding dimension, i.e. floats per output embedding.
BERT_API int32_t bert_n_embd(struct bert_ctx * ctx);
// Presumably the maximum number of tokens the model accepts per input.
BERT_API int32_t bert_n_max_tokens(struct bert_ctx * ctx);
// Maps a vocabulary id to its token string. Ownership and lifetime of the
// returned pointer are not visible here — presumably owned by `ctx`; confirm.
BERT_API const char* bert_vocab_id_to_token(struct bert_ctx * ctx, bert_vocab_id id);
#ifdef __cplusplus
}
#endif
#endif // BERT_H