Skip to content

Commit

Permalink
'update20241203' (#1589)
Browse files Browse the repository at this point in the history
Add '--modeling-unit' and '--bpe-vocab' arguments to sherpa-onnx/python-api-examples/streaming_server.py so that both can be specified on the command line.
  • Loading branch information
goddamnVincent authored Dec 4, 2024
1 parent 0d6bf52 commit 47a2dd4
Showing 1 changed file with 24 additions and 0 deletions.
24 changes: 24 additions & 0 deletions python-api-examples/streaming_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,28 @@ def add_hotwords_args(parser: argparse.ArgumentParser):
--hotwords-file is given.
""",
)
parser.add_argument(
"--modeling-unit",
type=str,
default='cjkchar',
help="""
The modeling unit of the used model. Current supported units are:
- cjkchar(for Chinese)
- bpe(for English like languages)
- cjkchar+bpe(for multilingual models)
""",
)
parser.add_argument(
"--bpe-vocab",
type=str,
default='',
help="""
The bpe vocabulary generated by sentencepiece toolkit.
It is only used when modeling-unit is bpe or cjkchar+bpe.
if you can’t find bpe.vocab in the model directory, please run:
python script/export_bpe_vocab.py --bpe-model exp/bpe.model
""",
)


def add_modified_beam_search_args(parser: argparse.ArgumentParser):
Expand Down Expand Up @@ -409,6 +431,8 @@ def create_recognizer(args) -> sherpa_onnx.OnlineRecognizer:
rule2_min_trailing_silence=args.rule2_min_trailing_silence,
rule3_min_utterance_length=args.rule3_min_utterance_length,
provider=args.provider,
modeling_unit=args.modeling_unit,
bpe_vocab=args.bpe_vocab
)
elif args.paraformer_encoder:
recognizer = sherpa_onnx.OnlineRecognizer.from_paraformer(
Expand Down

0 comments on commit 47a2dd4

Please sign in to comment.