-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
51 lines (43 loc) · 1.78 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import streamlit as st
from tokenizers import Tokenizer
import fasttext
import ml_collections
def config():
    """
    Build the read-only configuration used by the app.

    Returns:
        An ``ml_collections.FrozenConfigDict`` with two entries:
        ``model_file`` (file name of the FastText model) and
        ``tokenizer`` (file name of the tokenizer definition).
    """
    return ml_collections.FrozenConfigDict(
        {
            "model_file": "fasttext-model.ftz",
            "tokenizer": "tokenizer.json",
        }
    )
def predict(text, tokenizer, model):
    """
    Return the FastText model's prediction for a piece of text.

    The input string is tokenized, the tokens are joined with single spaces,
    and the resulting single line is passed to the model for prediction.

    Args:
        text(str) : Input string
        tokenizer : Custom tokenizer object; ``tokenizer.encode(text).tokens``
            must yield the token strings
        model : FastText model; called as ``model.predict(line)``

    Returns:
        Whatever ``model.predict`` returns for the tokenized line, unchanged.
    """
    tokenized_text = ' '.join(tokenizer.encode(text).tokens)
    # fastText's predict() handles one line at a time and raises ValueError
    # when the string contains a newline. Multi-line input can leave newlines
    # inside tokens, so turn them into spaces, which fastText already treats
    # as word separators.
    single_line = tokenized_text.replace('\n', ' ')
    return model.predict(single_line)
if __name__ == '__main__':
    # Load the tokenizer and the FastText model named in the configuration.
    cfg = config()
    loaded_tokenizer = Tokenizer.from_file(cfg.tokenizer)
    loaded_model = fasttext.load_model(cfg.model_file)

    # Page title and introductory text.
    st.write("# Browse Node ID Classification")
    description = """
Browse node ID's are numeric codes that identify inside Amazon, a given
product category. There are more than 30 thousand product categories on
Amazon, each one identified by a unique Node ID. In Amazon's own words
> *Browse Node ID's are positive integers that uniquely identify product
> sets, such as Literature & Fiction: (17), Medicine: (13996), Mystery &
> Thrillers: (18), Nonfiction: (53), Outdoors & Nature: (290060). Amazon
> uses thousands of browse node ID's*
"""
    st.write(description)
    product_description = st.text_area("Enter the Product Description",
                                       height=400)

    # The text area is empty until the user types something. Skip the model
    # for blank input instead of displaying a "prediction" for nothing.
    if product_description.strip():
        labels, _probabilities = predict(product_description,
                                         loaded_tokenizer, loaded_model)
        if labels:
            # Assumes the model uses fastText's default "__label__" prefix
            # (9 characters); strip it so only the node ID is displayed.
            label_prefix = "__label__"
            browse_node_id = labels[0]
            st.write(f"# Node ID {browse_node_id[len(label_prefix):]}")
        else:
            # An empty label tuple would otherwise crash on unpacking.
            st.warning("The model returned no prediction for this "
                       "description.")