-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_.py
32 lines (24 loc) · 861 Bytes
/
main_.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Importing libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Silencing FutureWarning messages
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# Loading dataset
# The script reads two columns from the CSV: "url" (the text that gets
# vectorized) and "label" (the target passed to the classifier).
data = pd.read_csv("data.csv")

# Featuring & labelling
y = data["label"]
url_list = data["url"]

# Splitting the raw URLs into an 80:20 train/test ratio BEFORE vectorizing,
# so the held-out URLs play no part in learning the TF-IDF vocabulary and
# IDF weights (fitting on the full dataset would leak test information into
# the features and inflate the reported accuracy).
# random_state is fixed so the split is reproducible.
urls_train, urls_test, y_train, y_test = train_test_split(
    url_list, y, test_size=0.2, random_state=42
)

# Tokenizing: fit the vectorizer on the training URLs only, then reuse the
# fitted vocabulary/IDF weights to transform the test URLs
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(urls_train)
X_test = vectorizer.transform(urls_test)

# Applying logistic regression
logit = LogisticRegression()
logit.fit(X_train, y_train)

# Evaluating model's accuracy on the held-out 20% of the data
print("Accuracy of model is: ", logit.score(X_test, y_test))