-
Notifications
You must be signed in to change notification settings - Fork 0
/
carProb.py
91 lines (65 loc) · 2.29 KB
/
carProb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# Load libraries
import pandas
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import seaborn as sns
#sns.set()
# Load the car listings. The CSV is read with explicit column names, so the
# file is presumably header-less -- TODO confirm against thefile.csv.
url = "./thefile.csv"
names = ['Year', 'Maker', 'Model', 'Price', 'Mileage', 'Engine', 'Description']
dataset = pandas.read_csv(url, names=names)

# Quick look at the raw data: first rows and summary statistics.
print(dataset.head())
print(dataset.describe())

# drop_duplicates() returns a new frame unless inplace=True; the result must
# be assigned back, otherwise the duplicates are still present and the second
# describe() below would print exactly the same numbers as the first.
dataset = dataset.drop_duplicates(keep='first')
print(dataset.describe())  # summary statistics after de-duplication

# Pairwise relationships between the listing variables. `height` is the
# current name of the per-facet size argument (`size` is deprecated).
# NOTE(review): Maker/Model look like text columns, which pairplot would not
# draw -- confirm whether they should stay in this selection.
sns.pairplot(dataset[['Year', 'Maker', 'Model', 'Mileage', 'Price']], height=1.5)
plt.show()

# Correlation heatmap. Restrict to numeric columns first: corr() on a frame
# that still contains string columns raises on recent pandas versions.
matrix = dataset.select_dtypes(include='number').corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(matrix, vmax=0.7, square=True, ax=ax)
ax.set_title('Car Price Variables')
plt.show()

print("--SOLVED CARPROB--")