# -*- coding: utf-8 -*-
"""Regression Autos

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1beRq-XbKLbs_4AOP_0nIX12uKUdTQW6U

Loads a CSV of car records, imputes missing numeric features, one-hot
encodes the categorical columns, fits a linear regression that predicts
the target column, and prints the hold-out MSE, R-squared and the fitted
coefficients.
"""

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
file_path = "/content/_autos - regression - autos.csv"  # Replace with your actual file path
target_column = 'price'  # Column the model is trained to predict
data = pd.read_csv(file_path)

# Fail early with a clear message instead of an opaque error further down:
# a non-numeric target would otherwise be one-hot encoded by get_dummies and
# then be "missing" when it is dropped from the feature matrix.
if target_column not in data.columns:
    raise KeyError(f"Target column {target_column!r} not found in {file_path!r}")
if not pd.api.types.is_numeric_dtype(data[target_column]):
    raise TypeError(
        f"Target column {target_column!r} must be numeric, "
        f"got dtype {data[target_column].dtype}"
    )

# Rows without a known target cannot be used for training or evaluation.
# They are removed *before* imputation so that no label is ever replaced by
# a fabricated median value.
data = data.dropna(subset=[target_column])

# Preprocessing: fill missing numerical values with the column median.
# Only numeric columns are imputed (numeric_only=True); the target has no
# missing values left at this point, so it is unaffected.
data = data.fillna(data.median(numeric_only=True))

# Encode categorical variables using one-hot encoding. drop_first=True drops
# one level per categorical column so the dummies are not perfectly collinear.
data = pd.get_dummies(data, drop_first=True)

# Define the features (X) and target variable (y)
X = data.drop(columns=[target_column])
y = data[target_column]

# Split the data into training and testing sets (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Display results
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

# Optional: Display coefficients for interpretation (one row per feature)
coefficients = pd.DataFrame(model.coef_, X.columns, columns=['Coefficient'])
print(coefficients)