Features are normalized

This commit is contained in:
Drew Giffin
2025-10-19 17:25:57 -04:00
parent 106637956c
commit 441c121751
+17 -4
View File
@@ -2,6 +2,7 @@ import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
# Path of the CSV dataset; load_data() passes it to pd.read_csv.
data_path = "student_lifestyle_dataset.csv"
@@ -13,7 +14,10 @@ def main():
df_clean = preprocess_data(df)
#exploratory data analysis
draw_plots(df_clean)
# draw_plots(df_clean)
#feature engineering
normalize_features(df_clean)
def load_data():
df = pd.read_csv(data_path, encoding="ascii", delimiter=",")
@@ -35,9 +39,9 @@ def inspect_data(df):
print("\n")
def clean_data(df):
    """Report missing values and return ``df`` without incomplete rows.

    Prints the per-column count of missing values, then drops every row
    that contains at least one missing value.

    Args:
        df: DataFrame to clean. It is not modified.

    Returns:
        A new DataFrame holding only the rows of ``df`` with no missing
        values.
    """
    print("Missing values:")
    print(df.isnull().sum())
    print("\n")
    # dropna() is not in-place by default: it returns the filtered frame.
    # The result must be returned, otherwise the rows are never removed.
    return df.dropna()
@@ -82,4 +86,13 @@ def preprocess_data(df):
order_data_stress_level(df_clean)
return df_clean
def normalize_features(df, columns=None):
    """Min-max scale feature columns of ``df`` in place.

    Each selected column is rescaled independently with scikit-learn's
    ``MinMaxScaler`` using its default settings.

    Args:
        df: DataFrame whose columns are overwritten with scaled values.
        columns: Optional iterable of column names to scale. When omitted,
            the six lifestyle/GPA columns listed below are scaled.

    Returns:
        None. ``df`` is modified in place.
    """
    if columns is None:
        columns = [
            "Study_Hours_Per_Day",
            "Extracurricular_Hours_Per_Day",
            "Sleep_Hours_Per_Day",
            "Social_Hours_Per_Day",
            "Physical_Activity_Hours_Per_Day",
            "GPA",
        ]
    columns = list(columns)
    if not columns:
        # Nothing to scale.
        return

    # MinMaxScaler fits every feature (column) separately, so one call over
    # all columns gives the same result as fitting each column on its own.
    df[columns] = MinMaxScaler().fit_transform(df[columns])
# Entry point: main() is called unconditionally when this file is run.
main()