From 441c121751b73d23472005dd258283cf270f22e9 Mon Sep 17 00:00:00 2001
From: Drew Giffin
Date: Sun, 19 Oct 2025 17:25:57 -0400
Subject: [PATCH] Features are normalized

---
Review note: normalize_features was consolidated into a single
MinMaxScaler.fit_transform call over all six columns. The commit id on the
first line and the post-image blob hash on the "index" line predate that
edit.

 main.py | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/main.py b/main.py
index 9fe235b..bff6dbb 100644
--- a/main.py
+++ b/main.py
@@ -2,6 +2,7 @@ import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
+from sklearn.preprocessing import MinMaxScaler
 
 data_path = "student_lifestyle_dataset.csv"
 
@@ -13,7 +14,10 @@ def main():
     df_clean = preprocess_data(df)
 
     #exploratory data analysis
-    draw_plots(df_clean)
+    # draw_plots(df_clean)
+
+    #feature engineering
+    normalize_features(df_clean)
 
 def load_data():
     df = pd.read_csv(data_path, encoding="ascii", delimiter=",")
@@ -35,9 +39,9 @@ def inspect_data(df):
     print("\n")
 
 def clean_data(df):
-    print("Missing values:")
-    print(df.isnull().sum())
-    print("\n")
+    # print("Missing values:")
+    # print(df.isnull().sum())
+    # print("\n")
     df.dropna(inplace=False)
     return df
 
@@ -82,4 +86,22 @@ def preprocess_data(df):
     order_data_stress_level(df_clean)
     return df_clean
 
+def normalize_features(df):
+    """Min-max scale the hour-per-day columns and GPA of df in place.
+
+    MinMaxScaler rescales every column independently, so a single
+    fit_transform call over all columns gives the same values as
+    fitting each column on its own. Returns the same DataFrame.
+    """
+    columns = [
+        "Study_Hours_Per_Day",
+        "Extracurricular_Hours_Per_Day",
+        "Sleep_Hours_Per_Day",
+        "Social_Hours_Per_Day",
+        "Physical_Activity_Hours_Per_Day",
+        "GPA",
+    ]
+    df[columns] = MinMaxScaler().fit_transform(df[columns])
+    return df
+
 main()
\ No newline at end of file