Cleaned data is now kept separate from the raw data

This commit is contained in:
Drew Giffin
2025-10-19 17:17:41 -04:00
parent 75f4279ea9
commit 106637956c
+7 -5
View File
@@ -10,10 +10,10 @@ def main():
df = load_data() df = load_data()
#preprocessing #preprocessing
preprocess_data(df) df_clean = preprocess_data(df)
#exploratory data analysis #exploratory data analysis
draw_plots(df) draw_plots(df_clean)
def load_data(): def load_data():
df = pd.read_csv(data_path, encoding="ascii", delimiter=",") df = pd.read_csv(data_path, encoding="ascii", delimiter=",")
@@ -39,7 +39,8 @@ def clean_data(df):
print(df.isnull().sum()) print(df.isnull().sum())
print("\n") print("\n")
df.dropna(inplace=True) df.dropna(inplace=False)
return df
def order_data_stress_level(df): def order_data_stress_level(df):
df["Stress_Level"] = pd.Categorical( df["Stress_Level"] = pd.Categorical(
@@ -77,7 +78,8 @@ def draw_plots(df):
display_feature_boxplots(df) display_feature_boxplots(df)
def preprocess_data(df): def preprocess_data(df):
clean_data(df) df_clean = clean_data(df)
order_data_stress_level(df) order_data_stress_level(df_clean)
return df_clean
main() main()