Refactored evaluation

This commit is contained in:
Drew Giffin
2025-10-20 14:08:44 -04:00
parent cb2ff18b89
commit 602623029c
+11 -7
View File
@@ -19,7 +19,7 @@ def main():
df_clean = preprocess_data(df) df_clean = preprocess_data(df)
# exploratory data analysis # exploratory data analysis
- # draw_graphs(df_clean)
+ # draw_plots(df_clean)
# separate features and target # separate features and target
X = df_clean.drop('Stress_Level', axis=1) X = df_clean.drop('Stress_Level', axis=1)
@@ -37,10 +37,11 @@ def main():
# feature engineering # feature engineering
X_train_normalized, X_test_normalized = normalize_features(X_train, X_test) X_train_normalized, X_test_normalized = normalize_features(X_train, X_test)
+ model = train_logistic_regression(X_train_normalized, X_test_normalized, y_train, y_test, le)
+ evaluate_model(model, X, X_test_normalized, y_test, le)
+ def evaluate_model(model, X, X_test, y_test, le):
feature_names = X.columns feature_names = X.columns
- model = train_logistic_regression(X_train_normalized, X_test_normalized, y_train, y_test, le, feature_names)
y_pred = model.predict(X_test) y_pred = model.predict(X_test)
# Evaluate # Evaluate
@@ -57,12 +58,15 @@ def main():
}) })
print(feature_importance.sort_values(by='Coefficient', ascending=False)) print(feature_importance.sort_values(by='Coefficient', ascending=False))
- def train_logistic_regression(X_train, X_test, y_train, y_test, le, feature_names):
+ def train_logistic_regression(X_train, X_test, y_train, y_test, le):
model = LogisticRegression( model = LogisticRegression(
solver='lbfgs', solver='lbfgs',
max_iter=10000 max_iter=10000
) )
model.fit(X_train, y_train) model.fit(X_train, y_train)
return model return model
def load_data(): def load_data():
@@ -121,7 +125,7 @@ def display_feature_boxplots(df):
plt.title(f"{col} by Stress Level") plt.title(f"{col} by Stress Level")
plt.show() plt.show()
- def draw_graphs(df):
+ def draw_plots(df):
display_feature_distributions_histogram(df) display_feature_distributions_histogram(df)
display_scatter_plot_matrix(df) display_scatter_plot_matrix(df)
display_correlation_heatmap(df) display_correlation_heatmap(df)
@@ -134,7 +138,7 @@ def preprocess_data(df):
def normalize_features(X_train, X_test): def normalize_features(X_train, X_test):
scaler = MinMaxScaler() scaler = MinMaxScaler()
- X_train_scaled = scaler.fit_transform(X_train)
+ X_train_scaled = scaler.fit_transform(X_train) # fit only on training data
X_test_scaled = scaler.transform(X_test) X_test_scaled = scaler.transform(X_test)
return X_train_scaled, X_test_scaled return X_train_scaled, X_test_scaled