The average accuracy is calculated after 1000 trainings

This commit is contained in:
Drew Giffin
2025-10-22 08:57:57 -04:00
parent b5f6069cea
commit 5c15f6204b
3 changed files with 36 additions and 22 deletions
+26 -20
View File
@@ -25,29 +25,35 @@ def main():
X, y = separate_features_and_target(df_clean)
# split into train and test data
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, stratify=y, random_state=0
)
accuracy_scores = []
# run training many times using different splits to get an average accuracy score
for i in range(1000):
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, stratify=y, random_state=i
)
# pre training processing
X_train_normalized, X_test_normalized = normalize_features(X_train, X_test)
# training
model = train_logistic_regression(X_train_normalized, y_train)
# prediction
y_pred = predict_target(model, X_test_normalized)
# pre training processing
X_train_normalized, X_test_normalized = normalize_features(X_train, X_test)
# training
model = train_logistic_regression(X_train_normalized, y_train)
# prediction
y_pred = predict_target(model, X_test_normalized)
# evaluation
le = get_label_encoder(df_clean)
# draw_feature_importance(model, X)
# draw_confusion_matrix(y_test, y_pred, le)
# draw_classification_report(y_test, y_pred, le)
evaluate_accuracy(y_test, y_pred)
# evaluation
le = get_label_encoder(df_clean)
# draw_feature_importance(model, X)
# draw_confusion_matrix(y_test, y_pred, le)
# draw_classification_report(y_test, y_pred, le)
accuracy = get_accuracy(y_test, y_pred)
accuracy_scores.append(accuracy)
print(f"Average Accuracy: {np.mean(accuracy_scores):.4f}")
print(f"Samples: {len(accuracy_scores)}")
def evaluate_accuracy(y_test, y_pred):
    """Print the accuracy of ``y_pred`` measured against ``y_test``.

    The score is whatever ``accuracy_score(y_test, y_pred)`` returns; it is
    written to stdout with four decimal places. Nothing is returned.
    """
    score = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy: {score:.4f}")
def get_accuracy(y_test, y_pred):
    """Return the value of ``accuracy_score(y_test, y_pred)``.

    Unlike a printing helper, this hands the score back to the caller so it
    can be collected (e.g. appended to a list and averaged).
    """
    return accuracy_score(y_test, y_pred)
def get_label_encoder(df):
le = LabelEncoder()