Cleaned data is now kept separate from raw
This commit is contained in:
@@ -10,10 +10,10 @@ def main():
|
|||||||
df = load_data()
|
df = load_data()
|
||||||
|
|
||||||
#preprocessing
|
#preprocessing
|
||||||
preprocess_data(df)
|
df_clean = preprocess_data(df)
|
||||||
|
|
||||||
#exploratory data analysis
|
#exploratory data analysis
|
||||||
draw_plots(df)
|
draw_plots(df_clean)
|
||||||
|
|
||||||
def load_data():
|
def load_data():
|
||||||
df = pd.read_csv(data_path, encoding="ascii", delimiter=",")
|
df = pd.read_csv(data_path, encoding="ascii", delimiter=",")
|
||||||
@@ -39,7 +39,8 @@ def clean_data(df):
|
|||||||
print(df.isnull().sum())
|
print(df.isnull().sum())
|
||||||
print("\n")
|
print("\n")
|
||||||
|
|
||||||
df.dropna(inplace=True)
|
df.dropna(inplace=False)
|
||||||
|
return df
|
||||||
|
|
||||||
def order_data_stress_level(df):
|
def order_data_stress_level(df):
|
||||||
df["Stress_Level"] = pd.Categorical(
|
df["Stress_Level"] = pd.Categorical(
|
||||||
@@ -77,7 +78,8 @@ def draw_plots(df):
|
|||||||
display_feature_boxplots(df)
|
display_feature_boxplots(df)
|
||||||
|
|
||||||
def preprocess_data(df):
|
def preprocess_data(df):
|
||||||
clean_data(df)
|
df_clean = clean_data(df)
|
||||||
order_data_stress_level(df)
|
order_data_stress_level(df_clean)
|
||||||
|
return df_clean
|
||||||
|
|
||||||
main()
|
main()
|
||||||
Reference in New Issue
Block a user