Missing data is reported and cleaned
This commit is contained in:
@@ -5,6 +5,7 @@ data_path = "student_lifestyle_dataset.csv"
|
||||
def main():
    """Run the pipeline: load the dataset, print an overview, then clean it."""
    raw_frame = load_data()
    inspect_data(raw_frame)
    # The cleaned frame is bound but not consumed further inside this function.
    cleaned_frame = clean_data(raw_frame)
|
||||
|
||||
def load_data():
|
||||
df = pd.read_csv(data_path, encoding="ascii", delimiter=",")
|
||||
@@ -23,4 +24,12 @@ def inspect_data(df):
|
||||
print(df.describe(include="all"))
|
||||
print("\n")
|
||||
|
||||
def clean_data(df):
    """Print the per-column missing-value counts and drop incomplete rows.

    Args:
        df: The DataFrame to report on and clean. It is not modified.

    Returns:
        A new DataFrame containing only the rows of ``df`` that have no
        missing values.
    """
    missing_per_column = df.isnull().sum()

    print("Missing values:")
    print(missing_per_column)
    print("\n")

    # dropna() returns a new frame by default, so the caller's df stays intact.
    complete_rows = df.dropna()
    return complete_rows
|
||||
|
||||
# Script entry point: the call is unguarded (no ``__name__`` check), so the
# pipeline runs whenever this module is executed or imported.
main()
|
||||
Reference in New Issue
Block a user