Missing data is reported and cleaned
This commit is contained in:
@@ -5,6 +5,7 @@ data_path = "student_lifestyle_dataset.csv"
|
|||||||
def main():
|
def main():
|
||||||
df = load_data()
|
df = load_data()
|
||||||
inspect_data(df)
|
inspect_data(df)
|
||||||
|
df_clean = clean_data(df)
|
||||||
|
|
||||||
def load_data():
|
def load_data():
|
||||||
df = pd.read_csv(data_path, encoding="ascii", delimiter=",")
|
df = pd.read_csv(data_path, encoding="ascii", delimiter=",")
|
||||||
@@ -23,4 +24,12 @@ def inspect_data(df):
|
|||||||
print(df.describe(include="all"))
|
print(df.describe(include="all"))
|
||||||
print("\n")
|
print("\n")
|
||||||
|
|
||||||
|
def clean_data(df):
|
||||||
|
print("Missing values:")
|
||||||
|
print(df.isnull().sum())
|
||||||
|
print("\n")
|
||||||
|
|
||||||
|
df_clean = df.dropna(inplace=False)
|
||||||
|
return df_clean
|
||||||
|
|
||||||
main()
|
main()
|
||||||
Reference in New Issue
Block a user