Diabetes pandas (Python)
import pandas as pd
# Column names assigned to the dataset, in file order; 'class' is the label.
COLUMN_NAMES = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']


def load_dataset(path='pima-indians-diabetes.csv'):
    """Read the diabetes CSV at *path* and label its columns.

    The file is read with ``header=None`` so that the first record is kept as
    data instead of being consumed as a header row.
    NOTE(review): this assumes the CSV has no header line (the column names
    are supplied here) -- confirm against the actual file.
    """
    return pd.read_csv(path, header=None, names=COLUMN_NAMES)


def collapse_consecutive_runs(df, label='class'):
    """Replace each run of consecutive rows sharing *label* with one row.

    Every feature column of the resulting row holds the mean of that feature
    over the run; the *label* column holds the run's label value. Rows keep
    their original relative order and the original column order is preserved.

    Only *adjacent* rows are merged: a label that reappears later in the
    frame starts a new run (``drop_duplicates(subset=label)`` would instead
    keep a single row per distinct label for the whole frame).
    """
    labels = df[label]
    # A new run starts wherever the label differs from the previous row's
    # label; the cumulative sum turns those start flags into a run number.
    # A plain array is used as the grouping key so it is matched to rows by
    # position rather than by index label.
    run_id = labels.ne(labels.shift()).cumsum().to_numpy()

    collapsed = df.drop(columns=label).groupby(run_id).mean()
    # All labels inside a run are equal, so the first one represents the run.
    collapsed[label] = labels.groupby(run_id).first()
    return collapsed[list(df.columns)].reset_index(drop=True)


def compute_percentage_reduction(original_rows, reduced_rows):
    """Return by what percentage the row count shrank from the original.

    Returns 0.0 when *original_rows* is 0 to avoid a division by zero.
    """
    if original_rows == 0:
        return 0.0
    return ((original_rows - reduced_rows) / original_rows) * 100


if __name__ == "__main__":
    # Read the dataset with its column names
    df = load_dataset()
    # Record the size *before* collapsing so the reduction is measured
    # against the full dataset
    original_rows = len(df)
    # Merge consecutive rows with the same class into their feature averages
    averages = collapse_consecutive_runs(df)
    # Calculate the percentage reduction
    reduced_rows = len(averages)
    percentage_reduction = compute_percentage_reduction(original_rows, reduced_rows)
    # Print the new dataframe
    print(averages)
    print(f"Percentage reduction: {percentage_reduction:.2f}%")
Comments
Post a Comment