# Diabetes pandas (Python)

import pandas as pd

# Column names assigned to the dataset: eight features followed by the label.
COLUMN_NAMES = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']


def load_dataset(path: str) -> pd.DataFrame:
    """Read the CSV at *path* and label its columns with COLUMN_NAMES.

    The first line is probed before the real read: if any of its fields is
    text it is treated as a header row and replaced by COLUMN_NAMES;
    otherwise it is kept as data. A plain ``pd.read_csv(path)`` would
    silently consume the first record of a headerless file as the header.

    Raises:
        ValueError: if the file does not have exactly len(COLUMN_NAMES)
            columns.
    """
    probe = pd.read_csv(path, header=None, nrows=1)
    if probe.shape[1] != len(COLUMN_NAMES):
        raise ValueError(
            f"Expected {len(COLUMN_NAMES)} columns, found {probe.shape[1]}"
        )
    # Numeric fields parse to numbers and blanks to NaN, so a str value can
    # only come from a textual header line.
    has_header = any(isinstance(value, str) for value in probe.iloc[0])
    return pd.read_csv(path, header=0 if has_header else None, names=COLUMN_NAMES)


def collapse_consecutive_classes(
    frame: pd.DataFrame, class_column: str = 'class'
) -> pd.DataFrame:
    """Merge each run of consecutive rows sharing a class into one row.

    Every maximal run of adjacent rows with the same value in *class_column*
    becomes a single row holding the mean of each feature column plus the
    run's class label. Runs keep their original order, and a class that
    reappears later in the data starts a new run.

    Returns:
        A new DataFrame with *class_column* first, followed by the averaged
        feature columns, indexed 0..n_runs-1.
    """
    labels = frame[class_column]
    # A new run starts wherever the label differs from the previous row;
    # the running sum of those change flags numbers the runs 1, 2, 3, ...
    # A plain array is used so grouping is positional, not index-aligned.
    run_ids = labels.ne(labels.shift()).cumsum().to_numpy()

    collapsed = frame.drop(columns=class_column).groupby(run_ids).mean()
    # Take the label from the run itself rather than averaging it, so its
    # dtype is preserved.
    collapsed.insert(0, class_column, labels.groupby(run_ids).first())
    return collapsed.reset_index(drop=True)


def reduction_percentage(original_rows: int, reduced_rows: int) -> float:
    """Return the percentage by which the row count shrank.

    Returns 0.0 when *original_rows* is 0 instead of dividing by zero.
    """
    if original_rows == 0:
        return 0.0
    return ((original_rows - reduced_rows) / original_rows) * 100


if __name__ == "__main__":
    # Read the dataset
    df = load_dataset('pima-indians-diabetes.csv')

    # Record the size before collapsing so the reduction is measured
    # against the full dataset.
    original_rows = len(df)

    # Replace runs of consecutive same-class rows with their feature averages
    averages = collapse_consecutive_classes(df)

    # Calculate the percentage reduction
    reduced_rows = len(averages)
    percentage_reduction = reduction_percentage(original_rows, reduced_rows)

    # Print the new dataframe
    print(averages)

    print(f"Percentage reduction: {percentage_reduction:.2f}%")

# Author: Engineer Raja