Posts

Showing posts from June, 2023

Diabetes pandas(Python)

 import pandas as pd # Read the dataset df = pd.read_csv('pima-indians-diabetes.csv') # Set column names df.columns = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class'] # Identify and remove duplicate consecutive rows with the same class df = df.drop_duplicates(subset='class', keep='first') # Calculate the averages of the features averages = df.groupby('class').mean().reset_index() # Calculate the percentage reduction original_rows = len(df) reduced_rows = len(averages) percentage_reduction = ((original_rows - reduced_rows) / original_rows) * 100 # Print the new dataframe print(averages) print(f"Percentage reduction: {percentage_reduction:.2f}%")

Species sepal & petal etc

 import pandas as pd # Read the dataset data = pd.read_csv("Iris.csv") # Calculate the average measurements for each distinct species avg_measurements = data.groupby("species").mean().reset_index() # Create a new dataframe with species and average features avg_features = avg_measurements[["species", "sepal_length", "sepal_width", "petal_length", "petal_width"]] # Rename the columns for better representation avg_features.columns = ["Species", "Avg Sepal Length", "Avg Sepal Width", "Avg Petal Length", "Avg Petal Width"] # Print the new dataframe print(avg_features) Output Species Avg Sepal Length Avg Sepal Width Avg Petal Length Avg Petal Width 0 setosa 5.006 3.428 1.462 0.246 1 versicolor 5.936 2.770 4.260 1.326 2 virginica 6.588 2.974 5.552 2.026 a. Which species is having the maximum sepal width? max_sepal_width_species = avg_features.loc[avg_features["Avg Sepal

Python Weather Lab Work

  import pandas as pd  weather_data= {      'day':['4/05/2022', '4/04 /2023', '10/02/2011', '30/09/2021', '9/10/2001','13/05/2019'],     'temp':['20','30','45','39','25','51'],     'windspeed':['7','3','5','9'],      'event':['rain', 'sunny', 'Litt cloudy', 'cloudy', 'sunny', 'windy'],     } df= pd.DataFrame(weather_data) rows, columns= df.shape df.tail(2)  df[2:5] df.columns  df['day']  df[['day' 'event']]  df['temp'].max()  df['temp'].mean()  df['temp'].min()  df['temp'].std()  df.describe()  df['temp']=df['temp'].astype(int)  df[df['temp']>32] df[df.tempdf.temp.max()]  df[['day', 'temp']][df.tempdf.temp.max()]  #df-index()  df.set_index("day", inplace=True)  df.loc['4