Digital: Python Pandas_FP Hands-on Solution | Course ID 55937 | TCS Fresco Play

 



Join our channel if you haven’t joined yet: https://t.me/+YQHE-_1QlGc5MTFl

Python Pandas Hands-On Exercises

1. Pandas Data Structures 

# Exercise 1: build Series and DataFrames describing two classes of students.
import pandas as pd
import numpy as np  # the original line read "importnumpy", which is a SyntaxError

# Class A: fixed height/weight values, labelled by student id.
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']
print(heights_A.shape)

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']
print(weights_A.dtypes)

# Assigning labelled Series to an empty frame adopts their index as row labels.
df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A
print(df_A.shape)

# Class B: values sampled from normal distributions. Seeding the generator
# makes the sampled values repeatable from run to run.
my_mean = 170.0
my_std = 25.0
np.random.seed(100)
heights_B = pd.Series(np.random.normal(loc=my_mean, scale=my_std, size=5))
heights_B.index = ['s1', 's2', 's3', 's4', 's5']

# Weights have their own distribution parameters. The original sampled them
# with the height parameters (my_mean/my_std), leaving these two unused.
# The generator is not re-seeded here, so the weights continue the random
# stream started for the heights above.
my_mean1 = 75.0
my_std1 = 12.0
weights_B = pd.Series(np.random.normal(loc=my_mean1, scale=my_std1, size=5))
weights_B.index = ['s1', 's2', 's3', 's4', 's5']
print(heights_B.mean())

df_B = pd.DataFrame()
df_B['Student_height'] = heights_B
df_B['Student_weight'] = weights_B
print(df_B.columns)


2. Accessing Pandas Data Structures 

# Exercise 2: select elements of a Series and rows of a DataFrame.
import pandas as pd
import numpy as np

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

# The index holds string labels, so positional access must go through .iloc.
# Plain heights_A[1] relies on an integer-key fallback that pandas deprecated.
print(heights_A.iloc[1])
print(heights_A.iloc[[1, 2, 3]])

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A

# Selecting a single column of a DataFrame yields a Series.
height = df_A['Student_height']
print(type(height))

# Rows whose label is one of the listed ids, kept in the frame's own order.
df_s1s2 = df_A[df_A.index.isin(['s1', 's2'])]
print(df_s1s2)

# .loc with a list of labels returns the rows in exactly the requested order,
# replacing the original two-step isin filter followed by reindex.
df_s2s5s1 = df_A.loc[['s2', 's5', 's1']]
print(df_s2s5s1)

df_s1s4 = df_A[df_A.index.isin(['s1', 's4'])]
print(df_s1s4)


3. Working with CSV files

# Exercise 3: write DataFrames to CSV and read them back with different options.
import pandas as pd
import numpy as np

# Row labels shared by every Series built below.
_student_ids = ['s1', 's2', 's3', 's4', 's5']

# --- Class A: saved together with its row labels ---------------------------
heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4], index=list(_student_ids))
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9], index=list(_student_ids))
df_A = pd.DataFrame({
    'Student_height': heights_A,
    'Student_weight': weights_A,
})
df_A.to_csv('classA.csv')

# Read back with default options: the saved labels come back as an ordinary
# first column.
df_A2 = pd.read_csv('classA.csv')
print(df_A2)

# Read back again, this time using that first column as the row index.
df_A3 = pd.read_csv('classA.csv', index_col='Unnamed: 0')
print(df_A3)

# --- Class B: sampled values, saved without row labels ---------------------
my_mean = 170.0
my_std = 25.0
my_mean1 = 75.0
my_std1 = 12.0

# The generator is re-seeded before each draw, so both draws start from the
# same point in the random stream.
np.random.seed(100)
heights_B = pd.Series(np.random.normal(my_mean, my_std, 5), index=list(_student_ids))
np.random.seed(100)
weights_B = pd.Series(np.random.normal(my_mean1, my_std1, 5), index=list(_student_ids))

df_B = pd.DataFrame({
    'Student_height': heights_B,
    'Student_weight': weights_B,
})
df_B.to_csv('classB.csv', index=False)

# Default read: the first line of the file supplies the column names.
df_B2 = pd.read_csv('classB.csv')
print(df_B2)

# header=None: the header line is read as an ordinary data row.
df_B3 = pd.read_csv('classB.csv', header=None)
print(df_B3)

# Additionally skip the first two lines of the file.
df_B4 = pd.read_csv('classB.csv', header=None, skiprows=2)
print(df_B4)


4. Indexing Dataframes

# Exercise 4: work with DatetimeIndex and MultiIndex objects.
import pandas as pd
import numpy as np

# One entry per day from 1 Sep 2017 to 15 Sep 2017 inclusive.
DatetimeIndex = pd.date_range(start='09/1/2017', end='09/15/2017')
print(DatetimeIndex[2])

# Parse the date strings and test which of them occur in the range above.
datelist = ['14-Sep-2017', '9-Sep-2017']
dates_to_be_searched = pd.to_datetime(datelist)
print(dates_to_be_searched)
print(dates_to_be_searched.isin(DatetimeIndex))

# Two parallel arrays of ten entries each: position i pairs a class name with
# a student id.
arraylist = [['classA'] * 5 + ['classB'] * 5, ['s1', 's2', 's3', 's4', 's5'] * 2]

# from_arrays zips the parallel arrays into 10 (class, student) pairs. The
# original used from_product, which takes the Cartesian product of the two
# 10-element lists and so built a 100-entry index full of duplicates.
mi_index = pd.MultiIndex.from_arrays(arraylist, names=['First Level', 'Second Level'])
print(mi_index.levels)


5. Data Cleaning

# Exercise 5: introduce missing values into a DataFrame and drop affected rows.
import pandas as pd
import numpy as np

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A

# Blank out the whole row for s3.
df_A.loc['s3'] = np.nan

# Blank out only the weight of s5. A single .loc[row, column] assignment
# writes into df_A itself; the original chained form df_A.loc['s5'][1] = ...
# assigns into an intermediate object and may leave df_A unchanged.
df_A.loc['s5', 'Student_weight'] = np.nan

# Keep only the rows that have no missing value in any column.
df_A2 = df_A.dropna(how='any')
print(df_A2)


6. Data Aggregation

# Exercise 6: filter rows with boolean masks and aggregate by group.
import pandas as pd
import numpy as np

_student_ids = ['s1', 's2', 's3', 's4', 's5']

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4], index=list(_student_ids))
weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9], index=list(_student_ids))

df_A = pd.DataFrame({
    'Student_height': heights_A,
    'Student_weight': weights_A,
})

# Rows with height above 160.0 and, at the same time, weight below 80.0.
_height_ok = df_A['Student_height'] > 160.0
_weight_ok = df_A['Student_weight'] < 80.0
df_A_filter1 = df_A.loc[_height_ok & _weight_ok]
print(df_A_filter1)

# Rows whose label is 's5'.
_is_s5 = df_A.index.isin(['s5'])
df_A_filter2 = df_A.loc[_is_s5]
print(df_A_filter2)

# Add a gender column (after the filters, so they do not contain it), then
# print the per-gender mean of the remaining columns.
df_A['Gender'] = ['M', 'F', 'M', 'M', 'F']
df_groups = df_A.groupby('Gender')
_group_means = df_groups.mean()
print(_group_means)


7. Data Merge 1

# Exercise 7: append one row to a DataFrame and stack two DataFrames.
import pandas as pd
import numpy as np

heights_A = pd.Series([176.2, 158.4, 167.6, 156.2, 161.4])
heights_A.index = ['s1', 's2', 's3', 's4', 's5']

weights_A = pd.Series([85.1, 90.2, 76.8, 80.4, 78.9])
weights_A.index = ['s1', 's2', 's3', 's4', 's5']

df_A = pd.DataFrame()
df_A['Student_height'] = heights_A
df_A['Student_weight'] = weights_A
df_A['Gender'] = ['M', 'F', 'M', 'M', 'F']

# One extra student; the Series name becomes the row label.
s = pd.Series([165.4, 82.7, 'F'],
              index=['Student_height', 'Student_weight', 'Gender'],
              name='s6')

# DataFrame.append was removed in pandas 2.0, so the Series is turned into a
# one-row frame and concatenated instead. infer_objects() restores numeric
# dtypes, because the mixed-type Series stores all its values as objects.
s_row = s.to_frame().T.infer_objects()
df_AA = pd.concat([df_A, s_row])
print(df_AA)

# Class B: sampled values. The generator is re-seeded before each draw, so
# both draws start from the same point in the random stream.
my_mean = 170.0
my_std = 25.0
np.random.seed(100)
heights_B = pd.Series(np.random.normal(loc=my_mean, scale=my_std, size=5))
heights_B.index = ['s1', 's2', 's3', 's4', 's5']

my_mean1 = 75.0
my_std1 = 12.0
np.random.seed(100)
weights_B = pd.Series(np.random.normal(loc=my_mean1, scale=my_std1, size=5))
weights_B.index = ['s1', 's2', 's3', 's4', 's5']

df_B = pd.DataFrame()
df_B['Student_height'] = heights_B
df_B['Student_weight'] = weights_B

# Relabel class B so its row labels do not collide with class A's.
df_B.index = ['s7', 's8', 's9', 's10', 's11']
df_B['Gender'] = ['F', 'M', 'F', 'F', 'M']

# Stack both classes vertically into a single frame.
df = pd.concat([df_AA, df_B])
print(df)

 

8. Data Merge 2

# Exercise 8: join a master table with a transaction table on a shared key.
import pandas as pd
import numpy as np

# Master table: ids 101..110 paired with names person1..person10.
nameid = pd.Series(range(101, 111))
name = pd.Series(['person' + str(i) for i in range(1, 11)])

master = pd.DataFrame()
master['nameid'] = nameid
master['name'] = name

# Transaction table: each row records a product for a given nameid.
# The original split the 'Vivo' literal across lines, which is a SyntaxError.
transaction = pd.DataFrame({
    'nameid': [108, 108, 108, 103],
    'product': ['iPhone', 'Nokia', 'Micromax', 'Vivo'],
})

# Default (inner) merge: only ids present in both tables are kept.
mdf = pd.merge(master, transaction, on='nameid')
print(mdf)

Post a Comment

0 Comments