Download this page as a Jupyter Notebook at Lab 10
LAST NAME, FIRST NAME
R00000000
ENGR 1330 Laboratory 10 - In Lab
# Preamble script block to identify host, user, and kernel
import sys
! hostname
! whoami
print(sys.executable)
print(sys.version)
print(sys.version_info)
The Pandas library is a preferred tool for data scientists to perform data manipulation and analysis, next to matplotlib for data visualization and NumPy for scientific computing in Python.
The fast, flexible, and expressive Pandas data structures are designed to make real-world data analysis significantly easier, but this might not be immediately apparent to those who are just getting started, precisely because so much functionality is built into the package that the options can be overwhelming.
Hence, summary sheets are useful:
A summary sheet: https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf
A different one: http://datacamp-community-prod.s3.amazonaws.com/f04456d7-8e61-482f-9cc9-da6f7f25fc9b
A data table is called a DataFrame in pandas (and in other programming environments too).
The figure below from https://pandas.pydata.org/docs/getting_started/index.html illustrates a dataframe model:
Each column and each row in a dataframe is called a series; the header row and the index column are special.
To use pandas, we need to import the module. Pandas has numpy as a dependency, so it also must be imported.
import numpy
import pandas
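As a quick illustration of the structure described above (a minimal sketch with made-up values; the name demo is just illustrative), the whole table is a DataFrame while a single column pulled from it is a Series:
demo = pandas.DataFrame({'W':[1,2,3],'X':[4,5,6]}, index=['A','B','C'])
print(type(demo))      # the whole table is a DataFrame
print(type(demo['W'])) # a single column is a Series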
First let's construct a dataframe-like object using python primitives. We will construct a random array for the content, a list of row names, and a list of column names.
mytabular = numpy.random.randint(1,100,(5,4))
myrowname = ['A','B','C','D','E']
mycolname = ['W','X','Y','Z']
mytable = [['' for jcol in range(len(mycolname)+1)] for irow in range(len(myrowname)+1)] #non-null destination matrix, note the implied loop construction
The above builds a placeholder named mytable for the pseudo-dataframe.
Next we populate the table, using for loops to write the row names in the first column, the column names in the first row, and the values in the rest of the table.
for irow in range(1,len(myrowname)+1): # write the row names
    mytable[irow][0]=myrowname[irow-1]
for jcol in range(1,len(mycolname)+1): # write the column names
    mytable[0][jcol]=mycolname[jcol-1]
for irow in range(1,len(myrowname)+1): # fill the table (note the nested loop)
    for jcol in range(1,len(mycolname)+1):
        mytable[irow][jcol]=mytabular[irow-1][jcol-1]
Now let's print the table out by row; we see we have a very dataframe-like structure.
for irow in range(0,len(myrowname)+1):
    print(mytable[irow][0:len(mycolname)+1])
We can also query by row
print(mytable[3][0:len(mycolname)+1])
Or by column
for irow in range(0,len(myrowname)+1): #cannot use implied loop in a column slice
    print(mytable[irow][2])
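As an aside (a small sketch, not a required step), a list comprehension can gather a column from the nested list in a single expression, since a direct column slice does not work on a list of lists:
# Sketch: gather column index 2 (the header 'X' plus its values) with a list comprehension
column2 = [mytable[irow][2] for irow in range(0,len(myrowname)+1)]
print(column2)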
Or by row and column index; this sort of looks like spreadsheet syntax.
print(' ',mytable[0][3])
print(mytable[3][0],mytable[3][3])
We will now do the same using pandas
df = pandas.DataFrame(numpy.random.randint(1,100,(5,4)), ['A','B','C','D','E'], ['W','X','Y','Z'])
df
We can also turn our table into a dataframe; notice how the constructor adds a default integer header row and index column.
df1 = pandas.DataFrame(mytable)
df1
To get proper behavior, we can just reuse our original objects
df2 = pandas.DataFrame(mytabular,myrowname,mycolname)
df2
The shape attribute returns the row and column counts of a dataframe.
df.shape
df1.shape
df2.shape
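The shape tuple can also be unpacked into separate row and column counts; a small sketch (the names nrows and ncols are just illustrative):
# Sketch: unpack the shape tuple into row and column counts
nrows, ncols = df2.shape
print('rows =', nrows, ' columns =', ncols)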
To append a column, simply assign a value to a new column name in the dataframe.
df['new']= 'NA'
df
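A new column can also be computed from existing columns; a minimal sketch (the names df_copy and WplusX are just illustrative, and a copy is used so df itself is unchanged):
# Sketch: a new column computed element-wise from existing columns
df_copy = df.copy()
df_copy['WplusX'] = df_copy['W'] + df_copy['X']
df_copy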
Appending a row is a bit trickier, but we can create a copy of a row and concatenate it back onto the dataframe.
newrow = df.loc[['E']].rename(index={"E": "X"}) # create a single row, rename the index
newtable = pandas.concat([df,newrow]) # concatenate the row to bottom of df - note the syntax
newtable
Removing a column is straightforward; we use the drop method.
newtable.drop('new', axis=1, inplace = True)
newtable
To remove a row, the easiest approach is probably to create a new dataframe with the row excluded.
newtable = newtable.loc[['A','B','D','E','X']] # select all rows except C
newtable
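An equivalent approach (a brief sketch) is to use the drop method with axis=0, which returns a new dataframe with the labeled row removed:
# Sketch: drop the row labeled 'X' (returns a new dataframe; newtable itself is unchanged)
newtable.drop('X', axis=0)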
We have already been indexing, but a few examples follow:
newtable['X'] #Selecting a single column
newtable[['X','W']] #Selecting multiple columns
newtable.loc['E'] #Selecting rows based on label via the loc[ ] indexer
newtable.loc[['E','X','B']] #Selecting multiple rows based on label via the loc[ ] indexer
newtable.loc[['B','E','D'],['X','Y']] #Selecting elements via both rows and columns via the loc[ ] indexer
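Position-based selection is also available through the iloc[ ] indexer; a short sketch (the positions chosen are just for illustration):
newtable.iloc[0]        # Selecting the first row by position via the iloc[ ] indexer
newtable.iloc[0:2, 1:3] # Selecting the first two rows and the columns in positions 1 and 2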
df = pandas.DataFrame({'col1':[1,2,3,4,5,6,7,8],
'col2':[444,555,666,444,666,111,222,222],
'col3':['orange','apple','grape','mango','jackfruit','watermelon','banana','peach']})
df
#What fruit corresponds to the number 555 in 'col2'?
df[df['col2']==555]['col3']
#What fruit corresponds to the minimum number in 'col2'?
df[df['col2']==df['col2'].min()]['col3']
#Creating a dataframe from a dictionary
df = pandas.DataFrame({'col1':[1,2,3,4,5,6,7,8],
'col2':[444,555,666,444,666,111,222,222],
'col3':['orange','apple','grape','mango','jackfruit','watermelon','banana','peach']})
df
The head method returns the first few rows; useful to infer structure.
#Returns only the first five rows
df.head()
The info method returns the data model (data column count, names, data types).
#Info about the dataframe
df.info()
The describe method returns summary statistics of each numeric column, including the minimum and maximum values and the quartiles (from which the interquartile range can be computed).
Again useful to understand the structure of the columns.
#Statistics of the dataframe
df.describe()
There are also methods for counts and sums by specific columns
df['col2'].sum() #Sum of a specified column
The unique method returns the unique values in a series (duplicates are filtered out; the underlying dataframe is preserved).
df['col2'].unique() #Returns the list of unique values along the indexed column
The nunique method returns a count of unique values.
df['col2'].nunique() #Returns the total number of unique values along the indexed column
The value_counts() method returns the count of each unique value (kind of like a histogram, but each value is the bin).
df['col2'].value_counts() #Returns the number of occurences of each unique value
The power of pandas includes the ability to apply a function to each element of a dataframe series (or a whole frame) by a technique called symbolic (or synthetic) programming application of the function.
It's pretty complicated but quite handy, and best shown by an example.
def times2(x): # A prototype function to scalar multiply an object x by 2
    return(x*2)
print(df)
print('Apply the times2 function to col2')
df['col2'].apply(times2) #Symbolic apply the function to each element of column col2, result is another dataframe
df.sort_values('col2', ascending = True) #Sorting based on columns
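The apply technique also accepts an anonymous (lambda) function, and sort_values can sort in descending order; a brief sketch:
df['col2'].apply(lambda x: x*2) #Symbolic apply of an anonymous doubling function
df.sort_values('col2', ascending = False) #Sorting in descending order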
Create a prototype function to compute the cube root of a numeric object (literally two lines to define the function); recall exponentiation is available in primitive python.
Apply your function to column 'X' of the dataframe newtable created above.
# Define your function here:
# Symbolic apply here:
#Creating a dataframe from a dictionary
data = {
'key' : ['A', 'B', 'C', 'A', 'B', 'C'],
'data1' : [1, 2, 3, 4, 5, 6],
'data2' : [10, 11, 12, 13, 14, 15],
'data3' : [20, 21, 22, 13, 24, 25]
}
df1 = pandas.DataFrame(data)
df1
# Grouping and summing values in all the columns based on the column 'key'
df1.groupby('key').sum()
# Grouping and summing values in the selected columns based on the column 'key'
df1.groupby('key')[['data1', 'data2']].sum()
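Other aggregations follow the same pattern; a brief sketch using the mean (any groupby aggregation, such as count or max, could be substituted):
# Grouping and averaging values in the selected columns based on the column 'key'
df1.groupby('key')[['data1', 'data2']].mean()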
#Creating a dataframe from a dictionary
df = pandas.DataFrame({'col1':[1,2,3,4,None,6,7,None],
'col2':[444,555,None,444,666,111,None,222],
'col3':['orange','apple','grape','mango','jackfruit','watermelon','banana','peach']})
df
Below we drop any row that contains a NaN code.
df_dropped = df.dropna()
df_dropped
Below we replace NaN codes with some value, in this case 0.
df_filled1 = df.fillna(0)
df_filled1
Below we replace NaN codes with some value, in this case the mean value of the column in which the missing value code resides.
df_filled2 = df.fillna(df.mean(numeric_only=True)) # numeric_only restricts the means to numeric columns (col3 is text)
df_filled2
Replace the 'NaN' codes with the string 'missing' in dataframe 'df'
# Replace the NaN with the string 'missing' here: