# Script block to identify host, user, and kernel
import sys
! hostname; ! whoami; ! pwd; 
print(sys.executable)

atomickitty
sensei
/home/sensei/engr-1330-webroot/1-Lessons/Lesson14
/opt/jupyterhub/bin/python3


%%html
<!--Script block to left align Markdown Tables-->
<style>
  table {margin-left: 0 !important;}
</style>


# Load the necessary packages
import numpy as np
import pandas as pd
import statistics # this package contains correlation and covariance, so we don't have to write code
from matplotlib import pyplot as plt

# Create a dataframe from the recorded observations (one list per column):
observations = {
    'Time': [0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
    'Speed': [0, 3, 7, 12, 20, 30, 45.6, 60.3, 77.7, 97.3, 121.2],
}
# Keep the raw lists under their own names as well; they are used directly later on
time, speed = observations['Time'], observations['Speed']
data = pd.DataFrame(observations)
data


data.describe()


# Sample variance of each recorded series, computed from the raw lists
time_var = statistics.variance(time)
speed_var = statistics.variance(speed)

print("Variance of recorded times is ",time_var)
# Label corrected: this line reports the speed series, not the times
print("Variance of recorded speeds is ",speed_var)

Variance of recorded times is  11.0
Variance of recorded times is  1697.7759999999998


# To find the covariance  
data.cov()


# To find the correlation among the columns 
# using pearson method 
data.corr(method ='pearson')


import matplotlib.pyplot as plt
def make2plot(listx1,listy1,listx2,listy2,strlablx,strlably,strtitle,
              legend_labels=('Observations','Model')):
    """Draw observations as markers and a model as a line on one figure, then show it.

    Parameters
    ----------
    listx1, listy1 : x and y values of the observations (red 'v' markers, no line).
    listx2, listy2 : x and y values of the model (thin blue line).
    strlablx, strlably : axis labels for x and y.
    strtitle : plot title.
    legend_labels : two legend entries, in the order (observations, model).
        Defaults to the labels that were previously hard-coded.

    Returns
    -------
    None. The figure is rendered through ``plt.show()``.
    """
    plt.figure(figsize = (10,5)) # 10 x 5 drawing canvas (wide, not square)
    plt.plot(listx1,listy1, c='red', marker='v',linewidth=0) # basic data plot
    plt.plot(listx2,listy2, c='blue',linewidth=1) # basic model plot
    plt.xlabel(strlablx)
    plt.ylabel(strlably)
    plt.legend(list(legend_labels)) # entries follow the order of the plot calls above
    plt.title(strtitle)
    plt.show()


# Straight line through the point of means whose slope is cov(Time, Speed) / var(Time)
cov_matrix = data.cov()
slope = cov_matrix.iloc[1,0] / cov_matrix.iloc[0,0]
time_offset = data['Time'] - data['Time'].mean()
model = data['Speed'].mean() + time_offset*slope
make2plot(data['Time'],data['Speed'],data['Time'],model,'Time','Speed','Speed vs Time using Correlation Model')


#Making Random Choice from an Array (or list)
import numpy as np
two_groups = np.array(['treatment', 'control'])
np.random.choice(two_groups,1)
# mylist = ['treatment', 'control']  # this works too
# np.random.choice(mylist)

array(['control'], dtype='<U9')


my_die = np.array(['one', 'two','three', 'four','five', 'six'])
np.random.choice(my_die)

'three'


# now a bunch of rolls: six independent draws, one per printed line
for roll_number in range(1, 7):
    print(f'roll #{roll_number} ', np.random.choice(my_die))

roll #1  four
roll #2  three
roll #3  two
roll #4  three
roll #5  one
roll #6  three


# or multiple rolls, single call
myDiceRolls = np.random.choice(my_die,6) 
print(myDiceRolls)

['two' 'four' 'six' 'two' 'six' 'six']


my_wallet = 1 # start with 1 dollars

def place_a_bet(wallet):
    """Stake one dollar from ``wallet`` and return what is left.

    A wallet equal to 0 cannot bet: a message is printed and the wallet is
    returned unchanged. Otherwise the returned value is ``wallet - 1``.
    """
    print("Place your bet!")
    if wallet != 0:
        return wallet - 1
    print("You have no money, get out of my Casino!")
    return wallet

def make_a_roll(wallet):
    """Roll one six-sided die and return the wallet after settling the bet.

    Faces 1-2 lose (wallet unchanged), faces 3-4 draw (wallet + 1, the stake
    comes back), faces 5-6 win (wallet + 2, the stake plus one dollar).
    The result of the roll is announced on standard output.
    """
    print("Roll the die!")
    spots = np.random.choice(np.arange(1, 7))  # one draw from the faces 1..6
    # (highest face in the band, announcement, amount added to the wallet)
    results = (
        (2, "You Lose,  Bummer!", 0),
        (4, "You Draw, Take your bet back.", 1),
        (6, "You win a dollar!", 2),
    )
    for highest_face, message, payout in results:
        if spots <= highest_face:
            print(message)
            return wallet + payout
    



# Single play    
print("Amount in my account =:",my_wallet)
my_wallet = place_a_bet(my_wallet)
my_wallet = make_a_roll(my_wallet)
print("Amount in my account =:",my_wallet)

Amount in my account =: 1
Place your bet!
Roll the die!
You Draw, Take your bet back.
Amount in my account =: 1


# Some printing tricks: ANSI escape codes to colour terminal text, then reset it
CRED = '\033[91m'
CEND = '\033[0m'

my_wallet = 10
how_many_throws = 1

for i in range(how_many_throws):
    print("Amount in my account =:", my_wallet)
    # stake first, then settle the roll against what is left
    my_wallet = make_a_roll(place_a_bet(my_wallet))

    print("After ", i+1, " plays")
    print(CRED + "Amount in my account =:", my_wallet, CEND)
    print("_______________________")

Amount in my account =: 10
Place your bet!
Roll the die!
You Draw, Take your bet back.
After  1  plays
Amount in my account =: 10 
_______________________


outcomes = np.array([])  #null array to store outcomes

# redefine functions to suppress output

def place_a_bet(wallet):
    """Quiet version: stake one dollar and return the remaining wallet.

    A wallet equal to 0 is returned unchanged; nothing is printed.
    """
    return wallet if wallet == 0 else wallet - 1

def make_a_roll(wallet):
    """Quiet version: roll one six-sided die and return the settled wallet.

    Faces 1-2 leave the wallet unchanged, faces 3-4 add 1 (stake returned),
    faces 5-6 add 2 (stake returned plus one dollar). Nothing is printed.
    """
    spots = np.random.choice(np.arange(1, 7))  # one draw from the faces 1..6
    if spots <= 2:
        return wallet       # lose the bet
    if spots <= 4:
        return wallet + 1   # draw, get bet back
    if spots <= 6:
        return wallet + 2   # win, get bet back and win a dollar
    

# Some printing tricks
CRED = '\033[91m'
CEND = '\033[0m'


how_many_simulations = 100
how_many_throws = 30   # plays per simulated visit; constant, so set once outside the loops

# Final wallet of every simulated visit. Collected in a plain list so the
# results array is extended a single time instead of being re-copied by
# np.append on each pass through the loop.
final_wallets = []

for j in range(how_many_simulations):
    my_wallet = 1   # every visit starts with one dollar

    for i in range(how_many_throws):
        my_wallet = place_a_bet(my_wallet)
        my_wallet = make_a_roll(my_wallet)

    final_wallets.append(my_wallet)

# Add this batch to whatever outcomes already holds
outcomes = np.append(outcomes, final_wallets)

# build a histogram chart - outcomes is an array

import matplotlib.pyplot as plt
from scipy.stats import gamma

# Histogram of the simulated final wallets, drawn as a density with 20 bins
plt.hist(outcomes, bins = 20, density=True)
plt.xlabel("Dollars in Gamer's Wallet")
plt.ylabel('Relative Frequency')
#### just a data model, gamma distribution ##############
# code below adapted from https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html
a = 5 # shape parameter, found by a bit of trial and error
# evaluation grid spans the 0.1% to 99.9% quantiles of the unshifted distribution
x_start = gamma.ppf(0.001, a)
x_stop = gamma.ppf(0.999, a)
x = np.linspace(x_start, x_stop, 1000)
model_density = gamma.pdf(x, a, loc=-1.25, scale=1)
plt.plot(x, model_density, 'r-', lw=5, alpha=1.0, label='gamma pdf')
#########################################################
# Render the plot
plt.show()

#print("Expected value of wallet (mean) =: ",outcomes.mean())

import pandas as pd
df = pd.DataFrame(outcomes)
df.describe()


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  

def othergoat(x):
    """Return the name of the goat that is not ``x``.

    "Goat 1" maps to "Goat 2" and vice versa; any other value yields None.
    """
    for goat, partner in (("Goat 1", "Goat 2"), ("Goat 2", "Goat 1")):
        if x == goat:
            return partner
    return None

Doors = np.array(["Car","Goat 1","Goat 2"])     #Define a list for objects behind the doors
goats = np.array(["Goat 1" , "Goat 2"])          #Define a list for goats!

def MHgame():
    """Play one round of the Monty Hall game with a random first guess.

    Returns a list [the guess, the revealed, the remaining]. When the guess
    is a goat, the other goat is revealed and the car remains. When the
    guess is the car, one goat is revealed at random and the other remains.
    """
    pick = np.random.choice(Doors)         # random door as the contestant's guess
    if pick in ("Goat 1", "Goat 2"):
        return [pick, othergoat(pick), "Car"]
    if pick == "Car":
        shown = np.random.choice(goats)    # either goat may be revealed
        return [pick, shown, othergoat(shown)]


# Sanity check of MHgame: print one game as a list, then each of its three entries
for i in np.arange(1):
    a = MHgame()
    print(a, *a, sep="\n")

['Car', 'Goat 2', 'Goat 1']
Car
Goat 2
Goat 1


c1 = []         # the guess made in each round
c2 = []         # the item revealed in each round
c3 = []         # the item remaining in each round

how_many_games = 10000

for i in np.arange(how_many_games):         # play how_many_games rounds - change the number above as desired
    game = MHgame()
    # spread [guess, revealed, remaining] over the three lists, in that order
    for column, entry in zip((c1, c2, c3), game):
        column.append(entry)


# Data frame (gamedf) with one row per round and the columns "Guess", "Revealed", "Remaining"
gamedf = pd.DataFrame(dict(Guess=c1, Revealed=c2, Remaining=c3))
gamedf


# Count how often each item shows up as the guess (1st column) and as the remaining door (3rd column)
original_car = len(gamedf[gamedf['Guess'] == 'Car'])
original_g1 = len(gamedf[gamedf['Guess'] == 'Goat 1'])
original_g2 = len(gamedf[gamedf['Guess'] == 'Goat 2'])

remaining_car = len(gamedf[gamedf['Remaining'] == 'Car'])
remaining_g1 = len(gamedf[gamedf['Remaining'] == 'Goat 1'])
remaining_g2 = len(gamedf[gamedf['Remaining'] == 'Goat 2'])


# Let's plot a grouped barplot

# width of each individual bar
barWidth = 0.25

# bar heights: counts for the original guess and for the remaining door
bars1 = [original_car, original_g1, original_g2]
bars2 = [remaining_car, remaining_g1, remaining_g2]

# x positions: the second series sits one bar width to the right of the first
r1 = np.arange(len(bars1))
r2 = [left + barWidth for left in r1]

# draw both series with the same bar styling
shared_style = dict(width=barWidth, edgecolor='white')
plt.bar(r1, bars1, color='darkorange', label='Original Guess', **shared_style)
plt.bar(r2, bars2, color='midnightblue', label='Remaining Door', **shared_style)

# put each tick midway between the two bars of its group
plt.xlabel('Item', fontweight='bold')
tick_positions = [group + barWidth/2 for group in range(len(bars1))]
plt.xticks(tick_positions, ['Car', 'Goat 1', 'Goat 2'])

# Create legend & Show graphic
plt.legend()
plt.show()


import pandas as pd
HowManyRollsToTake = 50
# Roll counts 0..HowManyRollsToTake and, for each, the complement of (5/6)**n
numRolls = list(range(HowManyRollsToTake + 1))
probabilities = [1-(5/6)**count for count in numRolls]

rolls = {
    "NumRolls": numRolls,
    "Prob at least one 6": probabilities,
}

df = pd.DataFrame(rolls)
df.plot.scatter(x="NumRolls", y="Prob at least one 6")

<AxesSubplot:xlabel='NumRolls', ylabel='Prob at least one 6'>


import pandas as pd
HowManyYears = 60
# Year counts 0..HowManyYears in the sequence
numYears = list(range(HowManyYears + 1))
# Probability of No Loss after i-years, with a 1/100 chance of loss in each year
nolossprobabilities = [(1-(1/100))**elapsed for elapsed in numYears]
# Probability of Loss after i-years is the complement of the no-loss probability
lossprobabilities = [1 - no_loss for no_loss in nolossprobabilities]
years = {
    "Years from Start of Loan": numYears,
    "Probability of No Loss": nolossprobabilities,
    "Probability of Loss": lossprobabilities,
}

df = pd.DataFrame(years)
df.plot.line(x="Years from Start of Loan", y="Probability of Loss")
# df.plot.line(x="Years from Start of Loan", y="Probability of No Loss")

<AxesSubplot:xlabel='Years from Start of Loan'>


df.head(30)


df["Probability of Loss"].loc[30]

0.2602996266117198

	Time	Speed
count	11.000000	11.000000
mean	5.000000	43.100000
std	3.316625	41.204077
min	0.000000	0.000000
25%	2.500000	9.500000
50%	5.000000	30.000000
75%	7.500000	69.000000
max	10.000000	121.200000

	0
count	100.000000
mean	4.190000
std	2.798611
min	0.000000
25%	2.000000
50%	4.000000
75%	6.000000
max	13.000000

	Years from Start of Loan	Probability of No Loss	Probability of Loss
0	0	1.000000	0.000000
1	1	0.990000	0.010000
2	2	0.980100	0.019900
3	3	0.970299	0.029701
4	4	0.960596	0.039404
5	5	0.950990	0.049010
6	6	0.941480	0.058520
7	7	0.932065	0.067935
8	8	0.922745	0.077255
9	9	0.913517	0.086483
10	10	0.904382	0.095618
11	11	0.895338	0.104662
12	12	0.886385	0.113615
13	13	0.877521	0.122479
14	14	0.868746	0.131254
15	15	0.860058	0.139942
16	16	0.851458	0.148542
17	17	0.842943	0.157057
18	18	0.834514	0.165486
19	19	0.826169	0.173831
20	20	0.817907	0.182093
21	21	0.809728	0.190272
22	22	0.801631	0.198369
23	23	0.793614	0.206386
24	24	0.785678	0.214322
25	25	0.777821	0.222179
26	26	0.770043	0.229957
27	27	0.762343	0.237657
28	28	0.754719	0.245281
29	29	0.747172	0.252828

ENGR 1330 Computational Thinking with Data Science¶

Lesson 14 Causality, Correlation, Randomness, and Probability¶

Objectives¶

Computational Thinking Concepts¶

Readings:¶

Correlation and Causality¶

What is causality? (A long-winded pseudo definition!)¶

Correlation (Causality's mimic!)¶

Association Measures (Covariance and Correlation)¶

Now, let's explore the data:¶

Implications¶

Confounding Factors¶

Randomization¶

Simulation of multiple gamblers/multiple visits to the Casino¶

Simulation¶

Simulation Example¶

Interpret Results¶

Randomness and Probability¶

Simple Exclusion¶

Complete Enumeration¶

Conditioning (Two events must happen)¶

Partitioning (When sequence doesn't matter) - A kind of enumeration!¶

At Least One Success (A kind of exclusion/partition)¶

Why Should anyone buy Flood Insurance?¶

Elapsed Time (s)	Speed (m/s)
0	0
1.0	3
2.0	7
3.0	12
4.0	20
5.0	30
6.0	45.6
7.0	60.3
8.0	77.7
9.0	97.3
10.0	121.2

	Time	Speed
0	0.0	0.0
1	1.0	3.0
2	2.0	7.0
3	3.0	12.0
4	4.0	20.0
5	5.0	30.0
6	6.0	45.6
7	7.0	60.3
8	8.0	77.7
9	9.0	97.3
10	10.0	121.2

	Guess	Revealed	Remaining
0	Goat 2	Goat 1	Car
1	Goat 1	Goat 2	Car
2	Goat 2	Goat 1	Car
3	Car	Goat 1	Goat 2
4	Car	Goat 1	Goat 2
...	...	...	...
9995	Car	Goat 2	Goat 1
9996	Car	Goat 1	Goat 2
9997	Car	Goat 2	Goat 1
9998	Goat 2	Goat 1	Car
9999	Goat 1	Goat 2	Car

	Time	Speed
Time	11.00	131.750
Speed	131.75	1697.776