# Preamble script block to identify host, user, and kernel
import sys
! hostname
! whoami
print(sys.executable)
print(sys.version)
print(sys.version_info)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statistics
import scipy.stats
import seaborn as sns
data = pd.read_csv("lab15_minidf.csv")
data
data.info()
#For set1:
set1 = data['Set1']
print('For set 1',' the arithmetic mean is: ',set1.mean())
print('For set 1',' the median is: ',set1.median())
#For set2:
set2 = data['Set2']
print('For set 2',' the arithmetic mean is: ',set2.mean())
print('For set 2',' the median is: ',set2.median())
#For set3:
set3 = data['Set3']
print('For set 3',' the arithmetic mean is: ',set3.mean())
print('For set 3',' the median is: ',set3.median())
#For set1:
print('For set 1',' the range is: ',np.ptp(set1))
print('For set 1',' the IQR is: ',scipy.stats.iqr(set1))
print('For set 1',' the 5-number summary is: ',set1.describe())
print('For set 1',' the variance is: ',statistics.variance(set1))
print('For set 1',' the standard deviation is: ',statistics.stdev(set1))
#For set2:
print('For set 2',' the range is: ',np.ptp(set2))
print('For set 2',' the IQR is: ',scipy.stats.iqr(set2))
print('For set 2',' the 5-number summary is: ',set2.describe())
print('For set 2',' the variance is: ',statistics.variance(set2))
print('For set 2',' the standard deviation is: ',statistics.stdev(set2))
#For set3:
print('For set 3',' the range is: ',np.ptp(set3))
print('For set 3',' the IQR is: ',scipy.stats.iqr(set3))
print('For set 3',' the 5-number summary is: ',set3.describe())
print('For set 3',' the variance is: ',statistics.variance(set3))
print('For set 3',' the standard deviation is: ',statistics.stdev(set3))
skew1 = set1.skew()
skew2 = set2.skew()
skew3 = set3.skew()
print('For set 1 the skewness is ',skew1,'For set 2 the skewness is ',skew2,'For set 3 the skewness is ',skew3)
fig = plt.figure(figsize =(10, 7))
plt.boxplot ([set1, set2, set3],1, '')
plt.show()
set1.plot.hist(density=False, bins=6,color="red")
set2.plot.hist(density=False, bins=6,color="blue")
set3.plot.hist(density=False, bins=6,color="gold")
fig, ax = plt.subplots()
data.plot.hist(density=False, ax=ax, bins=6,color=("red","blue","gold"))
sns.distplot(set1,color='red', rug=True,kde=True)
sns.distplot(set2,color='blue', rug=True,kde=True)
sns.distplot(set3,color='gold', rug=True,kde=True)
# First, define the function for the Gringorten Plotting Position Formula:
def gringorten_pp(sample): # plotting position function
# returns a list of plotting positions; sample must be a numeric list
gringorten_pp = [] # null list to return after fill
sample.sort() # sort the sample list in place
for i in range(0,len(sample),1):
gringorten_pp.append((i+1-0.44)/(len(sample)+0.12)) #values from the gringorten formula
return gringorten_pp
# Second, apply it on each set
set1 = np.array(set1)
set2 = np.array(set2)
set3 = np.array(set3)
set1_grin = gringorten_pp(set1)
set2_grin = gringorten_pp(set2)
set3_grin = gringorten_pp(set3)
# Third, plot them
myfigure = plt.figure(figsize = (12,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set1_grin, set1 ,color ='red',
marker ="^",
s = 50)
plt.scatter(set2_grin, set2 ,color ='blue',
marker ="o",
s = 20)
plt.scatter(set3_grin, set3 ,color ='gold',
marker ="s",
s = 20)
plt.xlabel("Density or Quantile Value")
plt.ylabel("Value")
plt.title("Quantile Plot for Set1, Set2, and Set3 based on Gringorton Plotting Functions")
plt.show()
# Normal Quantile Function
import math
def normdist(x,mu,sigma):
argument = (x - mu)/(math.sqrt(2.0)*sigma)
normdist = (1.0 + math.erf(argument))/2.0
return normdist
#For Set1
mu = set1.mean() # Fitted Model
sigma = set1.std()
x = []; ycdf = []
xlow = 0; xhigh = 1.2*max(set1) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = normdist(xlow + i*xstep,mu,sigma)
ycdf.append(yvalue)
# Fitting Data to Normal Data Model
# Now plot the sample values and plotting position
myfigure = plt.figure(figsize = (7,9)) # generate a object from the figure class, set aspect ratio
plt.scatter(set1_grin, set1 ,color ='red')
plt.plot(ycdf, x, color ='darkred')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set1 | Normal Distribution Data Model sample mean = : " + str(mu)+ " sample variance =:" + str(sigma**2)
plt.title(mytitle)
plt.show()
#For Set2
mu = set2.mean() # Fitted Model
sigma = set2.std()
x = []; ycdf = []
xlow = 0; xhigh = 1.2*max(set2) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = normdist(xlow + i*xstep,mu,sigma)
ycdf.append(yvalue)
# Fitting Data to Normal Data Model
# Now plot the sample values and plotting position
myfigure = plt.figure(figsize = (7,9)) # generate a object from the figure class, set aspect ratio
plt.scatter(set2_grin, set2 ,color ='blue')
plt.plot(ycdf, x, color ='darkblue')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set2 | Normal Distribution Data Model sample mean = : " + str(mu)+ " sample variance =:" + str(sigma**2)
plt.title(mytitle)
plt.show()
#For Set3
mu = set3.mean() # Fitted Model
sigma = set3.std()
x = []; ycdf = []
xlow = 0; xhigh = 1.2*max(set3) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = normdist(xlow + i*xstep,mu,sigma)
ycdf.append(yvalue)
# Fitting Data to Normal Data Model
# Now plot the sample values and plotting position
myfigure = plt.figure(figsize = (7,9)) # generate a object from the figure class, set aspect ratio
plt.scatter(set3_grin, set3 ,color ='gold')
plt.plot(ycdf, x, color ='orange')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set3 | Normal Distribution Data Model sample mean = : " + str(mu)+ " sample variance =:" + str(sigma**2)
plt.title(mytitle)
plt.show()
# Gumbell (Extreme Value Type I) Quantile Function
def ev1dist(x,alpha,beta):
argument = (x - alpha)/beta
constant = 1.0/beta
ev1dist = math.exp(-1.0*math.exp(-1.0*argument))
return ev1dist
#For Set1
sample = set1
sample_mean = np.array(sample).mean()
sample_variance = np.array(sample).std()**2
alpha_mom = sample_mean*math.sqrt(6)/math.pi
beta_mom = math.sqrt(sample_variance)*0.45
################
mu = sample_mean # Fitted Model
sigma = math.sqrt(sample_variance)
x = []; ycdf = []
xlow = 0; xhigh = 1.2*max(sample) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = ev1dist(xlow + i*xstep,alpha_mom,beta_mom)
ycdf.append(yvalue)
# Now plot the sample values and plotting position
myfigure = plt.figure(figsize = (7,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set1_grin, set1 ,color ='red')
plt.plot(ycdf, x, color ='darkred')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set1 | Extreme Value Type 1 Distribution Data Model sample mean = : " + str(sample_mean)+ " sample variance =:" + str(sample_variance)
plt.title(mytitle)
plt.show()
#For Set2
sample = set2
sample_mean = np.array(sample).mean()
sample_variance = np.array(sample).std()**2
alpha_mom = sample_mean*math.sqrt(6)/math.pi
beta_mom = math.sqrt(sample_variance)*0.45
################
mu = sample_mean # Fitted Model
sigma = math.sqrt(sample_variance)
x = []; ycdf = []
xlow = 0; xhigh = 1.2*max(sample) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = ev1dist(xlow + i*xstep,alpha_mom,beta_mom)
ycdf.append(yvalue)
# Now plot the sample values and plotting position
myfigure = plt.figure(figsize = (7,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set2_grin, set2 ,color ='blue')
plt.plot(ycdf, x, color ='darkblue')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set2 | Extreme Value Type 1 Distribution Data Model sample mean = : " + str(sample_mean)+ " sample variance =:" + str(sample_variance)
plt.title(mytitle)
plt.show()
#For Set2
sample = set3
sample_mean = np.array(sample).mean()
sample_variance = np.array(sample).std()**2
alpha_mom = sample_mean*math.sqrt(6)/math.pi
beta_mom = math.sqrt(sample_variance)*0.45
################
mu = sample_mean # Fitted Model
sigma = math.sqrt(sample_variance)
x = []; ycdf = []
xlow = 0; xhigh = 1.2*max(sample) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = ev1dist(xlow + i*xstep,alpha_mom,beta_mom)
ycdf.append(yvalue)
# Now plot the sample values and plotting position
myfigure = plt.figure(figsize = (7,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set3_grin, set3 ,color ='gold')
plt.plot(ycdf, x, color ='orange')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set3 | Extreme Value Type 1 Distribution Data Model sample mean = : " + str(sample_mean)+ " sample variance =:" + str(sample_variance)
plt.title(mytitle)
plt.show()
# Gamma (Pearson Type III) Quantile Function
def gammacdf(x,tau,alpha,beta): # Gamma Cumulative Density function - with three parameter to one parameter convert
xhat = x-tau
lamda = 1.0/beta
gammacdf = scipy.stats.gamma.cdf(lamda*xhat, alpha)
return gammacdf
#For Set1
set1_mean = np.array(set1).mean()
set1_stdev = np.array(set1).std()
set1_skew = scipy.stats.skew(set1)
set1_alpha = 4.0/(set1_skew**2)
set1_beta = np.sign(set1_skew)*math.sqrt(set1_stdev**2/set1_alpha)
set1_tau = set1_mean - set1_alpha*set1_beta
#
x = []; ycdf = []
xlow = (0.9*min(set1)); xhigh = (1.1*max(set1)) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = gammacdf(xlow + i*xstep,set1_tau,set1_alpha,set1_beta)
ycdf.append(yvalue)
####
rycdf = ycdf[::-1]
myfigure = plt.figure(figsize = (7,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set1_grin, set1 ,color ='red')
plt.plot(rycdf, x, color ='darkred')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set1 | Pearson (Gamma) Type III Distribution Data Model\n "
mytitle += "Mean = " + str(set1_mean) + "\n"
mytitle += "SD = " + str(set1_stdev) + "\n"
mytitle += "Skew = " + str(set1_skew) + "\n"
plt.title(mytitle)
plt.show()
#For Set2
set2_mean = np.array(set2).mean()
set2_stdev = np.array(set2).std()
set2_skew = scipy.stats.skew(set2)
set2_alpha = 4.0/(set2_skew**2)
set2_beta = np.sign(set2_skew)*math.sqrt(set2_stdev**2/set2_alpha)
set2_tau = set2_mean - set2_alpha*set2_beta
#
x = []; ycdf = []
xlow = (0.9*min(set2)); xhigh = (1.1*max(set2)) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = gammacdf(xlow + i*xstep,set2_tau,set2_alpha,set2_beta)
ycdf.append(yvalue)
####
rycdf = ycdf[::-1]
myfigure = plt.figure(figsize = (7,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set2_grin, set2 ,color ='blue')
plt.plot(rycdf, x, color ='darkblue')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set2 | Pearson (Gamma) Type III Distribution Data Model\n "
mytitle += "Mean = " + str(set2_mean) + "\n"
mytitle += "SD = " + str(set2_stdev) + "\n"
mytitle += "Skew = " + str(set2_skew) + "\n"
plt.title(mytitle)
plt.show()
#For Set3
set3_mean = np.array(set3).mean()
set3_stdev = np.array(set3).std()
set3_skew = scipy.stats.skew(set3)
set3_alpha = 4.0/(set3_skew**2)
set3_beta = np.sign(set3_skew)*math.sqrt(set3_stdev**2/set3_alpha)
set3_tau = set3_mean - set3_alpha*set3_beta
#
x = []; ycdf = []
xlow = (0.9*min(set3)); xhigh = (1.1*max(set3)) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = gammacdf(xlow + i*xstep,set3_tau,set3_alpha,set3_beta)
ycdf.append(yvalue)
####
#rycdf = ycdf[::-1]
myfigure = plt.figure(figsize = (7,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set3_grin, set3 ,color ='gold')
plt.plot(ycdf, x, color ='orange')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set3 | Pearson (Gamma) Type III Distribution Data Model\n "
mytitle += "Mean = " + str(set3_mean) + "\n"
mytitle += "SD = " + str(set3_stdev) + "\n"
mytitle += "Skew = " + str(set3_skew) + "\n"
plt.title(mytitle)
plt.show()
# The Shapiro-Wilk Normality Test for Set1
from scipy.stats import shapiro
stat, p = shapiro(set1)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably Gaussian')
else:
print('Probably not Gaussian')
# The Shapiro-Wilk Normality Test for Set2
from scipy.stats import shapiro
stat, p = shapiro(set2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably Gaussian')
else:
print('Probably not Gaussian')
# The Shapiro-Wilk Normality Test for Set3
from scipy.stats import shapiro
stat, p = shapiro(set3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably Gaussian')
else:
print('Probably not Gaussian')
# The Student's t-test for Set1 and Set2
from scipy.stats import ttest_ind
stat, p = ttest_ind(set1, set2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably the same distribution')
else:
print('Probably different distributions')
# The Student's t-test for Set1 and Set3
from scipy.stats import ttest_ind
stat, p = ttest_ind(set1, set3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably the same distribution')
else:
print('Probably different distributions')
# The Student's t-test for Set2 and Set3
from scipy.stats import ttest_ind
stat, p = ttest_ind(set2, set3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably the same distribution')
else:
print('Probably different distributions')
data = pd.read_csv("lab15_maxidf.csv")
data
data.info()
#For set1:
set1 = data['SetA']
print('For set 1',' the arithmetic mean is: ',set1.mean())
print('For set 1',' the median is: ',set1.median())
#For set2:
set2 = data['SetB']
print('For set 2',' the arithmetic mean is: ',set2.mean())
print('For set 2',' the median is: ',set2.median())
#For set3:
set3 = data['SetC']
print('For set 3',' the arithmetic mean is: ',set3.mean())
print('For set 3',' the median is: ',set3.median())
#For set1:
print('For set 1',' the range is: ',np.ptp(set1))
print('For set 1',' the IQR is: ',scipy.stats.iqr(set1))
print('For set 1',' the 5-number summary is: ',set1.describe())
print('For set 1',' the variance is: ',statistics.variance(set1))
print('For set 1',' the standard deviation is: ',statistics.stdev(set1))
#For set2:
print('For set 2',' the range is: ',np.ptp(set2))
print('For set 2',' the IQR is: ',scipy.stats.iqr(set2))
print('For set 2',' the 5-number summary is: ',set2.describe())
print('For set 2',' the variance is: ',statistics.variance(set2))
print('For set 2',' the standard deviation is: ',statistics.stdev(set2))
#For set3:
print('For set 3',' the range is: ',np.ptp(set3))
print('For set 3',' the IQR is: ',scipy.stats.iqr(set3))
print('For set 3',' the 5-number summary is: ',set3.describe())
print('For set 3',' the variance is: ',statistics.variance(set3))
print('For set 3',' the standard deviation is: ',statistics.stdev(set3))
skew1 = set1.skew()
skew2 = set2.skew()
skew3 = set3.skew()
print('For set 1 the skewness is ',skew1,'For set 2 the skewness is ',skew2,'For set 3 the skewness is ',skew3)
fig = plt.figure(figsize =(10, 7))
plt.boxplot ([set1, set2, set3],1, '')
plt.show()
set1.plot.hist(density=False, bins=50,color="red")
set2.plot.hist(density=False, bins=50,color="blue")
set3.plot.hist(density=False, bins=50,color="gold")
fig, ax = plt.subplots()
data.plot.hist(density=False, ax=ax, bins=50,color=("red","blue","gold"))
sns.distplot(set1,color='red', rug=True,kde=True)
sns.distplot(set2,color='blue', rug=True,kde=True)
sns.distplot(set3,color='gold', rug=True,kde=True)
# First, define the function for the Gringorten Plotting Position Formula:
def gringorten_pp(sample): # plotting position function
# returns a list of plotting positions; sample must be a numeric list
gringorten_pp = [] # null list to return after fill
sample.sort() # sort the sample list in place
for i in range(0,len(sample),1):
gringorten_pp.append((i+1-0.44)/(len(sample)+0.12)) #values from the gringorten formula
return gringorten_pp
# Second, apply it on each set
set1 = np.array(set1)
set2 = np.array(set2)
set3 = np.array(set3)
set1_grin = gringorten_pp(set1)
set2_grin = gringorten_pp(set2)
set3_grin = gringorten_pp(set3)
# Third, plot them
myfigure = plt.figure(figsize = (12,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set1_grin, set1 ,color ='red',
marker ="^",
s = 50)
plt.scatter(set2_grin, set2 ,color ='blue',
marker ="o",
s = 20)
plt.scatter(set3_grin, set3 ,color ='gold',
marker ="s",
s = 20)
plt.xlabel("Density or Quantile Value")
plt.ylabel("Value")
plt.title("Quantile Plot for Set1, Set2, and Set3 based on Gringorton Plotting Functions")
plt.show()
# Normal Quantile Function
import math
def normdist(x,mu,sigma):
argument = (x - mu)/(math.sqrt(2.0)*sigma)
normdist = (1.0 + math.erf(argument))/2.0
return normdist
#For Set1
mu = set1.mean() # Fitted Model
sigma = set1.std()
x = []; ycdf = []
xlow = 0; xhigh = 1.2*max(set1) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = normdist(xlow + i*xstep,mu,sigma)
ycdf.append(yvalue)
# Fitting Data to Normal Data Model
# Now plot the sample values and plotting position
myfigure = plt.figure(figsize = (7,9)) # generate a object from the figure class, set aspect ratio
plt.scatter(set1_grin, set1 ,color ='red')
plt.plot(ycdf, x, color ='darkred')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set1 | Normal Distribution Data Model sample mean = : " + str(mu)+ " sample variance =:" + str(sigma**2)
plt.title(mytitle)
plt.show()
#For Set2
mu = set2.mean() # Fitted Model
sigma = set2.std()
x = []; ycdf = []
xlow = 0; xhigh = 1.2*max(set2) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = normdist(xlow + i*xstep,mu,sigma)
ycdf.append(yvalue)
# Fitting Data to Normal Data Model
# Now plot the sample values and plotting position
myfigure = plt.figure(figsize = (7,9)) # generate a object from the figure class, set aspect ratio
plt.scatter(set2_grin, set2 ,color ='blue')
plt.plot(ycdf, x, color ='darkblue')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set2 | Normal Distribution Data Model sample mean = : " + str(mu)+ " sample variance =:" + str(sigma**2)
plt.title(mytitle)
plt.show()
#For Set3
mu = set3.mean() # Fitted Model
sigma = set3.std()
x = []; ycdf = []
xlow = 0; xhigh = 1.2*max(set3) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = normdist(xlow + i*xstep,mu,sigma)
ycdf.append(yvalue)
# Fitting Data to Normal Data Model
# Now plot the sample values and plotting position
myfigure = plt.figure(figsize = (7,9)) # generate a object from the figure class, set aspect ratio
plt.scatter(set3_grin, set3 ,color ='gold')
plt.plot(ycdf, x, color ='orange')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set3 | Normal Distribution Data Model sample mean = : " + str(mu)+ " sample variance =:" + str(sigma**2)
plt.title(mytitle)
plt.show()
# Gumbell (Extreme Value Type I) Quantile Function
def ev1dist(x,alpha,beta):
argument = (x - alpha)/beta
constant = 1.0/beta
ev1dist = math.exp(-1.0*math.exp(-1.0*argument))
return ev1dist
#For Set1
sample = set1
sample_mean = np.array(sample).mean()
sample_variance = np.array(sample).std()**2
alpha_mom = sample_mean*math.sqrt(6)/math.pi
beta_mom = math.sqrt(sample_variance)*0.45
################
mu = sample_mean # Fitted Model
sigma = math.sqrt(sample_variance)
x = []; ycdf = []
xlow = 0; xhigh = 1.2*max(sample) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = ev1dist(xlow + i*xstep,alpha_mom,beta_mom)
ycdf.append(yvalue)
# Now plot the sample values and plotting position
myfigure = plt.figure(figsize = (7,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set1_grin, set1 ,color ='red')
plt.plot(ycdf, x, color ='darkred')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set1 | Extreme Value Type 1 Distribution Data Model sample mean = : " + str(sample_mean)+ " sample variance =:" + str(sample_variance)
plt.title(mytitle)
plt.show()
#For Set2
sample = set2
sample_mean = np.array(sample).mean()
sample_variance = np.array(sample).std()**2
alpha_mom = sample_mean*math.sqrt(6)/math.pi
beta_mom = math.sqrt(sample_variance)*0.45
################
mu = sample_mean # Fitted Model
sigma = math.sqrt(sample_variance)
x = []; ycdf = []
xlow = 0; xhigh = 1.2*max(sample) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = ev1dist(xlow + i*xstep,alpha_mom,beta_mom)
ycdf.append(yvalue)
# Now plot the sample values and plotting position
myfigure = plt.figure(figsize = (7,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set2_grin, set2 ,color ='blue')
plt.plot(ycdf, x, color ='darkblue')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set2 | Extreme Value Type 1 Distribution Data Model sample mean = : " + str(sample_mean)+ " sample variance =:" + str(sample_variance)
plt.title(mytitle)
plt.show()
#For Set2
sample = set3
sample_mean = np.array(sample).mean()
sample_variance = np.array(sample).std()**2
alpha_mom = sample_mean*math.sqrt(6)/math.pi
beta_mom = math.sqrt(sample_variance)*0.45
################
mu = sample_mean # Fitted Model
sigma = math.sqrt(sample_variance)
x = []; ycdf = []
xlow = 0; xhigh = 1.2*max(sample) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = ev1dist(xlow + i*xstep,alpha_mom,beta_mom)
ycdf.append(yvalue)
# Now plot the sample values and plotting position
myfigure = plt.figure(figsize = (7,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set3_grin, set3 ,color ='gold')
plt.plot(ycdf, x, color ='orange')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set3 | Extreme Value Type 1 Distribution Data Model sample mean = : " + str(sample_mean)+ " sample variance =:" + str(sample_variance)
plt.title(mytitle)
plt.show()
# Gamma (Pearson Type III) Quantile Function
def gammacdf(x,tau,alpha,beta): # Gamma Cumulative Density function - with three parameter to one parameter convert
xhat = x-tau
lamda = 1.0/beta
gammacdf = scipy.stats.gamma.cdf(lamda*xhat, alpha)
return gammacdf
#For Set1
set1_mean = np.array(set1).mean()
set1_stdev = np.array(set1).std()
set1_skew = scipy.stats.skew(set1)
set1_alpha = 4.0/(set1_skew**2)
set1_beta = np.sign(set1_skew)*math.sqrt(set1_stdev**2/set1_alpha)
set1_tau = set1_mean - set1_alpha*set1_beta
#
x = []; ycdf = []
xlow = (0.9*min(set1)); xhigh = (1.1*max(set1)) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = gammacdf(xlow + i*xstep,set1_tau,set1_alpha,set1_beta)
ycdf.append(yvalue)
####
#rycdf = ycdf[::-1]
myfigure = plt.figure(figsize = (7,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set1_grin, set1 ,color ='red')
plt.plot(ycdf, x, color ='darkred')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set1 | Pearson (Gamma) Type III Distribution Data Model\n "
mytitle += "Mean = " + str(set1_mean) + "\n"
mytitle += "SD = " + str(set1_stdev) + "\n"
mytitle += "Skew = " + str(set1_skew) + "\n"
plt.title(mytitle)
plt.show()
#For Set2
set2_mean = np.array(set2).mean()
set2_stdev = np.array(set2).std()
set2_skew = scipy.stats.skew(set2)
set2_alpha = 4.0/(set2_skew**2)
set2_beta = np.sign(set2_skew)*math.sqrt(set2_stdev**2/set2_alpha)
set2_tau = set2_mean - set2_alpha*set2_beta
#
x = []; ycdf = []
xlow = (0.9*min(set2)); xhigh = (1.1*max(set2)) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = gammacdf(xlow + i*xstep,set2_tau,set2_alpha,set2_beta)
ycdf.append(yvalue)
####
#rycdf = ycdf[::-1]
myfigure = plt.figure(figsize = (7,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set2_grin, set2 ,color ='blue')
plt.plot(ycdf, x, color ='darkblue')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set2 | Pearson (Gamma) Type III Distribution Data Model\n "
mytitle += "Mean = " + str(set2_mean) + "\n"
mytitle += "SD = " + str(set2_stdev) + "\n"
mytitle += "Skew = " + str(set2_skew) + "\n"
plt.title(mytitle)
plt.show()
#For Set3
set3_mean = np.array(set3).mean()
set3_stdev = np.array(set3).std()
set3_skew = scipy.stats.skew(set3)
set3_alpha = 4.0/(set3_skew**2)
set3_beta = np.sign(set3_skew)*math.sqrt(set3_stdev**2/set3_alpha)
set3_tau = set3_mean - set3_alpha*set3_beta
#
x = []; ycdf = []
xlow = (0.9*min(set3)); xhigh = (1.1*max(set3)) ; howMany = 100
xstep = (xhigh - xlow)/howMany
for i in range(0,howMany+1,1):
x.append(xlow + i*xstep)
yvalue = gammacdf(xlow + i*xstep,set3_tau,set3_alpha,set3_beta)
ycdf.append(yvalue)
####
#rycdf = ycdf[::-1]
myfigure = plt.figure(figsize = (7,8)) # generate a object from the figure class, set aspect ratio
plt.scatter(set3_grin, set3 ,color ='gold')
plt.plot(ycdf, x, color ='orange')
plt.xlabel("Quantile Value")
plt.ylabel("Value")
mytitle = "For Set3 | Pearson (Gamma) Type III Distribution Data Model\n "
mytitle += "Mean = " + str(set3_mean) + "\n"
mytitle += "SD = " + str(set3_stdev) + "\n"
mytitle += "Skew = " + str(set3_skew) + "\n"
plt.title(mytitle)
plt.show()
# The Shapiro-Wilk Normality Test for Set1
from scipy.stats import shapiro
stat, p = shapiro(set1)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably Gaussian')
else:
print('Probably not Gaussian')
# The Shapiro-Wilk Normality Test for Set2
from scipy.stats import shapiro
stat, p = shapiro(set2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably Gaussian')
else:
print('Probably not Gaussian')
# The Shapiro-Wilk Normality Test for Set3
from scipy.stats import shapiro
stat, p = shapiro(set3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably Gaussian')
else:
print('Probably not Gaussian')
# The Student's t-test for Set1 and Set2
from scipy.stats import ttest_ind
stat, p = ttest_ind(set1, set2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably the same distribution')
else:
print('Probably different distributions')
# The Student's t-test for Set1 and Set3
from scipy.stats import ttest_ind
stat, p = ttest_ind(set1, set3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably the same distribution')
else:
print('Probably different distributions')
# The Student's t-test for Set2 and Set3
from scipy.stats import ttest_ind
stat, p = ttest_ind(set2, set3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably the same distribution')
else:
print('Probably different distributions')
# Example of the Analysis of Variance Test
from scipy.stats import f_oneway
stat, p = f_oneway(set1, set2, set3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
print('Probably the same distribution')
else:
print('Probably different distributions')