Poison Mushrooms

An application example of multiple logistic regression.

Every attribute in this data set is categorical and stored as a single-letter code, so we first encode each category as an integer and then build the model.

# Column names for the mushroom data file, in file order.  The first entry is
# the target column; the remaining 22 are the candidate features.
attributes = [
    'poison-class',
    'cap-shape', 'cap-surface', 'cap-color',
    'bruises', 'odor',
    'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
    'stalk-shape', 'stalk-root',
    'stalk-surface-above-ring', 'stalk-surface-below-ring',
    'stalk-color-above-ring', 'stalk-color-below-ring',
    'veil-type', 'veil-color',
    'ring-number', 'ring-type',
    'spore-print-color', 'population', 'habitat',
]
len(attributes)
23
import sys # Module to process commands to/from the OS using a shell-type syntax
import requests
# Download the raw data file from the course server and keep a local copy.
remote_url = "http://54.243.252.9/ce-5319-webroot/ce5319jb/lessons/logisticregression/agaricus-lepiota.data"  # set the url
# Follow redirects; the timeout keeps an unreachable server from hanging the notebook.
rget = requests.get(remote_url, allow_redirects=True, timeout=30)
# Fail loudly on an HTTP error instead of silently saving an error page as the CSV.
rget.raise_for_status()
# Mode 'wb' truncates any existing file, so a stale copy never has to be deleted
# first; the with-block closes the file even if the write raises.
with open('poisonmushroom.csv', 'wb') as localfile:
    localfile.write(rget.content)
import pandas as pd
# The data file has no header row, so pandas labels the columns 0..N-1.
mymushroom = pd.read_csv('poisonmushroom.csv', header=None)
req_col_names = attributes
curr_col_names = list(mymushroom.columns)

# A column-count mismatch means the download is not the expected file; stop
# here rather than renaming only some of the columns.
if len(curr_col_names) != len(req_col_names):
    raise ValueError(
        "poisonmushroom.csv has %d columns but %d attribute names were given"
        % (len(curr_col_names), len(req_col_names))
    )

# Map each positional column label onto its attribute name.
mapper = dict(zip(curr_col_names, req_col_names))

mymushroom = mymushroom.rename(columns=mapper)
# Work on an explicit copy: pd.DataFrame(df) is not guaranteed to detach from
# the source frame (older pandas versions share the underlying data), and the
# encoded values written into `interim` must not leak back into `mymushroom`.
interim = mymushroom.copy()
mymushroom.head(20)
poison-class cap-shape cap-surface cap-color bruises odor gill-attachment gill-spacing gill-size gill-color ... stalk-surface-below-ring stalk-color-above-ring stalk-color-below-ring veil-type veil-color ring-number ring-type spore-print-color population habitat
0 p x s n t p f c n k ... s w w p w o p k s u
1 e x s y t a f c b k ... s w w p w o p n n g
2 e b s w t l f c b n ... s w w p w o p n n m
3 p x y w t p f c n n ... s w w p w o p k s u
4 e x s g f n f w b k ... s w w p w o e n a g
5 e x y y t a f c b n ... s w w p w o p k n g
6 e b s w t a f c b g ... s w w p w o p k n m
7 e b y w t l f c b n ... s w w p w o p n s m
8 p x y w t p f c n p ... s w w p w o p k v g
9 e b s y t a f c b g ... s w w p w o p k s m
10 e x y y t l f c b g ... s w w p w o p n n g
11 e x y y t a f c b n ... s w w p w o p k s m
12 e b s y t a f c b w ... s w w p w o p n s g
13 p x y w t p f c n k ... s w w p w o p n v u
14 e x f n f n f w b n ... f w w p w o e k a g
15 e s f g f n f c n k ... s w w p w o p n y u
16 e f f w f n f w b k ... s w w p w o e n a g
17 p x s n t p f c n n ... s w w p w o p k s g
18 p x y w t p f c n n ... s w w p w o p n s u
19 p x s n t p f c n k ... s w w p w o p n s u

20 rows × 23 columns

# Preview the first 10 rows of the renamed, still letter-coded data.
mymushroom.head(10)
poison-class cap-shape cap-surface cap-color bruises odor gill-attachment gill-spacing gill-size gill-color ... stalk-surface-below-ring stalk-color-above-ring stalk-color-below-ring veil-type veil-color ring-number ring-type spore-print-color population habitat
0 p x s n t p f c n k ... s w w p w o p k s u
1 e x s y t a f c b k ... s w w p w o p n n g
2 e b s w t l f c b n ... s w w p w o p n n m
3 p x y w t p f c n n ... s w w p w o p k s u
4 e x s g f n f w b k ... s w w p w o e n a g
5 e x y y t a f c b n ... s w w p w o p k n g
6 e b s w t a f c b g ... s w w p w o p k n m
7 e b y w t l f c b n ... s w w p w o p n s m
8 p x y w t p f c n p ... s w w p w o p k v g
9 e b s y t a f c b g ... s w w p w o p k s m

10 rows × 23 columns

def p0(stringvalue):
    """Encode the poison-class label as a binary target.

    'e' maps to 0 and 'p' maps to 1 (presumably edible / poisonous; confirm
    against the data dictionary).

    Raises:
        Exception: if stringvalue is neither 'e' nor 'p'.
    """
    if stringvalue == 'e':
        return 0
    if stringvalue == 'p':
        return 1
    raise Exception("Encoding failed in p0 missing data maybe?")
######################################################################
# Feature Encoding Functions using a Simple Substitution Cipher     ##
######################################################################
def c1(stringvalue):
    """Encode a cap-shape letter as its integer position in the code list.

    Codes: bell=b -> 0, conical=c -> 1, convex=x -> 2, flat=f -> 3,
    knobbed=k -> 4, sunken=s -> 5.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    for position, symbol in enumerate(('b', 'c', 'x', 'f', 'k', 's')):
        if stringvalue == symbol:
            return position
    # No symbol matched: unknown or missing category.
    raise Exception("Encoding failed in c1 missing data maybe?")
######################################################################
def c2(stringvalue):
    """Encode a cap-surface letter as its integer position in the code list.

    Codes: fibrous=f -> 0, grooves=g -> 1, scaly=y -> 2, smooth=s -> 3.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    for position, symbol in enumerate(('f', 'g', 'y', 's')):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c2 missing data maybe?")
######################################################################
def c3(stringvalue):
    """Encode a cap-color letter as its integer position in the code list.

    Codes: brown=n -> 0, buff=b -> 1, cinnamon=c -> 2, gray=g -> 3,
    green=r -> 4, pink=p -> 5, purple=u -> 6, red=e -> 7, white=w -> 8,
    yellow=y -> 9.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    palette = ('n', 'b', 'c', 'g', 'r', 'p', 'u', 'e', 'w', 'y')
    for position, symbol in enumerate(palette):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c3 missing data maybe?")
######################################################################
def c4(stringvalue):
    """Encode the bruises flag (a simple binary column).

    Codes: no=f -> 0, bruises=t -> 1.

    Raises:
        Exception: if stringvalue is neither 'f' nor 't'.
    """
    for position, symbol in enumerate(('f', 't')):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c4 missing data maybe?")
######################################################################
def c5(stringvalue):
    """Encode an odor letter as its integer position in the code list.

    Codes: almond=a -> 0, anise=l -> 1, creosote=c -> 2, fishy=y -> 3,
    foul=f -> 4, musty=m -> 5, none=n -> 6, pungent=p -> 7, spicy=s -> 8.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    scents = ('a', 'l', 'c', 'y', 'f', 'm', 'n', 'p', 's')
    for position, symbol in enumerate(scents):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c5 missing data maybe?")
######################################################################
def c6(stringvalue):
    """Encode a gill-attachment letter as its integer position in the code list.

    Codes: attached=a -> 0, descending=d -> 1, free=f -> 2, notched=n -> 3.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    for position, symbol in enumerate(('a', 'd', 'f', 'n')):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c6 missing data maybe?")
######################################################################
def c7(stringvalue):
    """Encode a gill-spacing letter as its integer position in the code list.

    Codes: close=c -> 0, crowded=w -> 1, distant=d -> 2.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    for position, symbol in enumerate(('c', 'w', 'd')):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c7 missing data maybe?")
######################################################################
def c8(stringvalue):
    """Encode the gill-size letter (a binary column).

    Codes: broad=b -> 0, narrow=n -> 1.

    Raises:
        Exception: if stringvalue is neither 'b' nor 'n'.
    """
    for position, symbol in enumerate(('b', 'n')):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c8 missing data maybe?")
######################################################################
def c9(stringvalue):
    """Encode a gill-color letter as its integer position in the code list.

    Codes: black=k -> 0, brown=n -> 1, buff=b -> 2, chocolate=h -> 3,
    gray=g -> 4, green=r -> 5, orange=o -> 6, pink=p -> 7, purple=u -> 8,
    red=e -> 9, white=w -> 10, yellow=y -> 11.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    palette = ('k', 'n', 'b', 'h', 'g', 'r', 'o', 'p', 'u', 'e', 'w', 'y')
    for position, symbol in enumerate(palette):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c9 missing data maybe?")
######################################################################
def c10(stringvalue):
    """Encode the stalk-shape letter (a binary column).

    Codes: enlarging=e -> 0, tapering=t -> 1.

    Raises:
        Exception: if stringvalue is neither 'e' nor 't'.
    """
    for position, symbol in enumerate(('e', 't')):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c10 missing data maybe?")
######################################################################
def c11(stringvalue):
    """Encode a stalk-root letter as its integer position in the code list.

    Codes: bulbous=b -> 0, club=c -> 1, cup=u -> 2, equal=e -> 3,
    rhizomorphs=z -> 4, rooted=r -> 5, missing=? -> 6.  The missing marker
    '?' is a valid input and takes the last position, not zero.

    Raises:
        Exception: if stringvalue is not one of the listed symbols.
    """
    roots = ('b', 'c', 'u', 'e', 'z', 'r', '?')
    for position, symbol in enumerate(roots):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c11 missing data maybe?")
######################################################################
def c12(stringvalue):
    """Encode a stalk-surface-above-ring letter as its integer position.

    Codes: fibrous=f -> 0, scaly=y -> 1, silky=k -> 2, smooth=s -> 3.
    A '?' marker is also accepted and maps to 4.

    Raises:
        Exception: if stringvalue is not one of the listed symbols.
    """
    for position, symbol in enumerate(('f', 'y', 'k', 's', '?')):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c12 missing data maybe?")
######################################################################
def c13(stringvalue):
    """Encode a stalk-surface-below-ring letter as its integer position.

    Codes: fibrous=f -> 0, scaly=y -> 1, silky=k -> 2, smooth=s -> 3.
    A '?' marker is also accepted and maps to 4.

    Raises:
        Exception: if stringvalue is not one of the listed symbols.
    """
    for position, symbol in enumerate(('f', 'y', 'k', 's', '?')):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c13 missing data maybe?")
######################################################################
def c14(stringvalue):
    """Encode a stalk-color-above-ring letter as its integer position.

    Codes: brown=n -> 0, buff=b -> 1, cinnamon=c -> 2, gray=g -> 3,
    orange=o -> 4, pink=p -> 5, red=e -> 6, white=w -> 7, yellow=y -> 8.
    There is no missing-value symbol for this column.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    palette = ('n', 'b', 'c', 'g', 'o', 'p', 'e', 'w', 'y')
    for position, symbol in enumerate(palette):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c14 missing data maybe?")
######################################################################
def c15(stringvalue):
    """Encode a stalk-color-below-ring letter as its integer position.

    Codes: brown=n -> 0, buff=b -> 1, cinnamon=c -> 2, gray=g -> 3,
    orange=o -> 4, pink=p -> 5, red=e -> 6, white=w -> 7, yellow=y -> 8.
    There is no missing-value symbol for this column.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    palette = ('n', 'b', 'c', 'g', 'o', 'p', 'e', 'w', 'y')
    for position, symbol in enumerate(palette):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c15 missing data maybe?")
######################################################################
def c16(stringvalue):
    """Encode the veil-type letter (a binary column).

    Codes: partial=p -> 0, universal=u -> 1.

    Raises:
        Exception: if stringvalue is neither 'p' nor 'u'.
    """
    for position, symbol in enumerate(('p', 'u')):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c16 missing data maybe?")
######################################################################
def c17(stringvalue):
    """Encode a veil-color letter as its integer position in the code list.

    Codes: brown=n -> 0, orange=o -> 1, white=w -> 2, yellow=y -> 3.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    for position, symbol in enumerate(('n', 'o', 'w', 'y')):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c17 missing data maybe?")
######################################################################
def c18(stringvalue):
    """Encode a ring-number letter as its integer position in the code list.

    Codes: none=n -> 0, one=o -> 1, two=t -> 2.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    for position, symbol in enumerate(('n', 'o', 't')):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c18 missing data maybe?")
######################################################################
def c19(stringvalue):
    """Encode a ring-type letter as its integer position in the code list.

    Codes: cobwebby=c -> 0, evanescent=e -> 1, flaring=f -> 2, large=l -> 3,
    none=n -> 4, pendant=p -> 5, sheathing=s -> 6, zone=z -> 7.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    ring_kinds = ('c', 'e', 'f', 'l', 'n', 'p', 's', 'z')
    for position, symbol in enumerate(ring_kinds):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c19 missing data maybe?")
######################################################################
def c20(stringvalue):
    """Encode a spore-print-color letter as its integer position.

    Codes: black=k -> 0, brown=n -> 1, buff=b -> 2, chocolate=h -> 3,
    green=r -> 4, orange=o -> 5, purple=u -> 6, white=w -> 7, yellow=y -> 8.
    A '?' marker is also accepted and maps to 9.

    Raises:
        Exception: if stringvalue is not one of the listed symbols.
    """
    palette = ('k', 'n', 'b', 'h', 'r', 'o', 'u', 'w', 'y', '?')
    for position, symbol in enumerate(palette):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c20 missing data maybe?")
######################################################################
def c21(stringvalue):
    """Encode a population letter as its integer position in the code list.

    Codes: abundant=a -> 0, clustered=c -> 1, numerous=n -> 2,
    scattered=s -> 3, several=v -> 4, solitary=y -> 5.
    A '?' marker is also accepted and maps to 6.

    Raises:
        Exception: if stringvalue is not one of the listed symbols.
    """
    densities = ('a', 'c', 'n', 's', 'v', 'y', '?')
    for position, symbol in enumerate(densities):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c21 missing data maybe?")
######################################################################
def c22(stringvalue):
    """Encode a habitat letter as its integer position in the code list.

    Codes: grasses=g -> 0, leaves=l -> 1, meadows=m -> 2, paths=p -> 3,
    urban=u -> 4, waste=w -> 5, woods=d -> 6.

    Raises:
        Exception: if stringvalue is not one of the listed letters.
    """
    habitats = ('g', 'l', 'm', 'p', 'u', 'w', 'd')
    for position, symbol in enumerate(habitats):
        if stringvalue == symbol:
            return position
    raise Exception("Encoding failed in c22 missing data maybe?")

# Each column is replaced in `interim` by the integer codes its encoder
# produces from the letter codes in `mymushroom`.  Columns are processed in
# file order: the target first, then the 22 features.
column_encoders = [
    ('poison-class', p0),
    ('cap-shape', c1),
    ('cap-surface', c2),
    ('cap-color', c3),
    ('bruises', c4),
    ('odor', c5),
    ('gill-attachment', c6),
    ('gill-spacing', c7),
    ('gill-size', c8),
    ('gill-color', c9),
    ('stalk-shape', c10),
    ('stalk-root', c11),
    ('stalk-surface-above-ring', c12),
    ('stalk-surface-below-ring', c13),
    ('stalk-color-above-ring', c14),
    ('stalk-color-below-ring', c15),
    ('veil-type', c16),
    ('veil-color', c17),
    ('ring-number', c18),
    ('ring-type', c19),
    ('spore-print-color', c20),
    ('population', c21),
    ('habitat', c22),
]
for column_name, encoder in column_encoders:
    interim[column_name] = mymushroom[column_name].apply(encoder)
interim.head()
poison-class cap-shape cap-surface cap-color bruises odor gill-attachment gill-spacing gill-size gill-color ... stalk-surface-below-ring stalk-color-above-ring stalk-color-below-ring veil-type veil-color ring-number ring-type spore-print-color population habitat
0 1 2 3 0 1 7 2 0 1 0 ... 3 7 7 0 2 1 5 0 3 4
1 0 2 3 9 1 0 2 0 0 0 ... 3 7 7 0 2 1 5 1 2 0
2 0 0 3 8 1 1 2 0 0 1 ... 3 7 7 0 2 1 5 1 2 2
3 1 2 2 8 1 7 2 0 1 1 ... 3 7 7 0 2 1 5 0 3 4
4 0 2 3 3 0 6 2 1 0 0 ... 3 7 7 0 2 1 1 1 0 0

5 rows × 23 columns

from sklearn.model_selection import train_test_split

# Separate the encoded table into predictors and target.  All 22 attributes
# after the class column are used as features.  Alternatives left over from
# earlier experiments: a hand-picked subset of columns, or every feature
# except 'veil-type' via attributes[1:16] + attributes[17:].
feature_cols = attributes[1:]
X = interim.loc[:, feature_cols]     # features
y = interim.loc[:, 'poison-class']   # target variable

# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.25, random_state=0)
X_train, X_test, y_train, y_test = split_parts
feature_cols[16]
'veil-color'
from sklearn.linear_model import LogisticRegression

# Build and fit the classifier in one step.  max_iter is raised to 1000 rather
# than using the bare LogisticRegression() constructor (presumably so the
# solver has room to converge; confirm).
logreg = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predict the class of every held-out row, then show the fitted parameters:
# the intercept first, followed by one coefficient per feature column.
y_pred = logreg.predict(X_test)
print(logreg.intercept_[0])
print(logreg.coef_)
#y.head()
7.985575946121393
[[-0.10794435  1.08926906  0.08797603 -3.47364929  0.14180387  0.1864682
  -8.98042887  7.73751204 -0.11238355 -0.05928835 -2.02832314 -0.7033172
  -0.05014807 -0.30924012  0.33874996  0.          0.31015366 -0.78437801
  -0.84016796  0.46958621 -0.59083624 -0.196766  ]]
# import the metrics module
from sklearn import metrics
# NOTE(review): the arguments are passed as (y_pred, y_test), the reverse of
# scikit-learn's documented (y_true, y_pred) order.  As written, rows index the
# predicted class and columns the actual class, which matches the axis labels
# used in the heatmap further down.  If the argument order is ever changed,
# swap those axis labels too.
cnf_matrix = metrics.confusion_matrix(y_pred, y_test)
# Bare expression: displays the matrix when run as the last line of a notebook cell.
cnf_matrix
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
# Draw the confusion matrix as an annotated heatmap.
class_names=[0,1] # name  of classes
fig, ax = plt.subplots()
# Tick positions/labels for the two classes, set on the current axes before the
# heatmap is drawn.
# NOTE(review): seaborn sets its own ticks when it draws, so these two calls
# may have no visible effect -- confirm.
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)
# create heatmap
# annot=True writes the count in each cell; fmt='g' is the number format for them.
sns.heatmap(pd.DataFrame(cnf_matrix), annot=True, cmap="YlGnBu" ,fmt='g')
# Put the x-axis label at the top of the plot.
ax.xaxis.set_label_position("top")
plt.tight_layout()
# y=1.1 lifts the title above the top-positioned x-axis label.
plt.title('Confusion matrix', y=1.1)
# Rows of cnf_matrix come from y_pred and columns from y_test (see the
# confusion_matrix call), hence predicted on the y-axis and actual on the x-axis.
plt.ylabel('Predicted label')
plt.xlabel('Actual label');
../../_images/poisonmushroom_12_0.png
# NOTE(review): bare string literal that is evaluated and discarded.  It looks
# like a leftover header from an unrelated Hooke-Jeeves optimization script;
# confirm and remove.
'''
/* Nonlinear Optimization using the algorithm of Hooke and Jeeves  */
'''
'\n/* Nonlinear Optimization using the algorithm of Hooke and Jeeves  */\n'