In [1]:
import pandas as pd

Data Description

vehicle: model of the car
year: year of manufacture
msrp: manufacturer's suggested retail price in 2013 dollars 
acceleration: acceleration rate in km per hour per second 
mpg: fuel economy in miles per gallon
class: the model's class. 
In [3]:
# Read the hybrid-vehicle table from the CSV that sits next to the notebook.
# The bare name on the last line makes the notebook render the frame.
hybrid = pd.read_csv(filepath_or_buffer="hybrid.csv")
hybrid
Out[3]:
vehicle year msrp acceleration mpg class
0 Prius (1st Gen) 1997 24509.74 7.46 41.26 Compact
1 Tino 2000 35354.97 8.20 54.10 Compact
2 Prius (2nd Gen) 2000 26832.25 7.97 45.23 Compact
3 Insight 2000 18936.41 9.52 53.00 Two Seater
4 Civic (1st Gen) 2001 25833.38 7.04 47.04 Compact
... ... ... ... ... ... ...
148 S400 2013 92350.00 13.89 21.00 Large
149 Prius Plug-in 2013 32000.00 9.17 50.00 Midsize
150 C-Max Energi Plug-in 2013 32950.00 11.76 43.00 Midsize
151 Fusion Energi Plug-in 2013 38700.00 11.76 43.00 Midsize
152 Chevrolet Volt 2013 39145.00 11.11 37.00 Compact

153 rows × 6 columns

Positive association: as acceleration increases, msrp tends to increase.

In [4]:
# Scatter of price (msrp) against acceleration, one point per row of `hybrid`.
hybrid.plot(kind="scatter", x="acceleration", y="msrp")
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x16f82060bc8>

Negative association: as mpg increases, msrp tends to decrease.

In [5]:
# Scatter of price (msrp) against fuel economy (mpg); the axes are named
# explicitly so the x/y roles are clear without knowing the argument order.
hybrid.plot(kind="scatter", x="mpg", y="msrp")
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x16f838c2bc8>

Using standard units: subtract the column's mean from each value, then divide by the column's standard deviation.

In [6]:
# Express acceleration in standard units: distance from the column mean,
# measured in multiples of the column's standard deviation.
hybrid["sd_acceleration"] = hybrid["acceleration"].pipe(
    lambda column: (column - column.mean()) / column.std()
)
hybrid.head()
Out[6]:
vehicle year msrp acceleration mpg class sd_acceleration
0 Prius (1st Gen) 1997 24509.74 7.46 41.26 Compact -1.529984
1 Tino 2000 35354.97 8.20 54.10 Compact -1.278303
2 Prius (2nd Gen) 2000 26832.25 7.97 45.23 Compact -1.356528
3 Insight 2000 18936.41 9.52 53.00 Two Seater -0.829357
4 Civic (1st Gen) 2001 25833.38 7.04 47.04 Compact -1.672830
In [7]:
def standard_units(values):
    """Return ``values`` converted to standard units.

    Each entry becomes ``(entry - mean) / std``, where ``mean`` and ``std``
    come from ``values.mean()`` and ``values.std()``.  For a pandas Series,
    ``.std()`` defaults to the sample standard deviation (``ddof=1``).

    Parameters
    ----------
    values : pandas.Series
        Numeric column to standardize.

    Returns
    -------
    pandas.Series
        A new Series with the same index as ``values``; the input is not
        modified.
    """
    return (values - values.mean()) / values.std()


# One standardized companion column per variable used in the scatter plots.
# Each assignment overwrites any existing column of the same name, so the
# cell can be re-run safely.
hybrid["sd_acceleration"] = standard_units(hybrid["acceleration"])
hybrid["sd_msrp"] = standard_units(hybrid["msrp"])
hybrid["sd_mpg"] = standard_units(hybrid["mpg"])
In [10]:
# Same acceleration-vs-price scatter, with both axes in standard units.
hybrid.plot(kind="scatter", x="sd_acceleration", y="sd_msrp")
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x16f83a6a188>
In [11]:
# Same mpg-vs-price scatter, with both axes in standard units; the axes are
# named explicitly rather than passed by position.
hybrid.plot(kind="scatter", x="sd_mpg", y="sd_msrp")
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x16f8301f9c8>

Correlation coefficient

In [20]:
# Six hand-picked (x, y) points used to illustrate the correlation coefficient.
raw_data = dict(
    x=[1, 2, 3, 4, 5, 6],
    y=[2, 3, 1, 5, 2, 7],
)
df = pd.DataFrame(data=raw_data)
df.plot(kind="scatter", x="x", y="y")
Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x16f83bb2148>
In [21]:
# Pairwise correlation matrix of the columns of `df`; Pearson is the
# method pandas uses by default, spelled out here for the reader.
df.corr(method="pearson")
Out[21]:
x y
x 1.000000 0.617416
y 0.617416 1.000000
In [19]:
# NOTE(review): same call as the cell directly above, and this cell's
# execution count is out of order. It is a candidate for removal.
df.corr(method="pearson")
Out[19]:
x y
x 1.000000 0.617416
y 0.617416 1.000000
In [13]:
# Four points with y equal to x in every row.
raw_data = dict(
    x=[1, 2, 3, 4],
    y=[1, 2, 3, 4],
)
df = pd.DataFrame(data=raw_data)
df
Out[13]:
x y
0 1 1
1 2 2
2 3 3
3 4 4
In [14]:
# Pairwise correlation matrix of the columns of `df`; Pearson is the
# method pandas uses by default, spelled out here for the reader.
df.corr(method="pearson")
Out[14]:
x y
x 1.0 1.0
y 1.0 1.0
In [15]:
# The same straight line as before, plus a fifth point (5, 0) that falls
# far below it.
raw_data = dict(
    x=[1, 2, 3, 4, 5],
    y=[1, 2, 3, 4, 0],
)
df = pd.DataFrame(data=raw_data)
df
Out[15]:
x y
0 1 1
1 2 2
2 3 3
3 4 4
4 5 0
In [16]:
# Pairwise correlation matrix of the columns of `df`; Pearson is the
# method pandas uses by default, spelled out here for the reader.
df.corr(method="pearson")
Out[16]:
x y
x 1.0 0.0
y 0.0 1.0
In [ ]: