# Preamble script block to identify host, user, and kernel
import sys
# The two "!" lines below are IPython/Jupyter shell escapes (not plain Python):
# they run the system commands that report the machine name and current user.
! hostname
! whoami
# Path of the Python interpreter that is executing this kernel.
print(sys.executable)
# Interpreter version as a free-form string, then as a structured tuple.
print(sys.version)
print(sys.version_info)
# Let's import the necessary libraries:
import numpy as np
import pandas as pd
import statistics
import scipy.stats
import matplotlib.pyplot as plt
1. Read the "AirTraffic.csv" file as a dataframe and check its first few rows.
2. Use descriptive functions of the Pandas library to learn more about the dataframe
3. Compute the arithmetic and harmonic mean of 'Distance'.
4. Find the median of 'Distance'.
5. Find the range of 'Distance'.
6. Find the IQR of 'Distance'.
7. Use descriptive functions of the Pandas library to get a 5-number summary of 'Distance'. Plot a box plot without outliers.
8. Find the variance and standard deviation of 'Distance'.
9. Find the skewness and kurtosis of 'Distance'.
# Load the air-traffic records from the CSV file into a DataFrame.
AT = pd.read_csv("AirTraffic.csv")
# Preview the first five rows (five is also the default for head()).
AT.head(n=5)
# Print a concise summary of the DataFrame (columns, dtypes, non-null counts).
AT.info()
Distance = AT['DISTANCE']
# The statistics module documents support for int, float, Decimal and Fraction
# only. Iterating a pandas Series yields NumPy scalars instead, and with an
# integer-dtype column the result can be cast back to a NumPy integer and lose
# its fractional part. Series.tolist() hands over native Python numbers.
distance_values = Distance.tolist()

# Arithmetic mean, using the mean function from the statistics library.
mean = statistics.mean(distance_values)
print("The arithmetic mean distance of the 2020 flights is ",round(mean,2),"miles")

# Harmonic mean: the reciprocal of the arithmetic mean of the reciprocals.
hmean = statistics.harmonic_mean(distance_values)
print("The harmonic mean distance of the 2020 flights is ",round(hmean,2),"miles")
# Median of the flight distances, using the median function from the
# statistics library.
Distance = AT["DISTANCE"]
median = statistics.median(Distance)
print(
    "The median of distance of the 2020 flights is ",
    median,
    "miles",
)
# Range of the flight distances: largest value minus smallest value.
# NumPy calls this "peak to peak" (ptp).
Distance = AT["DISTANCE"]
Range = np.ptp(Distance)
print(
    "The range of distance of the 2020 flights is ",
    Range,
    "miles",
)
# Interquartile range of the flight distances: by default scipy.stats.iqr
# returns the 75th percentile minus the 25th percentile.
Distance = AT["DISTANCE"]
IQR = scipy.stats.iqr(Distance)
print(
    "The IQR of distance of the 2020 flights is ",
    IQR,
    "miles",
)
Distance = AT["DISTANCE"]
# describe() reports count, mean, std, min, the quartiles and max; the
# min / 25% / 50% / 75% / max entries form the 5-number summary.
Distance.describe()

# Box plot of the distances on a 7x5 inch figure.
fig = plt.figure(figsize=(7, 5))
plt.boxplot(
    Distance,
    showfliers=False,  # do not draw the outlier markers
    medianprops=dict(linewidth=1, color="purple"),  # thin purple median line
)
plt.show()
Distance = AT['DISTANCE']
# The statistics module documents support for int, float, Decimal and Fraction
# only. Iterating a pandas Series yields NumPy scalars instead, and with an
# integer-dtype column the result can be cast back to a NumPy integer and lose
# its fractional part. Series.tolist() hands over native Python numbers.
distance_values = Distance.tolist()

# statistics.variance / statistics.stdev are the *sample* statistics
# (n - 1 in the denominator).
var = statistics.variance(distance_values)
sd = statistics.stdev(distance_values)
print("The variance and standard deviation of distance of the 2020 flights is ",round(var,2)," and ",round(sd,2)," respectively")
# Shape of the distance distribution.
# With SciPy's defaults both estimators are the biased (population) versions,
# and kurtosis uses Fisher's definition, i.e. excess kurtosis (normal == 0).
Distance = AT["DISTANCE"]
skew = scipy.stats.skew(Distance)
kurtosis = scipy.stats.kurtosis(Distance)
print(
    "The skewness and kurtosis of distance of the 2020 flights is ",
    round(skew, 2),
    " and ",
    round(kurtosis, 2),
    " respectively",
)
1. Read the "Lubbock_Oct_T&P.csv" file as a dataframe and check its first few rows.
2. Use descriptive functions of the Pandas library and explain the format of the dataframe
3. Compute the arithmetic and harmonic mean of 'temperature'.
4. Find the median of 'precipitation' and 'temperature'.
5. Find the range and IQR of 'precipitation'.
6. Find the 10th, 40th, and 70th percentile of 'temperature'.
7. Provide a 5-number summary of 'precipitation'. Plot a box plot without outliers. Interpret it in your own words.
8. Find the variance and standard deviation of 'precipitation'.
9. Find the skewness and kurtosis of 'precipitation'.
Here are some great reads on this topic:
Here are some great videos on these topics: