""" Relating Gender and IQ ======================= Going back to the brain size + IQ data, test if the VIQ of male and female are different after removing the effect of brain size, height and weight. Notice that here 'Gender' is a categorical value. As it is a non-float data type, statsmodels is able to automatically infer this. """ import pandas from statsmodels.formula.api import ols data = pandas.read_csv('../brain_size.csv', sep=';', na_values='.') model = ols('VIQ ~ Gender + MRI_Count + Height', data).fit() print(model.summary()) # Here, we don't need to define a contrast, as we are testing a single # coefficient of our model, and not a combination of coefficients. # However, defining a contrast, which would then be a 'unit contrast', # will give us the same results print(model.f_test([0, 1, 0, 0])) ############################################################################### # Here we plot a scatter matrix to get intuitions on our results. # This goes beyond what was asked in the exercise # This plotting is useful to get an intuitions on the relationships between # our different variables from pandas.tools import plotting import matplotlib.pyplot as plt # Fill in the missing values for Height for plotting data['Height'].fillna(method='pad', inplace=True) # The parameter 'c' is passed to plt.scatter and will control the color # The same holds for parameters 'marker', 'alpha' and 'cmap', that # control respectively the type of marker used, their transparency and # the colormap plotting.scatter_matrix(data[['VIQ', 'MRI_Count', 'Height']], c=(data['Gender'] == 'Female'), marker='o', alpha=1, cmap='winter') fig = plt.gcf() fig.suptitle("blue: male, green: female", size=13) plt.show()