import pandas as pd
# Scores observed for each of the three companies (samples differ in size).
A = [12.6, 12, 11.8, 11.9, 13, 12.5, 14]
B = [10, 10.2, 10, 12, 14, 13]
C = [10.1, 13, 13.4, 12.9, 8.9, 10.7, 13.6, 12]

# Long format: one row per observation, labelled with its company.
all_scores = [*A, *B, *C]
company_names = [
    label
    for label, scores in (('A', A), ('B', B), ('C', C))
    for _ in scores
]
data = pd.DataFrame({'company': company_names, 'score': all_scores})

# Bare expressions: echoed in an interactive session, discarded in a script.
data
data.groupby('company').mean()
import statsmodels.api as sm
from statsmodels.formula.api import ols
# Fit an ordinary-least-squares model of score on the company factor and
# print the resulting ANOVA table (type 1 is anova_lm's default).
lm = ols(formula='score ~ company', data=data).fit()
table = sm.stats.anova_lm(lm, typ=1)
print(table)
# Grand mean over every observation, regardless of company.
overall_mean = data['score'].mean()
overall_mean  # echoed only in an interactive session

# Total sum of squares: squared deviation of each score from the grand mean.
# The grand mean is also stored on the frame as a constant column.
data['overall_mean'] = overall_mean
_total_deviation = data['score'] - data['overall_mean']
ss_total = sum(_total_deviation ** 2)
ss_total
# Per-company mean of the 'score' column only.
# Averaging every numeric column would also carry the constant 'overall_mean'
# column into group_means; the merge below would then rename the clashing
# columns to 'overall_mean_x' / 'overall_mean_y', and data['overall_mean']
# further down would raise KeyError.
group_means = data.groupby('company')[['score']].mean()
group_means = group_means.rename(columns={'score': 'group_mean'})
group_means  # echoed only in an interactive session

# Attach each row's company mean as a 'group_mean' column; the existing
# 'overall_mean' column on data is left untouched.
data = data.merge(group_means, left_on='company', right_index=True)

# Residual (within-group) sum of squares: each score against the mean of its
# own company.
ss_residual = sum((data['score'] - data['group_mean'])**2)
ss_residual

# Explained (model / between-group) sum of squares: each row's company mean
# against the grand mean. Summing per row weights every company by its number
# of observations.
ss_explained = sum((data['overall_mean'] - data['group_mean'])**2)
ss_explained
# Degrees of freedom: observations minus groups for the residual term,
# groups minus one for the explained term.
n_obs = len(data)
n_groups = len(data['company'].unique())
df_residual = n_obs - n_groups
df_explained = n_groups - 1

# Mean squares: each sum of squares divided by its degrees of freedom.
ms_residual = ss_residual / df_residual
ms_residual  # echoed only in an interactive session
ms_explained = ss_explained / df_explained
ms_explained
# F statistic: ratio of the explained mean square to the residual mean square.
f = ms_explained / ms_residual
f  # echoed only in an interactive session

# p-value: upper-tail probability of the F distribution with
# (df_explained, df_residual) degrees of freedom.
# The survival function is used rather than 1 - cdf(...): once the cdf is
# close to 1 (very small p-values) the subtraction cancels almost all
# significant digits, whereas sf evaluates the tail directly.
import scipy.stats
p_value = scipy.stats.f.sf(f, df_explained, df_residual)
p_value