import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
# Toy data set: 10 observations of sales with two categorical factors.
# The design is unbalanced (6 'sat' rows vs 4 'sun' rows), no 'sun' weekday
# row has 'rain' weather, and sales is fully determined by weather
# ('rain' -> 100, 'sun' -> 10000).
weekday = ['sat'] * 6 + ['sun'] * 4
weather = ['rain'] * 5 + ['sun'] * 5
sales = [100] * 5 + [10000] * 5
data = pd.DataFrame({'weekday': weekday, 'weather': weather, 'sales': sales})
# A bare `data` expression only displays output in a notebook/REPL; print it
# so the table is also shown when the file runs as a script.
print(data)
# In this example:
# Type I ANOVA on the model with both main effects and their interaction;
# weekday is listed first in the formula.
# Reading of the output: weekday comes out as the more important factor, and
# the interaction effect is not significant.
lm = ols(
    formula='sales ~ C(weekday)*C(weather)',
    data=data,
).fit()
table = sm.stats.anova_lm(lm, typ=1)  # Type 1 ANOVA table (DataFrame)
print(table)
# Type II ANOVA on the additive model (main effects only, so the model has
# no interaction term).
# Reading of the output: weather comes out as the more important factor.
lm = ols(
    formula='sales ~ C(weekday) + C(weather)',
    data=data,
).fit()
table = sm.stats.anova_lm(lm, typ=2)  # Type 2 ANOVA table (DataFrame)
print(table)
# Type III ANOVA on the model with both main effects and their interaction.
# Reading of the output: weekday comes out as the more important factor, and
# the interaction effect is not significant.
# NOTE(review): Type III results depend on the contrast coding of the
# factors; C(...) is used here without an explicit contrast -- confirm the
# default coding is what is intended.
lm = ols(
    formula='sales ~ C(weekday)*C(weather)',
    data=data,
).fit()
table = sm.stats.anova_lm(lm, typ=3)  # Type 3 ANOVA table (DataFrame)
print(table)