In [ ]:
import numpy as np
import pandas as pd
from fbprophet import Prophet
import datetime as dt
from sklearn import metrics
import math
from tqdm import tqdm
from multiprocessing import Pool, cpu_count
In [ ]:
# Prepare Train and test data
sales_data = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/sales_train_evaluation.csv')

# Day columns d_1..d_1913 feed `train`; the following 28 (d_1914..d_1941) feed `test`.
Xcols = ['d_{}'.format(day) for day in range(1, 1914)]
ycols = ['d_{}'.format(day) for day in range(1914, 1942)]

# One label per row of the sales table: "<item_id>_<store_id>".
ids = sales_data.item_id + '_' + sales_data.store_id

# Transpose so that every column is one series and every row is one day column.
train = sales_data[Xcols].T
train.columns = ids

test = sales_data[ycols].T
test.columns = ids
In [ ]:
# Prepare days variables
# Lower bound of the date ranges built from these two values.
firstday = dt.datetime.strptime('2011-01-29', '%Y-%m-%d').date()
# Upper bound: four weeks (28 days) before 2016-06-19.
lastday = (dt.datetime.strptime('2016-06-19', '%Y-%m-%d') - dt.timedelta(weeks = 4)).date()
In [ ]:
# Prepare calendar data
calendar = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/calendar.csv')
# Wrap every date in a one-element list so that a group-wise sum concatenates
# all dates of an event into a single list.
calendar['datelist'] = calendar['date'].apply(lambda x: [x])

# Per-event date lists from both event columns, outer-joined on the event name.
# Both inputs are named 'datelist', so the merge yields 'datelist_x' (event_name_1)
# and 'datelist_y' (event_name_2).
calendarnew = pd.DataFrame(calendar.groupby('event_name_1')['datelist'].sum()).merge(
    pd.DataFrame(calendar.groupby('event_name_2')['datelist'].sum()),
    left_index = True, right_index = True, how = 'outer')

# After the outer join an event found in only one of the two columns has NaN on
# the other side. Normalise BOTH sides to lists so that `+` below always
# concatenates (previously only the `_y` side was handled).
calendarnew['datelist_x'] = [x if isinstance(x, list) else [] for x in calendarnew['datelist_x']]
calendarnew['datelist_y'] = [x if isinstance(x, list) else [] for x in calendarnew['datelist_y']]
calendarnew['datelist_x'] = calendarnew['datelist_x'] + calendarnew['datelist_y']

# `axis` has to be given by keyword: passing it positionally was deprecated in
# pandas 1.3 and raises TypeError from pandas 2.0 on.
calendarnew = calendarnew.drop('datelist_y', axis = 1)
calendarnew
In [ ]:
#Prepare holidays data
# Build one frame per event (index label of `calendarnew`) from the date list
# stored in the first column, then stack them into the single frame that is
# handed to Prophet through its `holidays` argument.
holidayslist = [
    pd.DataFrame({
        'holiday': event_name,
        'ds': pd.to_datetime(event_dates),
        'lower_window': 0,
        'upper_window': 1
    })
    for event_name, event_dates in calendarnew.iloc[:, 0].items()
]
holidaysconcat = pd.concat(holidayslist)
In [ ]:
# Prepare date periods
# All rows of `train` followed by all rows of `test`.
total_data = pd.concat([train, test], axis = 0)

# Daily ranges expressed relative to `firstday` / `lastday`:
#   date_period  : firstday .. lastday
#   date_period2 : the 56 days right after lastday
#   date_period3 : firstday .. lastday + 28 days
#   date_period4 : lastday + 29 days .. lastday + 84 days
date_period = pd.date_range(start = firstday, end = lastday)
date_period2 = pd.date_range(start = lastday + dt.timedelta(days = 1),
                             end = lastday + dt.timedelta(days = 56))
date_period3 = pd.date_range(start = firstday,
                             end = lastday + dt.timedelta(days = 28))
date_period4 = pd.date_range(start = lastday + dt.timedelta(days = 29),
                             end = lastday + dt.timedelta(days = 84))
In [ ]:
#run forecast for one column
def onecolfcst(current_col):
    """Fit a Prophet model on one column of `train` and forecast 28 days.

    The model is trained on `train[current_col]` paired with every entry of
    `date_period` except the last 28, and then asked to predict those last
    28 entries.

    Relies on the module-level `train`, `date_period` and `holidaysconcat`.
    Assumes `train` has exactly as many rows as `date_period[:-28]` has
    entries -- TODO confirm if either is rebuilt differently.

    Parameters
    ----------
    current_col : str
        Label of the `train` column to forecast.

    Returns
    -------
    list
        `current_col + '_validation'` followed by the 28 predicted `yhat`
        values.
    """
    history_days = date_period[:-28]
    horizon_days = date_period[-28:]

    history = pd.DataFrame({'ds' : history_days, 'y' : train[current_col]})

    model = Prophet(holidays = holidaysconcat)
    model.fit(history)

    forecast = model.predict(pd.DataFrame({'ds': horizon_days}))

    result_row = [current_col + '_validation']
    result_row.extend(forecast['yhat'])
    return result_row
In [ ]:
# run all columns in parallel
# Only this slice of the series is forecast in the current run.
batch_cols = train.columns[5000:10000]

# The context manager guarantees the worker processes are torn down even when
# a task raises; before, an exception skipped close()/join() and leaked them.
# list() drains every result before the block exits, so nothing is cut short.
with Pool(cpu_count()) as p:
    # imap returns a plain iterator without a length, so tqdm needs `total`
    # to be able to show a percentage and an ETA.
    predictions = list(tqdm(p.imap(onecolfcst, batch_cols), total = len(batch_cols)))
In [ ]:
# concatenate results
# Every record is [id, 28 forecast values]; name the columns id, F1..F28.
newdata = pd.DataFrame.from_records(predictions)
newdata.columns = ['id'] + ['F{}'.format(step) for step in range(1, 29)]
# NOTE(review): to_csv also writes the integer row index as an unnamed first
# column -- confirm whether whatever reads 'myoutput.csv' expects that.
newdata.to_csv('myoutput.csv')