import numpy as np
import pandas as pd
from fbprophet import Prophet
import datetime as dt
from sklearn import metrics
import math
from tqdm import tqdm
from multiprocessing import Pool, cpu_count
# Prepare train and test data.
# Each row of the CSV is one series; the d_<n> columns hold one value per day.
sales_data = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/sales_train_evaluation.csv')

# Day columns d_1 .. d_1913 form the training window, d_1914 .. d_1941 the test window.
Xcols = [f'd_{day}' for day in range(1, 1914)]
ycols = [f'd_{day}' for day in range(1914, 1942)]

# One label per CSV row, built as "<item_id>_<store_id>".
ids = sales_data['item_id'] + '_' + sales_data['store_id']

# Transpose so that every row is a day and every column is one labelled series.
train = sales_data[Xcols].T
train.columns = ids

test = sales_data[ycols].T
test.columns = ids
# Prepare day variables (plain datetime.date objects, no time component).
# firstday is 2011-01-29; lastday is 28 days before 2016-06-19.
firstday = dt.date(2011, 1, 29)
lastday = dt.date(2016, 6, 19) - dt.timedelta(days=28)
# Prepare calendar data: for every event name, collect the list of dates on
# which it occurs, whether it is recorded in event_name_1 or in event_name_2.
calendar = pd.read_csv('/kaggle/input/m5-forecasting-accuracy/calendar.csv')

# Wrap each date in a one-element list so that summing inside a group
# concatenates the dates belonging to that group.
calendar['datelist'] = calendar['date'].apply(lambda x: [x])

# Index: event name. Columns after the merge: datelist_x (dates from
# event_name_1) and datelist_y (dates from event_name_2).
calendarnew = pd.DataFrame(calendar.groupby('event_name_1')['datelist'].sum()).merge(
    pd.DataFrame(calendar.groupby('event_name_2')['datelist'].sum()),
    left_index=True, right_index=True, how='outer')

# The outer merge leaves NaN wherever an event appears in only one of the two
# event columns. Normalise BOTH sides to an empty list so the concatenation
# below never mixes NaN with a list.
calendarnew['datelist_x'] = [
    x if isinstance(x, list) else [] for x in calendarnew['datelist_x']
]
calendarnew['datelist_y'] = [
    x if isinstance(x, list) else [] for x in calendarnew['datelist_y']
]

# Element-wise list concatenation: all dates of the event end up in datelist_x.
calendarnew['datelist_x'] = calendarnew['datelist_x'] + calendarnew['datelist_y']

# Use the `columns=` keyword: passing the axis positionally (drop(label, 1))
# is no longer accepted by current pandas releases.
calendarnew = calendarnew.drop(columns='datelist_y')
# Prepare holidays data.
# Build one frame per event (row of calendarnew): the row label becomes the
# 'holiday' value, and the row's first value (its list of dates) is parsed into
# the 'ds' column. Every date gets lower_window 0 and upper_window 1.
holidayslist = [
    pd.DataFrame({
        'holiday': event_name,
        'ds': pd.to_datetime(event_row.iloc[0]),
        'lower_window': 0,
        'upper_window': 1,
    })
    for event_name, event_row in calendarnew.iterrows()
]

# Stack the per-event frames into the single table later passed to Prophet.
holidaysconcat = pd.concat(holidayslist)
# Prepare date periods.
# Training rows followed by test rows, stacked along the day axis.
total_data = pd.concat([train, test])

# firstday .. lastday (inclusive).
date_period = pd.date_range(start=firstday, end=lastday)

# The 56 days that immediately follow lastday.
date_period2 = pd.date_range(
    start=lastday + dt.timedelta(days=1),
    end=lastday + dt.timedelta(days=56),
)

# firstday .. lastday + 28 days (inclusive).
date_period3 = pd.date_range(
    start=firstday,
    end=lastday + dt.timedelta(days=28),
)

# 56 days starting 29 days after lastday and ending 84 days after it.
date_period4 = pd.date_range(
    start=lastday + dt.timedelta(days=29),
    end=lastday + dt.timedelta(days=84),
)
#run forecast for one column
def onecolfcst(current_col):
    """Fit a Prophet model on one column of `train` and forecast 28 dates.

    The model is trained on `train[current_col]` paired with all but the last
    28 entries of the module-level `date_period`, using `holidaysconcat` as
    the holidays table, and then asked to predict the last 28 entries of
    `date_period`.

    Args:
        current_col: Label of the column of `train` to forecast.

    Returns:
        A list whose first element is ``current_col + '_validation'`` and
        whose remaining elements are the predicted ``yhat`` values, one per
        forecast date.
    """
    history = pd.DataFrame({
        'ds': date_period[:-28],
        'y': train[current_col],
    })

    model = Prophet(holidays=holidaysconcat)
    model.fit(history)

    # Predict on an explicit frame holding the final 28 dates of date_period.
    horizon = pd.DataFrame({'ds': date_period[-28:]})
    yhat = model.predict(horizon)['yhat']

    result_row = [current_col + '_validation']
    result_row.extend(yhat)
    return result_row
# Run all selected columns in parallel and write the forecasts to disk.
# The __main__ guard keeps worker processes from re-executing this section
# when the multiprocessing start method re-imports the main module (spawn).
if __name__ == '__main__':
    # Only this slice of the series is forecast by this run.
    columns = train.columns[5000:10000]

    # The context manager releases the worker processes even if a forecast
    # raises; close()/join() still shut the pool down gracefully on success.
    with Pool(cpu_count()) as p:
        # imap yields results in input order; `total` lets tqdm show progress
        # against the number of series instead of a bare counter.
        predictions = list(tqdm(p.imap(onecolfcst, columns), total=len(columns)))
        p.close()
        p.join()

    # Concatenate results: one row per series, an id followed by F1 .. F28.
    # Passing the names to from_records (instead of assigning them afterwards)
    # also works when `predictions` is empty.
    newdata = pd.DataFrame.from_records(
        predictions,
        columns=['id'] + ['F' + str(i) for i in range(1, 29)],
    )
    # NOTE(review): to_csv also writes the DataFrame index as an unnamed first
    # column; pass index=False if the consumer expects only id, F1..F28.
    newdata.to_csv('myoutput.csv')