diff --git a/python/tasks/mlb_player.py b/python/tasks/mlb_player.py index ce22379..d5e3d2a 100644 --- a/python/tasks/mlb_player.py +++ b/python/tasks/mlb_player.py @@ -1,22 +1,34 @@ import glob import os import pandas +import pickle def kernel_1(): - t1 = '/kaggle/input/mlb-player-digital-engagement-forecasting' - t2 = glob.glob( - os.path.join( - t1, - '*.csv' + t4 = 'kernel_1-t3.dat' + + def preprocess(t4): + t1 = '/kaggle/input/mlb-player-digital-engagement-forecasting' + t2 = glob.glob( + os.path.join( + t1, + '*.csv' + ) ) - ) - t3 = { - o : pandas.read_csv(o) - for o in t2 - } + + t3 = { + o : pandas.read_csv(o) + for o in t2 + } + + with io.open(t4, 'wb') as f: + pickle.dump(t3, f) + + if not os.path.exists(t4): + preprocess(t4=t4) + + with io.open(t4, 'rb') as f: + t3 = pickle.load(f) return dict( - t1=t1, - t2=t2, t3=t3, )