freelance-project-34-market.../python/tasks/mlb_player.py
Siarhei Siniak eb1b18c153 [~] Refactor
2021-07-16 11:36:05 +03:00

89 lines
1.9 KiB
Python

import glob
import io
import json
import os
import pickle

import numpy
import pandas
import xarray
def kernel_1(
    input_dir='/kaggle/input/mlb-player-digital-engagement-forecasting',
    cache_path='kernel_1-t3.dat',
):
    """Load every ``*.csv`` in ``input_dir`` as an xarray dataset, with caching.

    On the first call the CSV files are read with pandas, converted with
    ``DataFrame.to_xarray()`` and pickled as one dictionary to ``cache_path``.
    Later calls skip the CSV parsing and only unpickle that file.

    Args:
        input_dir: Directory that is globbed (non-recursively) for ``*.csv``.
        cache_path: Location of the pickle cache file.

    Returns:
        ``dict(t3=...)`` where ``t3`` maps each CSV path (``input_dir`` joined
        with the file name, exactly as returned by ``glob``) to its dataset.
    """
    def preprocess(t4):
        # Parse all CSV files and store the result as a pickle at ``t4``.
        csv_paths = glob.glob(os.path.join(input_dir, '*.csv'))
        datasets = {
            path: pandas.read_csv(path).to_xarray()
            for path in csv_paths
        }
        # Dump to a sibling temporary file and rename afterwards: an
        # interrupted dump must not leave a truncated file at ``t4``, because
        # the existence check below would treat it as a valid cache forever.
        tmp_path = t4 + '.tmp'
        with io.open(tmp_path, 'wb') as f:
            pickle.dump(datasets, f)
        os.replace(tmp_path, t4)

    if not os.path.exists(cache_path):
        preprocess(t4=cache_path)

    # NOTE(security): unpickling executes arbitrary code embedded in the file.
    # This is only safe while ``cache_path`` is a file written by this
    # function; never point it at data from an untrusted source.
    with io.open(cache_path, 'rb') as f:
        t3 = pickle.load(f)

    return dict(
        t3=t3,
    )
def kernel_2(
    o_1=None,
):
    """Expand the JSON-encoded columns of ``train.csv`` into cached datasets.

    For each of the columns ``playerTwitterFollowers``,
    ``teamTwitterFollowers``, ``games`` and ``events`` the string cells are
    parsed as JSON, the parsed lists are concatenated into one table and the
    table is saved as ``<column>.nc`` in the current working directory. When
    that file already exists the parsing step is skipped.

    For ``events`` a second, reduced file ``events-v2.nc`` is derived from
    ``events.nc`` by selecting a fixed range of ``index`` labels, and that
    reduced dataset is the one returned.

    Args:
        o_1: Mapping with a ``'t3'`` entry keyed by CSV path (the shape
            ``kernel_1`` returns). It is only read when a ``<column>.nc``
            cache file is missing, so it may be ``None`` once all caches
            exist.

    Returns:
        ``dict(t1=...)`` where ``t1`` maps each column name to the dataset
        loaded with ``xarray.load_dataset``.

    Raises:
        TypeError: If a cache file is missing and ``o_1`` is ``None``.
    """
    train_csv = '/kaggle/input/mlb-player-digital-engagement-forecasting/train.csv'

    # Inclusive range of ``index`` labels kept in ``events-v2.nc``.
    # NOTE(review): presumably the tail of the events table — confirm that
    # 2017653 is still its last index if the input data changes.
    events_last_index = 2017653
    events_window = 10 * 1000

    t1 = {}

    for k in [
        'playerTwitterFollowers',
        'teamTwitterFollowers',
        'games',
        'events'
    ]:
        cache_path = '%s.nc' % k

        if not os.path.exists(cache_path):
            print('started %s' % cache_path)

            if o_1 is None:
                raise TypeError(
                    'kernel_2 needs o_1 (with o_1[\'t3\'][%r]) because %s '
                    'is not cached yet' % (train_csv, cache_path)
                )

            # Non-string cells (e.g. NaN for missing values) are skipped.
            # Each string cell is expected to decode to a list of records;
            # flattening in one pass avoids the quadratic cost of
            # ``sum(list_of_lists, [])``.
            records = [
                record
                for cell in o_1['t3'][train_csv][k].values
                if isinstance(cell, str)
                for record in json.loads(cell)
            ]
            pandas.DataFrame(records).to_xarray().to_netcdf(cache_path)
            print('cached %s' % cache_path)

        if k == 'events':
            reduced_path = '%s-v2.nc' % k

            if not os.path.exists(reduced_path):
                events = xarray.load_dataset(cache_path)
                reduced = events.sel(
                    index=numpy.arange(
                        events_last_index - events_window,
                        events_last_index + 1
                    )
                )
                reduced.to_netcdf(reduced_path)

            t1[k] = xarray.load_dataset(reduced_path)
            print('loaded %s' % reduced_path)
        else:
            t1[k] = xarray.load_dataset(cache_path)
            print('loaded %s' % cache_path)

    return dict(
        t1=t1,
    )