import pprint
import xarray
import numpy
import json
import glob
import io
import os
import pandas
import pickle

def kernel_1():
    t4 = 'kernel_1-t3.dat'

    def preprocess(t4):
        t1 = '/kaggle/input/mlb-player-digital-engagement-forecasting'
        t2 = glob.glob(
            os.path.join(
                t1,
                '*.csv'
            )
        )

        t3 = {
            o : pandas.read_csv(o).to_xarray()
            for o in t2
        }

        with io.open(t4, 'wb') as f:
            pickle.dump(t3, f)

    if not os.path.exists(t4):
        preprocess(t4=t4)

    with io.open(t4, 'rb') as f:
        t3 = pickle.load(f)


    return dict(
        t3=t3,
    )

def kernel_2(
    o_1=None,
):
    t1 = {}

    for k in [
        'playerTwitterFollowers',
        'teamTwitterFollowers',
        'games',
        'events'
    ]:
        t4 = '%s.nc' % k
        if not os.path.exists(t4):
            print('started %s' % t4)
            t2 = '/kaggle/input/mlb-player-digital-engagement-forecasting/train.csv'
            t3 = pandas.DataFrame(
                sum(
                    [
                        json.loads(o)
                        for o in o_1['t3'][t2][k].values
                        if isinstance(o, str)
                    ],
                    []
                )
            ).to_xarray()
            t3.to_netcdf(t4)
            print('cached %s' % t4)

        if k == 'events':
            t5 = '%s-v2.nc' % k
            if not os.path.exists(t5):
                t2 = xarray.load_dataset(t4)
                t3 = t2.sel(
                    index=numpy.arange(
                        2017653 - 10 * 1000,
                        2017653 + 1
                    )
                )
                t3.to_netcdf(t5)
            t1[k] = xarray.load_dataset(t5)
            print('loaded %s' % t5)
        else:
            t1[k] = xarray.load_dataset(t4)
            print('loaded %s' % t4)


    return dict(
        t1=t1,
    )

def kernel_3(should_exist=None):
    if should_exist is None:
        should_exist = False

    t3 = [
        ('playerTwitterFollowers', None),
        ('teamTwitterFollowers', None),
        ('games', None),
        ('events', 'events-v2.nc'),
    ]

    o_1 = None
    o_2 = None

    t4 = '/kaggle/input/garbage'
    t5 = {}
    for k, v in t3:
        if v is None:
            t1 = os.path.join(
                t4,
                '%s.nc' % k,
            )
        else:
            t1 = os.path.join(
                t4,
                v,
            )

        if os.path.exists(t1):
            t2 = xarray.load_dataset(t1)
        else:
            if should_exist:
                pprint.pprint([k, v, t1])
                raise NotImplementedError

            if o_1 is None:
                o_1 = kernel_1()
            if o_2 is None:
                o_2 = kernel_2(
                    o_1=o_1
                )

            t2 = o_2['events']
        t5[k] = t2

    return dict(
        t5=t5,
    )

def kernel_4(o_3=o_3):
    [
        print(
            o_3['t5']['events'].to_dataframe().iloc[k].to_json(indent=4)
            for k in range(-10, -1)
        )
    ]