import pprint
import xarray
import numpy
import json
import glob
import io
import os
import pandas
import pickle

def kernel_1():
    t4 = 'kernel_1-t3.dat'

    def preprocess(t4):
        t1 = '/kaggle/input/mlb-player-digital-engagement-forecasting'
        t2 = glob.glob(
            os.path.join(
                t1,
                '*.csv'
            )
        )

        t3 = {
            o : pandas.read_csv(o).to_xarray()
            for o in t2
        }

        with io.open(t4, 'wb') as f:
            pickle.dump(t3, f)

    if not os.path.exists(t4):
        preprocess(t4=t4)

    with io.open(t4, 'rb') as f:
        t3 = pickle.load(f)


    return dict(
        t3=t3,
    )

def kernel_2(
    o_1=None,
):
    t1 = {}

    for k in [
        'playerTwitterFollowers',
        'teamTwitterFollowers',
        'games',
        'events'
    ]:
        t4 = '%s.nc' % k
        if not os.path.exists(t4):
            print('started %s' % t4)
            t2 = '/kaggle/input/mlb-player-digital-engagement-forecasting/train.csv'
            t3 = pandas.DataFrame(
                sum(
                    [
                        json.loads(o)
                        for o in o_1['t3'][t2][k].values
                        if isinstance(o, str)
                    ],
                    []
                )
            ).to_xarray()
            t3.to_netcdf(t4)
            print('cached %s' % t4)

        if k == 'events':
            t5 = '%s-v2.nc' % k
            if not os.path.exists(t5):
                t2 = xarray.load_dataset(t4)
                t3 = t2.sel(
                    index=numpy.arange(
                        2017653 - 10 * 1000,
                        2017653 + 1
                    )
                )
                t3.to_netcdf(t5)
            t1[k] = xarray.load_dataset(t5)
            print('loaded %s' % t5)
        else:
            t1[k] = xarray.load_dataset(t4)
            print('loaded %s' % t4)


    return dict(
        t1=t1,
    )

def kernel_3(should_exist=None):
    if should_exist is None:
        should_exist = False

    t3 = [
        ('playerTwitterFollowers', None),
        ('teamTwitterFollowers', None),
        ('games', None),
        ('events', 'events-v2.nc'),
    ]

    o_1 = None
    o_2 = None

    t4 = '/kaggle/input/garbage'
    t5 = {}
    for k, v in t3:
        if v is None:
            t1 = os.path.join(
                t4,
                '%s.nc' % k,
            )
        else:
            t1 = os.path.join(
                t4,
                v,
            )

        if os.path.exists(t1):
            t2 = xarray.load_dataset(t1)
        else:
            if should_exist:
                pprint.pprint([k, v, t1])
                raise NotImplementedError

            if o_1 is None:
                o_1 = kernel_1()
            if o_2 is None:
                o_2 = kernel_2(
                    o_1=o_1
                )

            t2 = o_2['events']
        t5[k] = t2

    return dict(
        t5=t5,
    )

def kernel_4(
    o_3=None,
):
    [
        print(
            o_3['t5']['events'].to_dataframe().iloc[k].to_json(indent=4)
        )
        for k in range(-10, -1)
    ]

    [
        print(
            o_3['t5']['games'].to_dataframe().iloc[k].to_json(indent=4)
        )
        for k in range(-10, -1)
    ]


    t4 = 'https://www.youtube.com/watch?v=reaC7BHgL3M'

    r"""
        {
            "gamePk":634280,
            "gameType":"R",
            "season":2021,
            "gameDate":"2021-04-30",
            "gameTimeUTC":"2021-04-30T23:37:00Z",
            "resumeDate":"",
            "resumedFrom":"",
            "codedGameState":"F",
            "detailedGameState":"Final",
            "isTie":0.0,
            "gameNumber":1,
            "doubleHeader":"N",
            "dayNight":"night",
            "scheduledInnings":9,
            "gamesInSeries":3.0,
            "seriesDescription":"Regular Season",
            "homeId":141,
            "homeName":"Toronto Blue Jays",
            "homeAbbrev":"TOR",
            "homeWins":12,
            "homeLosses":12,
            "homeWinPct":0.5,
            "homeWinner":true,
            "homeScore":13.0,
            "awayId":144,
            "awayName":"Atlanta Braves",
            "awayAbbrev":"ATL",
            "awayWins":12.0,
            "awayLosses":14.0,
            "awayWinPct":0.462,
            "awayWinner":false,
            "awayScore":5.0
        }
    """

    t1 = numpy.where(o_3['t5']['events']['gamePk'] == 634280)[0]
    t5 = o_3['t5']['events'].index.data
    t6 = t5[t1]
    t2 = o_3['t5']['events'].sel(index=t6)
    t3 = o_3['t5']['games'].to_dataframe().iloc[-2].to_dict()
    pprint.pprint(t3)
    assert t3['gamePk'] == 634280

    dict(
        t2=t2,
        t3=t3,
        t4=t4,
    )