diff --git a/.mypy.ini b/.mypy.ini
deleted file mode 100644
index 9811c75..0000000
--- a/.mypy.ini
+++ /dev/null
@@ -1,18 +0,0 @@
-[mypy]
-mypy_path =
-    mypy-stubs,
-    deps/com.github.aiortc.aiortc/src,
-    mypy-stubs/marisa-trie-types,
-    mypy-stubs/types-debugpy,
-    python
-
-exclude =
-    python/tmp,
-    python/build
-
-plugins =
-    numpy.typing.mypy_plugin,
-    pydantic.mypy
-
-explicit_package_bases = true
-namespace_packages = true
\ No newline at end of file
diff --git a/d1/jigsaw_toxic.py b/d1/jigsaw_toxic.py
deleted file mode 100644
index 41a0287..0000000
--- a/d1/jigsaw_toxic.py
+++ /dev/null
@@ -1,694 +0,0 @@
-# %% [markdown]
-# # About this Notebook
-#
-# NLP is a very hot topic right now and, as believed by many experts, '2020 is going to be NLP's year'; with its ever-changing dynamics it is experiencing a boom, just as computer vision once did. Owing to its popularity, Kaggle recently launched two NLP competitions, and, being a lover of this hot topic, I prepared myself to join my first Kaggle Competition.

-# As I joined the competitions, being a complete beginner with deep learning techniques for NLP, all my enthusiasm took a beating when I saw everyone using all kinds of BERT; everything went over my head and I thought about quitting, but there is a special thing about Kaggle: it hooks you. I decided I had to learn this someday, so why not now, and I braced myself and sat on the learning curve. I wrote a kernel on the Tweet Sentiment Extraction competition that has since earned a gold medal; it can be viewed here: https://www.kaggle.com/tanulsingh077/twitter-sentiment-extaction-analysis-eda-and-model

-# After 10 days of extensive learning(finishing all the latest NLP approaches) , I am back here to share my leaning , by writing a kernel that starts from the very Basic RNN's to built over , all the way to BERT . I invite you all to come and learn alongside with me and take a step closer towards becoming an NLP expert - -# %% [markdown] -# # Contents -# -# In this Notebook I will start with the very Basics of RNN's and Build all the way to latest deep learning architectures to solve NLP problems. It will cover the Following: -# * Simple RNN's -# * Word Embeddings : Definition and How to get them -# * LSTM's -# * GRU's -# * BI-Directional RNN's -# * Encoder-Decoder Models (Seq2Seq Models) -# * Attention Models -# * Transformers - Attention is all you need -# * BERT -# -# I will divide every Topic into four subsections: -# * Basic Overview -# * In-Depth Understanding : In this I will attach links of articles and videos to learn about the topic in depth -# * Code-Implementation -# * Code Explanation -# -# This is a comprehensive kernel and if you follow along till the end , I promise you would learn all the techniques completely -# -# Note that the aim of this notebook is not to have a High LB score but to present a beginner guide to understand Deep Learning techniques used for NLP. Also after discussing all of these ideas , I will present a starter solution for this competiton - -# %% [markdown] -# **This kernel has been a work of more than 10 days If you find my kernel useful and my efforts appreciable, Please Upvote it , it motivates me to write more Quality content** - -# %% [code] -import numpy as np # linear algebra -import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) -from tqdm import tqdm -from sklearn.model_selection import train_test_split -import tensorflow as tf -from keras.models import Sequential -from keras.layers.recurrent import LSTM, GRU,SimpleRNN -from keras.layers.core import Dense, Activation, Dropout -from keras.layers.embeddings import Embedding -from keras.layers.normalization import BatchNormalization -from keras.utils import np_utils -from sklearn import preprocessing, decomposition, model_selection, metrics, pipeline -from keras.layers import GlobalMaxPooling1D, Conv1D, MaxPooling1D, Flatten, Bidirectional, SpatialDropout1D -from keras.preprocessing import sequence, text -from keras.callbacks import EarlyStopping - - -import matplotlib.pyplot as plt -import seaborn as sns -#%matplotlib inline -from plotly import graph_objs as go -import plotly.express as px -import plotly.figure_factory as ff - -# %% [markdown] -# # Configuring TPU's -# -# For this version of Notebook we will be using TPU's as we have to built a BERT Model - -# %% [code] -# Detect hardware, return appropriate distribution strategy -try: - # TPU detection. No parameters necessary if TPU_NAME environment variable is - # set: this is always the case on Kaggle. - tpu = tf.distribute.cluster_resolver.TPUClusterResolver() - print('Running on TPU ', tpu.master()) -except ValueError: - tpu = None - -if tpu: - tf.config.experimental_connect_to_cluster(tpu) - tf.tpu.experimental.initialize_tpu_system(tpu) - strategy = tf.distribute.experimental.TPUStrategy(tpu) -else: - # Default distribution strategy in Tensorflow. Works on CPU and single GPU. 
- strategy = tf.distribute.get_strategy() - -print("REPLICAS: ", strategy.num_replicas_in_sync) - -# %% [code] -train = pd.read_csv('/kaggle/input/jigsaw-multilingual-toxic-comment-classification/jigsaw-toxic-comment-train.csv') -validation = pd.read_csv('/kaggle/input/jigsaw-multilingual-toxic-comment-classification/validation.csv') -test = pd.read_csv('/kaggle/input/jigsaw-multilingual-toxic-comment-classification/test.csv') - -# %% [markdown] -# We will drop the other columns and approach this problem as a Binary Classification Problem and also we will have our exercise done on a smaller subsection of the dataset(only 12000 data points) to make it easier to train the models - -# %% [code] -train.drop(['severe_toxic','obscene','threat','insult','identity_hate'],axis=1,inplace=True) - -# %% [code] -train = train.loc[:12000,:] -train.shape - -# %% [markdown] -# We will check the maximum number of words that can be present in a comment , this will help us in padding later - -# %% [code] -train['comment_text'].apply(lambda x:len(str(x).split())).max() - -# %% [markdown] -# Writing a function for getting auc score for validation - -# %% [code] -def roc_auc(predictions,target): - ''' - This methods returns the AUC Score when given the Predictions - and Labels - ''' - - fpr, tpr, thresholds = metrics.roc_curve(target, predictions) - roc_auc = metrics.auc(fpr, tpr) - return roc_auc - -# %% [markdown] -# ### Data Preparation - -# %% [code] -xtrain, xvalid, ytrain, yvalid = train_test_split(train.comment_text.values, train.toxic.values, - stratify=train.toxic.values, - random_state=42, - test_size=0.2, shuffle=True) - -# %% [markdown] -# # Before We Begin -# -# Before we Begin If you are a complete starter with NLP and never worked with text data, I am attaching a few kernels that will serve as a starting point of your journey -# * https://www.kaggle.com/arthurtok/spooky-nlp-and-topic-modelling-tutorial -# * https://www.kaggle.com/abhishek/approaching-almost-any-nlp-problem-on-kaggle -# -# If you want a more basic dataset to practice with here is another kernel which I wrote: -# * https://www.kaggle.com/tanulsingh077/what-s-cooking -# -# Below are some Resources to get started with basic level Neural Networks, It will help us to easily understand the upcoming parts -# * https://www.youtube.com/watch?v=aircAruvnKk&list=PL_h2yd2CGtBHEKwEH5iqTZH85wLS-eUzv -# * https://www.youtube.com/watch?v=IHZwWFHWa-w&list=PL_h2yd2CGtBHEKwEH5iqTZH85wLS-eUzv&index=2 -# * https://www.youtube.com/watch?v=Ilg3gGewQ5U&list=PL_h2yd2CGtBHEKwEH5iqTZH85wLS-eUzv&index=3 -# * https://www.youtube.com/watch?v=tIeHLnjs5U8&list=PL_h2yd2CGtBHEKwEH5iqTZH85wLS-eUzv&index=4 -# -# For Learning how to visualize test data and what to use view: -# * https://www.kaggle.com/tanulsingh077/twitter-sentiment-extaction-analysis-eda-and-model -# * https://www.kaggle.com/jagangupta/stop-the-s-toxic-comments-eda - -# %% [markdown] -# # Simple RNN -# -# ## Basic Overview -# -# What is a RNN? -# -# Recurrent Neural Network(RNN) are a type of Neural Network where the output from previous step are fed as input to the current step. In traditional neural networks, all the inputs and outputs are independent of each other, but in cases like when it is required to predict the next word of a sentence, the previous words are required and hence there is a need to remember the previous words. Thus RNN came into existence, which solved this issue with the help of a Hidden Layer. -# -# Why RNN's? 
-# -# https://www.quora.com/Why-do-we-use-an-RNN-instead-of-a-simple-neural-network -# -# ## In-Depth Understanding -# -# * https://medium.com/mindorks/understanding-the-recurrent-neural-network-44d593f112a2 -# * https://www.youtube.com/watch?v=2E65LDnM2cA&list=PL1F3ABbhcqa3BBWo170U4Ev2wfsF7FN8l -# * https://www.d2l.ai/chapter_recurrent-neural-networks/rnn.html -# -# ## Code Implementation -# -# So first I will implement the and then I will explain the code step by step - -# %% [code] -# using keras tokenizer here -token = text.Tokenizer(num_words=None) -max_len = 1500 - -token.fit_on_texts(list(xtrain) + list(xvalid)) -xtrain_seq = token.texts_to_sequences(xtrain) -xvalid_seq = token.texts_to_sequences(xvalid) - -#zero pad the sequences -xtrain_pad = sequence.pad_sequences(xtrain_seq, maxlen=max_len) -xvalid_pad = sequence.pad_sequences(xvalid_seq, maxlen=max_len) - -word_index = token.word_index - -# %% [code] -#%%time -with strategy.scope(): - # A simpleRNN without any pretrained embeddings and one dense layer - model = Sequential() - model.add(Embedding(len(word_index) + 1, - 300, - input_length=max_len)) - model.add(SimpleRNN(100)) - model.add(Dense(1, activation='sigmoid')) - model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) - -model.summary() - -# %% [code] -model.fit(xtrain_pad, ytrain, nb_epoch=5, batch_size=64*strategy.num_replicas_in_sync) #Multiplying by Strategy to run on TPU's - -# %% [code] -scores = model.predict(xvalid_pad) -print("Auc: %.2f%%" % (roc_auc(scores,yvalid))) - -# %% [code] -scores_model = [] -scores_model.append({'Model': 'SimpleRNN','AUC_Score': roc_auc(scores,yvalid)}) - -# %% [markdown] -# ## Code Explanantion -# * Tokenization

-# If you have watched the videos and read the links above, you will know that in an RNN we feed a sentence in word by word. Each word is represented as a one-hot vector whose dimension is the number of words in the vocabulary + 1.
-# What the Keras Tokenizer does is take all the unique words in the corpus and build a dictionary with words as keys and their number of occurrences as values; it then sorts the dictionary in descending order of counts and assigns index 1 to the most frequent word, index 2 to the next, and so on. So if the word 'the' occurred most often in the corpus, it is assigned index 1, and the one-hot vector representing 'the' has a 1 at position 1 and zeros everywhere else. A minimal toy example of this tokenize-and-pad pipeline is sketched below.
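-# The sketch below runs the same fit / convert / pad steps on a two-sentence toy corpus; the toy_* names are only for illustration, and text and sequence are the keras.preprocessing modules imported at the top of the notebook.
-
-# %% [code]
-toy_corpus = ['the cat sat on the mat', 'the dog sat']
-toy_token = text.Tokenizer(num_words=None)
-toy_token.fit_on_texts(toy_corpus)                   # build the word -> index dictionary
-print(toy_token.word_index)                          # e.g. {'the': 1, 'sat': 2, 'cat': 3, ...}
-toy_seq = toy_token.texts_to_sequences(toy_corpus)   # every sentence becomes a list of indices
-print(toy_seq)                                       # e.g. [[1, 3, 2, 4, 1, 5], [1, 6, 2]]
-toy_pad = sequence.pad_sequences(toy_seq, maxlen=6)  # zero-pad on the left so all rows share one length
-print(toy_pad)
-
-# %% [markdown]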
-# Try printing the first two elements of xtrain_seq and you will see that every word is now represented by an integer.
-
-# %% [code]
-xtrain_seq[:1]
-
-# %% [markdown]
-# Now you might be wondering: what is padding, and why is it done?

-#
-# Here is the answer :
-# * https://www.quora.com/Which-effect-does-sequence-padding-have-on-the-training-of-a-neural-network
-# * https://machinelearningmastery.com/data-preparation-variable-length-input-sequences-sequence-prediction/
-# * https://www.coursera.org/lecture/natural-language-processing-tensorflow/padding-2Cyzs
-#
-# Also, people sometimes use special tokens while tokenizing, such as EOS (end of string) and BOS (beginning of string). Here is the reason why that is done:
-# * https://stackoverflow.com/questions/44579161/why-do-we-do-padding-in-nlp-tasks
-#
-# The code token.word_index simply gives the vocabulary dictionary that Keras created for us.
-
-# %% [markdown]
-# * Building the Neural Network
-#
-# To understand the dimensions of the input and output given to an RNN in Keras, here is a beautiful article: https://medium.com/@shivajbd/understanding-input-and-output-shape-in-lstm-keras-c501ee95c65e
-#
-# The first line, model = Sequential(), tells Keras that we will build our network sequentially. We first add the Embedding layer: it is also a layer of neurons which takes the n-dimensional one-hot vector of every word as input and converts it into a 300-dimensional vector, giving us word embeddings similar to word2vec. We could have used word2vec directly, but the Embedding layer keeps learning during training, which enhances the embeddings.
-# Next we add 100 SimpleRNN units without any dropout or regularization (the later models replace these with LSTM units).
-# At last we add a single neuron with a sigmoid activation, which takes the output of the 100 RNN cells (note that we have 100 cells, not 100 layers) to predict the result, and then we compile the model with the Adam optimizer.
-#
-# * Comments on the model

-# We can see that our model reaches a training accuracy of 1, which is just insane; we are clearly overfitting, but this was the simplest model of all, and we can tune a lot of hyperparameters (RNN units, batch normalization, dropout, etc.) to get a better result. The point is that we got an AUC score of 0.82 without much effort, and we have now learned about RNNs. Deep learning really is revolutionary.
-
-# %% [markdown]
-# # Word Embeddings
-#
-# While building our simple RNN model we talked about using word embeddings. So what are word embeddings, and how do we get them?
-# Here is the answer :
-# * https://www.coursera.org/learn/nlp-sequence-models/lecture/6Oq70/word-representation
-# * https://machinelearningmastery.com/what-are-word-embeddings/
-#
-# A toy illustration of the idea follows before we load the real pretrained vectors.
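-# The 3-dimensional vectors below are hand-made purely for illustration (real GloVe vectors are 300-dimensional and learned from large corpora); the point is only that words with similar meanings end up with similar vectors.
-
-# %% [code]
-# Toy, hand-made "embeddings"; the words and values are illustrative only.
-toy_embeddings = {
-    'king':  np.array([0.90, 0.80, 0.10]),
-    'queen': np.array([0.85, 0.75, 0.20]),
-    'apple': np.array([0.10, 0.20, 0.90]),
-}
-
-def cosine_similarity(u, v):
-    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
-
-print(cosine_similarity(toy_embeddings['king'], toy_embeddings['queen']))  # close to 1 -> similar words
-print(cosine_similarity(toy_embeddings['king'], toy_embeddings['apple']))  # much smaller -> unrelated words
-
-# %% [markdown]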

-# The latest approach to getting word Embeddings is using pretained GLoVe or using Fasttext. Without going into too much details, I would explain how to create sentence vectors and how can we use them to create a machine learning model on top of it and since I am a fan of GloVe vectors, word2vec and fasttext. In this Notebook, I'll be using the GloVe vectors. You can download the GloVe vectors from here http://www-nlp.stanford.edu/data/glove.840B.300d.zip or you can search for GloVe in datasets on Kaggle and add the file - -# %% [code] -# load the GloVe vectors in a dictionary: - -embeddings_index = {} -f = open('/kaggle/input/glove840b300dtxt/glove.840B.300d.txt','r',encoding='utf-8') -for line in tqdm(f): - values = line.split(' ') - word = values[0] - coefs = np.asarray([float(val) for val in values[1:]]) - embeddings_index[word] = coefs -f.close() - -print('Found %s word vectors.' % len(embeddings_index)) - -# %% [markdown] -# # LSTM's -# -# ## Basic Overview -# -# Simple RNN's were certainly better than classical ML algorithms and gave state of the art results, but it failed to capture long term dependencies that is present in sentences . So in 1998-99 LSTM's were introduced to counter to these drawbacks. -# -# ## In Depth Understanding -# -# Why LSTM's? -# * https://www.coursera.org/learn/nlp-sequence-models/lecture/PKMRR/vanishing-gradients-with-rnns -# * https://www.analyticsvidhya.com/blog/2017/12/fundamentals-of-deep-learning-introduction-to-lstm/ -# -# What are LSTM's? -# * https://www.coursera.org/learn/nlp-sequence-models/lecture/KXoay/long-short-term-memory-lstm -# * https://distill.pub/2019/memorization-in-rnns/ -# * https://towardsdatascience.com/illustrated-guide-to-lstms-and-gru-s-a-step-by-step-explanation-44e9eb85bf21 -# -# # Code Implementation -# -# We have already tokenized and paded our text for input to LSTM's - -# %% [code] -# create an embedding matrix for the words we have in the dataset -embedding_matrix = np.zeros((len(word_index) + 1, 300)) -for word, i in tqdm(word_index.items()): - embedding_vector = embeddings_index.get(word) - if embedding_vector is not None: - embedding_matrix[i] = embedding_vector - -# %% [code] -#%%time -with strategy.scope(): - - # A simple LSTM with glove embeddings and one dense layer - model = Sequential() - model.add(Embedding(len(word_index) + 1, - 300, - weights=[embedding_matrix], - input_length=max_len, - trainable=False)) - - model.add(LSTM(100, dropout=0.3, recurrent_dropout=0.3)) - model.add(Dense(1, activation='sigmoid')) - model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy']) - -model.summary() - -# %% [code] -model.fit(xtrain_pad, ytrain, nb_epoch=5, batch_size=64*strategy.num_replicas_in_sync) - -# %% [code] -scores = model.predict(xvalid_pad) -print("Auc: %.2f%%" % (roc_auc(scores,yvalid))) - -# %% [code] -scores_model.append({'Model': 'LSTM','AUC_Score': roc_auc(scores,yvalid)}) - -# %% [markdown] -# ## Code Explanation -# -# As a first step we calculate embedding matrix for our vocabulary from the pretrained GLoVe vectors . Then while building the embedding layer we pass Embedding Matrix as weights to the layer instead of training it over Vocabulary and thus we pass trainable = False. -# Rest of the model is same as before except we have replaced the SimpleRNN By LSTM Units -# -# * Comments on the Model -# -# We now see that the model is not overfitting and achieves an auc score of 0.96 which is quite commendable , also we close in on the gap between accuracy and auc . 
-# We see that in this case we used dropout and prevented overfitting the data - -# %% [markdown] -# # GRU's -# -# ## Basic Overview -# -# Introduced by Cho, et al. in 2014, GRU (Gated Recurrent Unit) aims to solve the vanishing gradient problem which comes with a standard recurrent neural network. GRU's are a variation on the LSTM because both are designed similarly and, in some cases, produce equally excellent results . GRU's were designed to be simpler and faster than LSTM's and in most cases produce equally good results and thus there is no clear winner. -# -# ## In Depth Explanation -# -# * https://towardsdatascience.com/understanding-gru-networks-2ef37df6c9be -# * https://www.coursera.org/learn/nlp-sequence-models/lecture/agZiL/gated-recurrent-unit-gru -# * https://www.geeksforgeeks.org/gated-recurrent-unit-networks/ -# -# ## Code Implementation - -# %% [code] -#%%time -with strategy.scope(): - # GRU with glove embeddings and two dense layers - model = Sequential() - model.add(Embedding(len(word_index) + 1, - 300, - weights=[embedding_matrix], - input_length=max_len, - trainable=False)) - model.add(SpatialDropout1D(0.3)) - model.add(GRU(300)) - model.add(Dense(1, activation='sigmoid')) - - model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy']) - -model.summary() - -# %% [code] -model.fit(xtrain_pad, ytrain, nb_epoch=5, batch_size=64*strategy.num_replicas_in_sync) - -# %% [code] -scores = model.predict(xvalid_pad) -print("Auc: %.2f%%" % (roc_auc(scores,yvalid))) - -# %% [code] -scores_model.append({'Model': 'GRU','AUC_Score': roc_auc(scores,yvalid)}) - -# %% [code] -scores_model - -# %% [markdown] -# # Bi-Directional RNN's -# -# ## In Depth Explanation -# -# * https://www.coursera.org/learn/nlp-sequence-models/lecture/fyXnn/bidirectional-rnn -# * https://towardsdatascience.com/understanding-bidirectional-rnn-in-pytorch-5bd25a5dd66 -# * https://d2l.ai/chapter_recurrent-modern/bi-rnn.html -# -# ## Code Implementation - -# %% [code] -#%%time -with strategy.scope(): - # A simple bidirectional LSTM with glove embeddings and one dense layer - model = Sequential() - model.add(Embedding(len(word_index) + 1, - 300, - weights=[embedding_matrix], - input_length=max_len, - trainable=False)) - model.add(Bidirectional(LSTM(300, dropout=0.3, recurrent_dropout=0.3))) - - model.add(Dense(1,activation='sigmoid')) - model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy']) - - -model.summary() - -# %% [code] -model.fit(xtrain_pad, ytrain, nb_epoch=5, batch_size=64*strategy.num_replicas_in_sync) - -# %% [code] -scores = model.predict(xvalid_pad) -print("Auc: %.2f%%" % (roc_auc(scores,yvalid))) - -# %% [code] -scores_model.append({'Model': 'Bi-directional LSTM','AUC_Score': roc_auc(scores,yvalid)}) - -# %% [markdown] -# ## Code Explanation -# -# Code is same as before,only we have added bidirectional nature to the LSTM cells we used before and is self explanatory. 
We have achieve similar accuracy and auc score as before and now we have learned all the types of typical RNN architectures - -# %% [markdown] -# **We are now at the end of part 1 of this notebook and things are about to go wild now as we Enter more complex and State of the art models .If you have followed along from the starting and read all the articles and understood everything , these complex models would be fairly easy to understand.I recommend Finishing Part 1 before continuing as the upcoming techniques can be quite overwhelming** - -# %% [markdown] -# # Seq2Seq Model Architecture -# -# ## Overview -# -# RNN's are of many types and different architectures are used for different purposes. Here is a nice video explanining different types of model architectures : https://www.coursera.org/learn/nlp-sequence-models/lecture/BO8PS/different-types-of-rnns. -# Seq2Seq is a many to many RNN architecture where the input is a sequence and the output is also a sequence (where input and output sequences can be or cannot be of different lengths). This architecture is used in a lot of applications like Machine Translation, text summarization, question answering etc -# -# ## In Depth Understanding -# -# I will not write the code implementation for this,but rather I will provide the resources where code has already been implemented and explained in a much better way than I could have ever explained. -# -# * https://www.coursera.org/learn/nlp-sequence-models/lecture/HyEui/basic-models ---> A basic idea of different Seq2Seq Models -# -# * https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html , https://machinelearningmastery.com/define-encoder-decoder-sequence-sequence-model-neural-machine-translation-keras/ ---> Basic Encoder-Decoder Model and its explanation respectively -# -# * https://towardsdatascience.com/how-to-implement-seq2seq-lstm-model-in-keras-shortcutnlp-6f355f3e5639 ---> A More advanced Seq2seq Model and its explanation -# -# * https://d2l.ai/chapter_recurrent-modern/machine-translation-and-dataset.html , https://d2l.ai/chapter_recurrent-modern/encoder-decoder.html ---> Implementation of Encoder-Decoder Model from scratch -# -# * https://www.youtube.com/watch?v=IfsjMg4fLWQ&list=PLtmWHNX-gukKocXQOkQjuVxglSDYWsSh9&index=8&t=0s ---> Introduction to Seq2seq By fast.ai - -# %% [code] -# Visualization of Results obtained from various Deep learning models -results = pd.DataFrame(scores_model).sort_values(by='AUC_Score',ascending=False) -results.style.background_gradient(cmap='Blues') - -# %% [code] -fig = go.Figure(go.Funnelarea( - text =results.Model, - values = results.AUC_Score, - title = {"position": "top center", "text": "Funnel-Chart of Sentiment Distribution"} - )) -fig.show() - -# %% [markdown] -# # Attention Models -# -# This is the toughest and most tricky part. If you are able to understand the intiuition and working of attention block , understanding transformers and transformer based architectures like BERT will be a piece of cake. This is the part where I spent the most time on and I suggest you do the same . 
Please read and view the following resources in the order I am providing to ignore getting confused, also at the end of this try to write and draw an attention block in your own way :- -# -# * https://www.coursera.org/learn/nlp-sequence-models/lecture/RDXpX/attention-model-intuition --> Only watch this video and not the next one -# * https://towardsdatascience.com/sequence-2-sequence-model-with-attention-mechanism-9e9ca2a613a -# * https://towardsdatascience.com/attention-and-its-different-forms-7fc3674d14dc -# * https://distill.pub/2016/augmented-rnns/ -# -# ## Code Implementation -# -# * https://www.analyticsvidhya.com/blog/2019/11/comprehensive-guide-attention-mechanism-deep-learning/ --> Basic Level -# * https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html ---> Implementation from Scratch in Pytorch - -# %% [markdown] -# # Transformers : Attention is all you need -# -# So finally we have reached the end of the learning curve and are about to start learning the technology that changed NLP completely and are the reasons for the state of the art NLP techniques .Transformers were introduced in the paper Attention is all you need by Google. If you have understood the Attention models,this will be very easy , Here is transformers fully explained: -# -# * http://jalammar.github.io/illustrated-transformer/ -# -# ## Code Implementation -# -# * http://nlp.seas.harvard.edu/2018/04/03/attention.html ---> This presents the code implementation of the architecture presented in the paper by Google - -# %% [markdown] -# # BERT and Its Implementation on this Competition -# -# As Promised I am back with Resiurces , to understand about BERT architecture , please follow the contents in the given order :- -# -# * http://jalammar.github.io/illustrated-bert/ ---> In Depth Understanding of BERT -# -# After going through the post Above , I guess you must have understood how transformer architecture have been utilized by the current SOTA models . Now these architectures can be used in two ways :

-# 1) We can use the model for prediction on our problems using the pretrained weights without fine-tuning or training the model for our sepcific tasks -# * EG: http://jalammar.github.io/a-visual-guide-to-using-bert-for-the-first-time/ ---> Using Pre-trained BERT without Tuning -# -# 2) We can fine-tune or train these transformer models for our task by tweaking the already pre-trained weights and training on a much smaller dataset -# * EG:* https://www.youtube.com/watch?v=hinZO--TEk4&t=2933s ---> Tuning BERT For your TASK -# -# We will be using the first example as a base for our implementation of BERT model using Hugging Face and KERAS , but contrary to first example we will also Fine-Tune our model for our task -# -# Acknowledgements : https://www.kaggle.com/xhlulu/jigsaw-tpu-distilbert-with-huggingface-and-keras -# -# -# Steps Involved : -# * Data Preparation : Tokenization and encoding of data -# * Configuring TPU's -# * Building a Function for Model Training and adding an output layer for classification -# * Train the model and get the results - -# %% [code] -# Loading Dependencies -import os -import tensorflow as tf -from tensorflow.keras.layers import Dense, Input -from tensorflow.keras.optimizers import Adam -from tensorflow.keras.models import Model -from tensorflow.keras.callbacks import ModelCheckpoint -from kaggle_datasets import KaggleDatasets -import transformers - -from tokenizers import BertWordPieceTokenizer - -# %% [code] -# LOADING THE DATA - -train1 = pd.read_csv("/kaggle/input/jigsaw-multilingual-toxic-comment-classification/jigsaw-toxic-comment-train.csv") -valid = pd.read_csv('/kaggle/input/jigsaw-multilingual-toxic-comment-classification/validation.csv') -test = pd.read_csv('/kaggle/input/jigsaw-multilingual-toxic-comment-classification/test.csv') -sub = pd.read_csv('/kaggle/input/jigsaw-multilingual-toxic-comment-classification/sample_submission.csv') - -# %% [markdown] -# Encoder FOr DATA for understanding waht encode batch does read documentation of hugging face tokenizer : -# https://huggingface.co/transformers/main_classes/tokenizer.html here - -# %% [code] -def fast_encode(texts, tokenizer, chunk_size=256, maxlen=512): - """ - Encoder for encoding the text into sequence of integers for BERT Input - """ - tokenizer.enable_truncation(max_length=maxlen) - tokenizer.enable_padding(max_length=maxlen) - all_ids = [] - - for i in tqdm(range(0, len(texts), chunk_size)): - text_chunk = texts[i:i+chunk_size].tolist() - encs = tokenizer.encode_batch(text_chunk) - all_ids.extend([enc.ids for enc in encs]) - - return np.array(all_ids) - -# %% [code] -#IMP DATA FOR CONFIG - -AUTO = tf.data.experimental.AUTOTUNE - - -# Configuration -EPOCHS = 3 -BATCH_SIZE = 16 * strategy.num_replicas_in_sync -MAX_LEN = 192 - -# %% [markdown] -# ## Tokenization -# -# For understanding please refer to hugging face documentation again - -# %% [code] -# First load the real tokenizer -tokenizer = transformers.DistilBertTokenizer.from_pretrained('distilbert-base-multilingual-cased') -# Save the loaded tokenizer locally -tokenizer.save_pretrained('.') -# Reload it with the huggingface tokenizers library -fast_tokenizer = BertWordPieceTokenizer('vocab.txt', lowercase=False) -fast_tokenizer - -# %% [code] -x_train = fast_encode(train1.comment_text.astype(str), fast_tokenizer, maxlen=MAX_LEN) -x_valid = fast_encode(valid.comment_text.astype(str), fast_tokenizer, maxlen=MAX_LEN) -x_test = fast_encode(test.content.astype(str), fast_tokenizer, maxlen=MAX_LEN) - -y_train = train1.toxic.values -y_valid = 
valid.toxic.values - -# %% [code] -train_dataset = ( - tf.data.Dataset - .from_tensor_slices((x_train, y_train)) - .repeat() - .shuffle(2048) - .batch(BATCH_SIZE) - .prefetch(AUTO) -) - -valid_dataset = ( - tf.data.Dataset - .from_tensor_slices((x_valid, y_valid)) - .batch(BATCH_SIZE) - .cache() - .prefetch(AUTO) -) - -test_dataset = ( - tf.data.Dataset - .from_tensor_slices(x_test) - .batch(BATCH_SIZE) -) - -# %% [code] -def build_model(transformer, max_len=512): - """ - function for training the BERT model - """ - input_word_ids = Input(shape=(max_len,), dtype=tf.int32, name="input_word_ids") - sequence_output = transformer(input_word_ids)[0] - cls_token = sequence_output[:, 0, :] - out = Dense(1, activation='sigmoid')(cls_token) - - model = Model(inputs=input_word_ids, outputs=out) - model.compile(Adam(lr=1e-5), loss='binary_crossentropy', metrics=['accuracy']) - - return model - -# %% [markdown] -# ## Starting Training -# -# If you want to use any another model just replace the model name in transformers._____ and use accordingly - -# %% [code] -#%%time -with strategy.scope(): - transformer_layer = ( - transformers.TFDistilBertModel - .from_pretrained('distilbert-base-multilingual-cased') - ) - model = build_model(transformer_layer, max_len=MAX_LEN) -model.summary() - -# %% [code] -n_steps = x_train.shape[0] // BATCH_SIZE -train_history = model.fit( - train_dataset, - steps_per_epoch=n_steps, - validation_data=valid_dataset, - epochs=EPOCHS -) - -# %% [code] -n_steps = x_valid.shape[0] // BATCH_SIZE -train_history_2 = model.fit( - valid_dataset.repeat(), - steps_per_epoch=n_steps, - epochs=EPOCHS*2 -) - -# %% [code] -sub['toxic'] = model.predict(test_dataset, verbose=1) -sub.to_csv('submission.csv', index=False) - -# %% [markdown] -# # End Notes -# -# This was my effort to share my learnings so that everyone can benifit from it.As this community has been very kind to me and helped me in learning all of this , I want to take this forward. I have shared all the resources I used to learn all the stuff .Join me and make these NLP competitions your first ,without being overwhelmed by the shear number of techniques used . It took me 10 days to learn all of this , you can learn it at your pace and dont give in , at the end of all this you will be a different person and it will all be worth it. -# -# -# ### I am attaching more resources if you want NLP end to end: -# -# 1) Books -# -# * https://d2l.ai/ -# * Jason Brownlee's Books -# -# 2) Courses -# -# * https://www.coursera.org/learn/nlp-sequence-models/home/welcome -# * Fast.ai NLP Course -# -# 3) Blogs and websites -# -# * Machine Learning Mastery -# * https://distill.pub/ -# * http://jalammar.github.io/ -# -# **This is subtle effort of contributing towards the community, if it helped you in any way please show a token of love by upvoting** diff --git a/d1/mlb_player.py b/d1/mlb_player.py deleted file mode 100644 index 0d8d28b..0000000 --- a/d1/mlb_player.py +++ /dev/null @@ -1,757 +0,0 @@ -# %% [markdown] -#
-# # MLB Player Digital Engagement Forecasting
-# ## LightGBM + CatBoost + ANN 2505f2
-
-# %% [markdown]
-# If you find this work useful, please don't forget upvoting :)
-#
- -# %% [markdown] -# #### Thanks to: @lhagiimn https://www.kaggle.com/lhagiimn/lightgbm-catboost-ann-2505f2 -# -# #### https://www.kaggle.com/columbia2131/mlb-lightgbm-starter-dataset-code-en-ja -# -# #### https://www.kaggle.com/mlconsult/1-3816-lb-lbgm-descriptive-stats-param-tune -# -# #### https://www.kaggle.com/batprem/lightgbm-ann-weight-with-love -# -# #### https://www.kaggle.com/mlconsult/1-3816-lb-lbgm-descriptive-stats-param-tune -# -# #### https://www.kaggle.com/ulrich07/mlb-ann-with-lags-tf-keras -# - -# %% [markdown] -#
-#
- -# %% [markdown] -# ## About Dataset - -# %% [markdown] -# Train.csv is stored as a csv file with each column as follows. -# -# train.csvを以下のようにして各カラムをcsvファイルとして保管しています。 - -# %% [code] {"execution":{"iopub.status.busy":"2021-06-26T07:16:47.242749Z","iopub.execute_input":"2021-06-26T07:16:47.243324Z","iopub.status.idle":"2021-06-26T07:16:48.030215Z","shell.execute_reply.started":"2021-06-26T07:16:47.243266Z","shell.execute_reply":"2021-06-26T07:16:48.029Z"}} -import os - -assert os.system(r'''cp ../input/fork-of-1-35-lightgbm-ann-2505f2-c4e96a/* .''') == 0 - -# %% [code] {"execution":{"iopub.status.busy":"2021-06-26T07:16:48.031858Z","iopub.execute_input":"2021-06-26T07:16:48.032396Z","iopub.status.idle":"2021-06-26T07:16:48.799514Z","shell.execute_reply.started":"2021-06-26T07:16:48.032357Z","shell.execute_reply":"2021-06-26T07:16:48.798628Z"}} -assert os.system(r'''ls''') == 0 - -# %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:16:48.801992Z","iopub.execute_input":"2021-06-26T07:16:48.802645Z","iopub.status.idle":"2021-06-26T07:16:48.813801Z","shell.execute_reply.started":"2021-06-26T07:16:48.802592Z","shell.execute_reply":"2021-06-26T07:16:48.812863Z"}} -#%%capture - -""" -!pip install pandarallel - -import gc - -import numpy as np -import pandas as pd -from pathlib import Path - -from pandarallel import pandarallel -pandarallel.initialize() - -BASE_DIR = Path('../input/mlb-player-digital-engagement-forecasting') -train = pd.read_csv(BASE_DIR / 'train.csv') - -null = np.nan -true = True -false = False - -for col in train.columns: - - if col == 'date': continue - - _index = train[col].notnull() - train.loc[_index, col] = train.loc[_index, col].parallel_apply(lambda x: eval(x)) - - outputs = [] - for index, date, record in train.loc[_index, ['date', col]].itertuples(): - _df = pd.DataFrame(record) - _df['index'] = index - _df['date'] = date - outputs.append(_df) - - outputs = pd.concat(outputs).reset_index(drop=True) - - outputs.to_csv(f'{col}_train.csv', index=False) - outputs.to_pickle(f'{col}_train.pkl') - - del outputs - del train[col] - gc.collect() -""" - -# %% [markdown] {"execution":{"iopub.status.busy":"2021-06-16T09:14:33.869464Z","iopub.execute_input":"2021-06-16T09:14:33.869905Z","iopub.status.idle":"2021-06-16T09:14:33.874766Z","shell.execute_reply.started":"2021-06-16T09:14:33.869879Z","shell.execute_reply":"2021-06-16T09:14:33.873097Z"}} -# ## Training - -# %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:16:48.81564Z","iopub.execute_input":"2021-06-26T07:16:48.816326Z","iopub.status.idle":"2021-06-26T07:16:50.081995Z","shell.execute_reply.started":"2021-06-26T07:16:48.816246Z","shell.execute_reply":"2021-06-26T07:16:50.080828Z"}} -import numpy as np -import pandas as pd -from pathlib import Path -from sklearn.metrics import mean_absolute_error -from datetime import timedelta -from functools import reduce -from tqdm import tqdm -import lightgbm as lgbm -import mlb -import os - -# %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:16:50.083534Z","iopub.execute_input":"2021-06-26T07:16:50.083899Z","iopub.status.idle":"2021-06-26T07:16:50.088159Z","shell.execute_reply.started":"2021-06-26T07:16:50.083863Z","shell.execute_reply":"2021-06-26T07:16:50.087357Z"}} -BASE_DIR = Path('../input/mlb-player-digital-engagement-forecasting') -TRAIN_DIR = Path('../input/mlb-pdef-train-dataset') - -# %% [code] 
{"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:16:50.08951Z","iopub.execute_input":"2021-06-26T07:16:50.090053Z","iopub.status.idle":"2021-06-26T07:16:54.221868Z","shell.execute_reply.started":"2021-06-26T07:16:50.090018Z","shell.execute_reply":"2021-06-26T07:16:54.220656Z"}} -players = pd.read_csv(BASE_DIR / 'players.csv') - -rosters = pd.read_pickle(TRAIN_DIR / 'rosters_train.pkl') -targets = pd.read_pickle(TRAIN_DIR / 'nextDayPlayerEngagement_train.pkl') -scores = pd.read_pickle(TRAIN_DIR / 'playerBoxScores_train.pkl') -scores = scores.groupby(['playerId', 'date']).sum().reset_index() - -# %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:16:54.223547Z","iopub.execute_input":"2021-06-26T07:16:54.224Z","iopub.status.idle":"2021-06-26T07:16:54.243132Z","shell.execute_reply.started":"2021-06-26T07:16:54.22395Z","shell.execute_reply":"2021-06-26T07:16:54.242076Z"}} -targets_cols = ['playerId', 'target1', 'target2', 'target3', 'target4', 'date'] -players_cols = ['playerId', 'primaryPositionName'] -rosters_cols = ['playerId', 'teamId', 'status', 'date'] -scores_cols = ['playerId', 'battingOrder', 'gamesPlayedBatting', 'flyOuts', - 'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns', - 'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch', - 'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay', - 'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi', - 'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference', - 'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching', - 'completeGamesPitching', 'shutoutsPitching', 'winsPitching', - 'lossesPitching', 'flyOutsPitching', 'airOutsPitching', - 'groundOutsPitching', 'runsPitching', 'doublesPitching', - 'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching', - 'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching', - 'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching', - 'stolenBasesPitching', 'inningsPitched', 'saveOpportunities', - 'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls', - 'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching', - 'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners', - 'inheritedRunnersScored', 'catchersInterferencePitching', - 'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves', - 'assists', 'putOuts', 'errors', 'chances', 'date'] - -feature_cols = ['label_playerId', 'label_primaryPositionName', 'label_teamId', - 'label_status', 'battingOrder', 'gamesPlayedBatting', 'flyOuts', - 'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns', - 'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch', - 'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay', - 'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi', - 'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference', - 'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching', - 'completeGamesPitching', 'shutoutsPitching', 'winsPitching', - 'lossesPitching', 'flyOutsPitching', 'airOutsPitching', - 'groundOutsPitching', 'runsPitching', 'doublesPitching', - 'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching', - 'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching', - 'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching', - 'stolenBasesPitching', 'inningsPitched', 'saveOpportunities', - 'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls', - 'strikes', 'hitBatsmen', 
'balks', 'wildPitches', 'pickoffsPitching', - 'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners', - 'inheritedRunnersScored', 'catchersInterferencePitching', - 'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves', - 'assists', 'putOuts', 'errors', 'chances','target1_mean', - 'target1_median', - 'target1_std', - 'target1_min', - 'target1_max', - 'target1_prob', - 'target2_mean', - 'target2_median', - 'target2_std', - 'target2_min', - 'target2_max', - 'target2_prob', - 'target3_mean', - 'target3_median', - 'target3_std', - 'target3_min', - 'target3_max', - 'target3_prob', - 'target4_mean', - 'target4_median', - 'target4_std', - 'target4_min', - 'target4_max', - 'target4_prob'] -feature_cols2 = ['label_playerId', 'label_primaryPositionName', 'label_teamId', - 'label_status', 'battingOrder', 'gamesPlayedBatting', 'flyOuts', - 'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns', - 'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch', - 'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay', - 'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi', - 'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference', - 'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching', - 'completeGamesPitching', 'shutoutsPitching', 'winsPitching', - 'lossesPitching', 'flyOutsPitching', 'airOutsPitching', - 'groundOutsPitching', 'runsPitching', 'doublesPitching', - 'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching', - 'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching', - 'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching', - 'stolenBasesPitching', 'inningsPitched', 'saveOpportunities', - 'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls', - 'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching', - 'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners', - 'inheritedRunnersScored', 'catchersInterferencePitching', - 'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves', - 'assists', 'putOuts', 'errors', 'chances','target1_mean', - 'target1_median', - 'target1_std', - 'target1_min', - 'target1_max', - 'target1_prob', - 'target2_mean', - 'target2_median', - 'target2_std', - 'target2_min', - 'target2_max', - 'target2_prob', - 'target3_mean', - 'target3_median', - 'target3_std', - 'target3_min', - 'target3_max', - 'target3_prob', - 'target4_mean', - 'target4_median', - 'target4_std', - 'target4_min', - 'target4_max', - 'target4_prob', - 'target1'] - -# %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:16:54.244866Z","iopub.execute_input":"2021-06-26T07:16:54.24532Z","iopub.status.idle":"2021-06-26T07:16:54.296844Z","shell.execute_reply.started":"2021-06-26T07:16:54.245257Z","shell.execute_reply":"2021-06-26T07:16:54.295689Z"}} -player_target_stats = pd.read_csv("../input/player-target-stats/player_target_stats.csv") -data_names=player_target_stats.columns.values.tolist() -data_names - -# %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:16:54.300157Z","iopub.execute_input":"2021-06-26T07:16:54.300622Z","iopub.status.idle":"2021-06-26T07:17:02.252208Z","shell.execute_reply.started":"2021-06-26T07:16:54.300578Z","shell.execute_reply":"2021-06-26T07:17:02.250423Z"}} -# creat dataset -train = targets[targets_cols].merge(players[players_cols], on=['playerId'], how='left') -train = train.merge(rosters[rosters_cols], on=['playerId', 'date'], how='left') -train = 
train.merge(scores[scores_cols], on=['playerId', 'date'], how='left') -train = train.merge(player_target_stats, how='inner', left_on=["playerId"],right_on=["playerId"]) - - -# label encoding -player2num = {c: i for i, c in enumerate(train['playerId'].unique())} -position2num = {c: i for i, c in enumerate(train['primaryPositionName'].unique())} -teamid2num = {c: i for i, c in enumerate(train['teamId'].unique())} -status2num = {c: i for i, c in enumerate(train['status'].unique())} -train['label_playerId'] = train['playerId'].map(player2num) -train['label_primaryPositionName'] = train['primaryPositionName'].map(position2num) -train['label_teamId'] = train['teamId'].map(teamid2num) -train['label_status'] = train['status'].map(status2num) - -# %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:17:02.253453Z","iopub.status.idle":"2021-06-26T07:17:02.254076Z"}} -train_X = train[feature_cols] -train_y = train[['target1', 'target2', 'target3', 'target4']] - -_index = (train['date'] < 20210401) -x_train1 = train_X.loc[_index].reset_index(drop=True) -y_train1 = train_y.loc[_index].reset_index(drop=True) -x_valid1 = train_X.loc[~_index].reset_index(drop=True) -y_valid1 = train_y.loc[~_index].reset_index(drop=True) - -# %% [code] {"execution":{"iopub.status.busy":"2021-06-26T07:17:02.255068Z","iopub.status.idle":"2021-06-26T07:17:02.255685Z"}} -train_X = train[feature_cols2] -train_y = train[['target1', 'target2', 'target3', 'target4']] - -_index = (train['date'] < 20210401) -x_train2 = train_X.loc[_index].reset_index(drop=True) -y_train2 = train_y.loc[_index].reset_index(drop=True) -x_valid2 = train_X.loc[~_index].reset_index(drop=True) -y_valid2 = train_y.loc[~_index].reset_index(drop=True) - -# %% [code] {"execution":{"iopub.status.busy":"2021-06-26T07:17:02.256629Z","iopub.status.idle":"2021-06-26T07:17:02.257215Z"}} -train_X - -# %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:17:02.258224Z","iopub.status.idle":"2021-06-26T07:17:02.258854Z"}} -def fit_lgbm(x_train, y_train, x_valid, y_valid, params: dict=None, verbose=100): - oof_pred = np.zeros(len(y_valid), dtype=np.float32) - model = lgbm.LGBMRegressor(**params) - model.fit(x_train, y_train, - eval_set=[(x_valid, y_valid)], - early_stopping_rounds=verbose, - verbose=verbose) - oof_pred = model.predict(x_valid) - score = mean_absolute_error(oof_pred, y_valid) - print('mae:', score) - return oof_pred, model, score - - -# training lightgbm - -params1 = {'objective':'mae', - 'reg_alpha': 0.14947461820098767, - 'reg_lambda': 0.10185644384043743, - 'n_estimators': 3633, - 'learning_rate': 0.08046301304430488, - 'num_leaves': 674, - 'feature_fraction': 0.9101240539122566, - 'bagging_fraction': 0.9884451442950513, - 'bagging_freq': 8, - 'min_child_samples': 51} - -params2 = { - 'objective':'mae', - 'reg_alpha': 0.1, - 'reg_lambda': 0.1, - 'n_estimators': 80, - 'learning_rate': 0.1, - 'random_state': 42, - "num_leaves": 22 -} - -params4 = {'objective':'mae', - 'reg_alpha': 0.016468100279441976, - 'reg_lambda': 0.09128335764019105, - 'n_estimators': 9868, - 'learning_rate': 0.10528150510326864, - 'num_leaves': 157, - 'feature_fraction': 0.5419185713426886, - 'bagging_fraction': 0.2637405128936662, - 'bagging_freq': 19, - 'min_child_samples': 71} - - -params = { - 'objective':'mae', - 'reg_alpha': 0.1, - 'reg_lambda': 0.1, - 'n_estimators': 10000, - 'learning_rate': 0.1, - 'random_state': 42, - "num_leaves": 100 -} - - -# Slow from this point 
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -oof1, model1, score1 = fit_lgbm( - x_train1, y_train1['target1'], - x_valid1, y_valid1['target1'], - params1 - ) - -oof2, model2, score2 = fit_lgbm( - x_train2, y_train2['target2'], - x_valid2, y_valid2['target2'], - params2 -) - -oof3, model3, score3 = fit_lgbm( - x_train2, y_train2['target3'], - x_valid2, y_valid2['target3'], - params -) - -oof4, model4, score4 = fit_lgbm( - x_train2, y_train2['target4'], - x_valid2, y_valid2['target4'], - params4 -) - -score = (score1+score2+score3+score4) / 4 -print(f'score: {score}') - -# %% [code] -import pickle -from catboost import CatBoostRegressor - -def fit_lgbm(x_train, y_train, x_valid, y_valid, target, params: dict=None, verbose=100): - oof_pred_lgb = np.zeros(len(y_valid), dtype=np.float32) - oof_pred_cat = np.zeros(len(y_valid), dtype=np.float32) - - if os.path.isfile(f'../input/mlb-lgbm-and-catboost-models/model_lgb_{target}.pkl'): - with open(f'../input/mlb-lgbm-and-catboost-models/model_lgb_{target}.pkl', 'rb') as fin: - model = pickle.load(fin) - else: - - model = lgbm.LGBMRegressor(**params) - model.fit(x_train, y_train, - eval_set=[(x_valid, y_valid)], - early_stopping_rounds=verbose, - verbose=verbose) - - with open(f'model_lgb_{target}.pkl', 'wb') as handle: - pickle.dump(model, handle, protocol=pickle.HIGHEST_PROTOCOL) - - oof_pred_lgb = model.predict(x_valid) - score_lgb = mean_absolute_error(oof_pred_lgb, y_valid) - print('mae:', score_lgb) - - if os.path.isfile(f'../input/mlb-lgbm-and-catboost-models/model_cb_{target}.pkl'): - with open(f'../input/mlb-lgbm-and-catboost-models/model_cb_{target}.pkl', 'rb') as fin: - model_cb = pickle.load(fin) - else: - - model_cb = CatBoostRegressor( - n_estimators=2000, - learning_rate=0.05, - loss_function='MAE', - eval_metric='MAE', - max_bin=50, - subsample=0.9, - colsample_bylevel=0.5, - verbose=100) - - model_cb.fit(x_train, y_train, use_best_model=True, - eval_set=(x_valid, y_valid), - early_stopping_rounds=25) - - with open(f'model_cb_{target}.pkl', 'wb') as handle: - pickle.dump(model_cb, handle, protocol=pickle.HIGHEST_PROTOCOL) - - oof_pred_cat = model_cb.predict(x_valid) - score_cat = mean_absolute_error(oof_pred_cat, y_valid) - print('mae:', score_cat) - - return oof_pred_lgb, model, oof_pred_cat, model_cb, score_lgb, score_cat - - -# training lightgbm -params = { -'boosting_type': 'gbdt', -'objective':'mae', -'subsample': 0.5, -'subsample_freq': 1, -'learning_rate': 0.03, -'num_leaves': 2**11-1, -'min_data_in_leaf': 2**12-1, -'feature_fraction': 0.5, -'max_bin': 100, -'n_estimators': 2500, -'boost_from_average': False, -"random_seed":42, -} - -oof_pred_lgb2, model_lgb2, oof_pred_cat2, model_cb2, score_lgb2, score_cat2 = fit_lgbm( - x_train1, y_train1['target2'], - x_valid1, y_valid1['target2'], - 2, params -) - -oof_pred_lgb1, model_lgb1, oof_pred_cat1, model_cb1, score_lgb1, score_cat1 = fit_lgbm( - x_train1, y_train1['target1'], - x_valid1, y_valid1['target1'], - 1, params -) - -oof_pred_lgb3, model_lgb3, oof_pred_cat3, model_cb3, score_lgb3, score_cat3 = fit_lgbm( - x_train1, y_train1['target3'], - x_valid1, y_valid1['target3'], - 3, params -) -oof_pred_lgb4, model_lgb4, oof_pred_cat4, model_cb4, score_lgb4, score_cat4= fit_lgbm( - x_train1, y_train1['target4'], - x_valid1, y_valid1['target4'], - 4, params -) - -score = (score_lgb1+score_lgb2+score_lgb3+score_lgb4) / 4 -print(f'LightGBM score: {score}') - -score = (score_cat1+score_cat2+score_cat3+score_cat4) / 4 -print(f'Catboost score: {score}') - -# %% [markdown] -# ## Inference - -# 
%% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:17:02.259872Z","iopub.status.idle":"2021-06-26T07:17:02.260506Z"}} -players_cols = ['playerId', 'primaryPositionName'] -rosters_cols = ['playerId', 'teamId', 'status'] -scores_cols = ['playerId', 'battingOrder', 'gamesPlayedBatting', 'flyOuts', - 'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns', - 'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch', - 'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay', - 'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi', - 'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference', - 'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching', - 'completeGamesPitching', 'shutoutsPitching', 'winsPitching', - 'lossesPitching', 'flyOutsPitching', 'airOutsPitching', - 'groundOutsPitching', 'runsPitching', 'doublesPitching', - 'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching', - 'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching', - 'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching', - 'stolenBasesPitching', 'inningsPitched', 'saveOpportunities', - 'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls', - 'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching', - 'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners', - 'inheritedRunnersScored', 'catchersInterferencePitching', - 'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves', - 'assists', 'putOuts', 'errors', 'chances'] - -null = np.nan -true = True -false = False - -# %% [code] {"execution":{"iopub.status.busy":"2021-06-26T07:17:02.26162Z","iopub.status.idle":"2021-06-26T07:17:02.262287Z"}} -import pandas as pd -import numpy as np -from datetime import timedelta -from tqdm import tqdm -import gc -from functools import reduce -from sklearn.model_selection import StratifiedKFold - -ROOT_DIR = "../input/mlb-player-digital-engagement-forecasting" - -#=======================# -def flatten(df, col): - du = (df.pivot(index="playerId", columns="EvalDate", - values=col).add_prefix(f"{col}_"). 
- rename_axis(None, axis=1).reset_index()) - return du -#============================# -def reducer(left, right): - return left.merge(right, on="playerId") -#======================== - -TGTCOLS = ["target1","target2","target3","target4"] -def train_lag(df, lag=1): - dp = df[["playerId","EvalDate"]+TGTCOLS].copy() - dp["EvalDate"] =dp["EvalDate"] + timedelta(days=lag) - df = df.merge(dp, on=["playerId", "EvalDate"], suffixes=["",f"_{lag}"], how="left") - return df -#================================= -def test_lag(sub): - sub["playerId"] = sub["date_playerId"].apply(lambda s: int( s.split("_")[1] ) ) - assert sub.date.nunique() == 1 - dte = sub["date"].unique()[0] - - eval_dt = pd.to_datetime(dte, format="%Y%m%d") - dtes = [eval_dt + timedelta(days=-k) for k in LAGS] - mp_dtes = {eval_dt + timedelta(days=-k):k for k in LAGS} - - sl = LAST.loc[LAST.EvalDate.between(dtes[-1], dtes[0]), ["EvalDate","playerId"]+TGTCOLS].copy() - sl["EvalDate"] = sl["EvalDate"].map(mp_dtes) - du = [flatten(sl, col) for col in TGTCOLS] - du = reduce(reducer, du) - return du, eval_dt - # -#=============== - -tr = pd.read_csv("../input/mlb-data/target.csv") -print(tr.shape) -gc.collect() - -tr["EvalDate"] = pd.to_datetime(tr["EvalDate"]) -tr["EvalDate"] = tr["EvalDate"] + timedelta(days=-1) -tr["EvalYear"] = tr["EvalDate"].dt.year - -MED_DF = tr.groupby(["playerId","EvalYear"])[TGTCOLS].median().reset_index() -MEDCOLS = ["tgt1_med","tgt2_med", "tgt3_med", "tgt4_med"] -MED_DF.columns = ["playerId","EvalYear"] + MEDCOLS - -LAGS = list(range(1,21)) -FECOLS = [f"{col}_{lag}" for lag in reversed(LAGS) for col in TGTCOLS] - -for lag in tqdm(LAGS): - tr = train_lag(tr, lag=lag) - gc.collect() -#=========== -tr = tr.sort_values(by=["playerId", "EvalDate"]) -print(tr.shape) -tr = tr.dropna() -print(tr.shape) -tr = tr.merge(MED_DF, on=["playerId","EvalYear"]) -gc.collect() - -X = tr[FECOLS+MEDCOLS].values -y = tr[TGTCOLS].values -cl = tr["playerId"].values - -NFOLDS = 6 -skf = StratifiedKFold(n_splits=NFOLDS) -folds = skf.split(X, cl) -folds = list(folds) - -import tensorflow as tf -import tensorflow.keras.layers as L -import tensorflow.keras.models as M -from sklearn.metrics import mean_absolute_error, mean_squared_error -from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping - -tf.random.set_seed(777) - -def make_model(n_in): - inp = L.Input(name="inputs", shape=(n_in,)) - x = L.Dense(50, activation="relu", name="d1")(inp) - x = L.Dense(50, activation="relu", name="d2")(x) - preds = L.Dense(4, activation="linear", name="preds")(x) - - model = M.Model(inp, preds, name="ANN") - model.compile(loss="mean_absolute_error", optimizer="adam") - return model - -net = make_model(X.shape[1]) -print(net.summary()) - -oof = np.zeros(y.shape) -nets = [] -for idx in range(NFOLDS): - print("FOLD:", idx) - tr_idx, val_idx = folds[idx] - ckpt = ModelCheckpoint(f"w{idx}.h5", monitor='val_loss', verbose=1, save_best_only=True,mode='min') - reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,patience=3, min_lr=0.0005) - es = EarlyStopping(monitor='val_loss', patience=6) - reg = make_model(X.shape[1]) -# reg.fit(X[tr_idx], y[tr_idx], epochs=10, batch_size=35_000, validation_data=(X[val_idx], y[val_idx]), -# verbose=1, callbacks=[ckpt, reduce_lr, es]) - reg.load_weights(f"w{idx}.h5") - oof[val_idx] = reg.predict(X[val_idx], batch_size=50_000, verbose=1) - nets.append(reg) - gc.collect() - # -# - -mae = mean_absolute_error(y, oof) -mse = mean_squared_error(y, oof, squared=False) -print("mae:", mae) 
-print("mse:", mse) - -# Historical information to use in prediction time -bound_dt = pd.to_datetime("2021-01-01") -LAST = tr.loc[tr.EvalDate>bound_dt].copy() - -LAST_MED_DF = MED_DF.loc[MED_DF.EvalYear==2021].copy() -LAST_MED_DF.drop("EvalYear", axis=1, inplace=True) -del tr - -#""" -import mlb -FE = []; SUB = []; - -# %% [markdown] -#
-#
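-# The submission loop below mixes three tree-model predictions per target and then blends the result with the ANN predictions. A minimal sketch of that weighting (blend_target is a hypothetical helper written only for illustration; the loop itself applies the same arithmetic inline):
-
-# %% [code]
-def blend_target(pred_main, pred_lgb, pred_cat, pred_ann):
-    """Same weights as the loop below: 0.65/0.25/0.10 across the tree models,
-    then 65% trees / 35% ANN, with every prediction clipped to the 0-100 target range."""
-    trees = (0.65 * np.clip(pred_main, 0, 100)
-             + 0.25 * np.clip(pred_lgb, 0, 100)
-             + 0.10 * np.clip(pred_cat, 0, 100))
-    return 0.65 * trees + 0.35 * np.clip(pred_ann, 0, 100)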
- -# %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:17:02.263332Z","iopub.status.idle":"2021-06-26T07:17:02.263974Z"}} -import copy - -env = mlb.make_env() # initialize the environment -iter_test = env.iter_test() # iterator which loops over each date in test set - -for (test_df, sample_prediction_df) in iter_test: # make predictions here - - sub = copy.deepcopy(sample_prediction_df.reset_index()) - sample_prediction_df = copy.deepcopy(sample_prediction_df.reset_index(drop=True)) - - # LGBM summit - # creat dataset - sample_prediction_df['playerId'] = sample_prediction_df['date_playerId']\ - .map(lambda x: int(x.split('_')[1])) - # Dealing with missing values - if test_df['rosters'].iloc[0] == test_df['rosters'].iloc[0]: - test_rosters = pd.DataFrame(eval(test_df['rosters'].iloc[0])) - else: - test_rosters = pd.DataFrame({'playerId': sample_prediction_df['playerId']}) - for col in rosters.columns: - if col == 'playerId': continue - test_rosters[col] = np.nan - - if test_df['playerBoxScores'].iloc[0] == test_df['playerBoxScores'].iloc[0]: - test_scores = pd.DataFrame(eval(test_df['playerBoxScores'].iloc[0])) - else: - test_scores = pd.DataFrame({'playerId': sample_prediction_df['playerId']}) - for col in scores.columns: - if col == 'playerId': continue - test_scores[col] = np.nan - test_scores = test_scores.groupby('playerId').sum().reset_index() - test = sample_prediction_df[['playerId']].copy() - test = test.merge(players[players_cols], on='playerId', how='left') - test = test.merge(test_rosters[rosters_cols], on='playerId', how='left') - test = test.merge(test_scores[scores_cols], on='playerId', how='left') - test = test.merge(player_target_stats, how='inner', left_on=["playerId"],right_on=["playerId"]) - - - test['label_playerId'] = test['playerId'].map(player2num) - test['label_primaryPositionName'] = test['primaryPositionName'].map(position2num) - test['label_teamId'] = test['teamId'].map(teamid2num) - test['label_status'] = test['status'].map(status2num) - - test_X = test[feature_cols] - # predict - pred1 = model1.predict(test_X) - - # predict - pred_lgd1 = model_lgb1.predict(test_X) - pred_lgd2 = model_lgb2.predict(test_X) - pred_lgd3 = model_lgb3.predict(test_X) - pred_lgd4 = model_lgb4.predict(test_X) - - pred_cat1 = model_cb1.predict(test_X) - pred_cat2 = model_cb2.predict(test_X) - pred_cat3 = model_cb3.predict(test_X) - pred_cat4 = model_cb4.predict(test_X) - - test['target1'] = np.clip(pred1,0,100) - test_X = test[feature_cols2] - - pred2 = model2.predict(test_X) - pred3 = model3.predict(test_X) - pred4 = model4.predict(test_X) - - # merge submission - sample_prediction_df['target1'] = 0.65*np.clip(pred1, 0, 100)+0.25*np.clip(pred_lgd1, 0, 100)+0.10*np.clip(pred_cat1, 0, 100) - sample_prediction_df['target2'] = 0.65*np.clip(pred2, 0, 100)+0.25*np.clip(pred_lgd2, 0, 100)+0.10*np.clip(pred_cat2, 0, 100) - sample_prediction_df['target3'] = 0.65*np.clip(pred3, 0, 100)+0.25*np.clip(pred_lgd3, 0, 100)+0.10*np.clip(pred_cat3, 0, 100) - sample_prediction_df['target4'] = 0.65*np.clip(pred4, 0, 100)+0.25*np.clip(pred_lgd4, 0, 100)+0.10*np.clip(pred_cat4, 0, 100) - sample_prediction_df = sample_prediction_df.fillna(0.) - del sample_prediction_df['playerId'] - # TF summit - # Features computation at Evaluation Date - sub_fe, eval_dt = test_lag(sub) - sub_fe = sub_fe.merge(LAST_MED_DF, on="playerId", how="left") - sub_fe = sub_fe.fillna(0.) - - _preds = 0. 
- for reg in nets: - _preds += reg.predict(sub_fe[FECOLS + MEDCOLS]) / NFOLDS - sub_fe[TGTCOLS] = np.clip(_preds, 0, 100) - sub.drop(["date"]+TGTCOLS, axis=1, inplace=True) - sub = sub.merge(sub_fe[["playerId"]+TGTCOLS], on="playerId", how="left") - sub.drop("playerId", axis=1, inplace=True) - sub = sub.fillna(0.) - # Blending - blend = pd.concat( - [sub[['date_playerId']], - (0.35*sub.drop('date_playerId', axis=1) + 0.65*sample_prediction_df.drop('date_playerId', axis=1))], - axis=1 - ) - env.predict(blend) - # Update Available information - sub_fe["EvalDate"] = eval_dt - #sub_fe.drop(MEDCOLS, axis=1, inplace=True) - LAST = LAST.append(sub_fe) - LAST = LAST.drop_duplicates(subset=["EvalDate","playerId"], keep="last") - -# %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:17:02.264951Z","iopub.status.idle":"2021-06-26T07:17:02.265581Z"}} -pd.concat( - [sub[['date_playerId']], - (sub.drop('date_playerId', axis=1) + sample_prediction_df.drop('date_playerId', axis=1)) / 2], - axis=1 -) - -# %% [code] {"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2021-06-26T07:17:02.26657Z","iopub.status.idle":"2021-06-26T07:17:02.267169Z"}} -sample_prediction_df - -# %% [markdown] -#
-#
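-
-# %% [markdown]
-# Note on the state update inside the submission loop above: after each `env.predict(...)` call the
-# engineered features for the current evaluation date are appended to `LAST`, and only the newest row
-# per (`EvalDate`, `playerId`) is kept, so the next iteration computes lag features from up-to-date
-# history. A minimal sketch of that pattern is below; the frame and column names are made up for
-# illustration, and it uses `pd.concat` because `DataFrame.append`, used above, was removed in pandas 2.x.
-
-# %% [code]
-import pandas as pd
-
-# Toy history table: one row per (EvalDate, playerId).
-hist = pd.DataFrame({
-    "EvalDate": pd.to_datetime(["2021-04-01", "2021-04-01"]),
-    "playerId": [101, 202],
-    "lagFeature": [10.0, 55.0],
-})
-
-# Newly computed rows: player 202 is refreshed for the same date, player 101 gets a new date.
-new_rows = pd.DataFrame({
-    "EvalDate": pd.to_datetime(["2021-04-01", "2021-04-02"]),
-    "playerId": [202, 101],
-    "lagFeature": [60.0, 12.0],
-})
-
-# Equivalent of appending new_rows to hist and then dropping duplicates, keeping the latest row.
-hist = (pd.concat([hist, new_rows], ignore_index=True)
-          .drop_duplicates(subset=["EvalDate", "playerId"], keep="last"))
-print(hist)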
diff --git a/d1/mlb_player_v2.py b/d1/mlb_player_v2.py deleted file mode 100644 index 37052bc..0000000 --- a/d1/mlb_player_v2.py +++ /dev/null @@ -1,1399 +0,0 @@ -# %% [markdown] {"papermill":{"duration":0.099672,"end_time":"2021-06-11T18:42:32.853471","exception":false,"start_time":"2021-06-11T18:42:32.753799","status":"completed"},"tags":[]} -# # **MLB Player Digital Engagementコンペ概略**😀 -# -# ## ※ English page is here : https://www.kaggle.com/chumajin/eda-of-mlb-for-starter-english-ver -# -# ## このコンペは、MLBのplayer idごとに、次の日(将来)にファンがデジタルコンテンツへのエンゲージメント(「反応」「行動」みたいなもの)をどれくらい起こすかというのを数値化したもの(target)を予測するコンペだと思います。targetは1~4で、それぞれ異なる指標で4つあって、0-100のスケールで数値化したものだそうです。 -# (コメントいただきました。ありがとうございます!! たしかにサポーターなどのtwitterの書き込みとか、どこかのサイトへのアクセスなどそういうのを想像するとイメージしやすいですね。) -# -# - -# %% [markdown] {"papermill":{"duration":0.10241,"end_time":"2021-06-11T18:42:33.052395","exception":false,"start_time":"2021-06-11T18:42:32.949985","status":"completed"},"tags":[]} -# ## もし、少しでもお役に立てば、**upvote**いただけたら嬉しいです! 他notebookでもupvoteいただけた方いつもありがとうございます。 -# -# ## また、基本的には、この事務局のスターターを見て、EDAを理解していきました(一部抜粋)。ありがとうございます。 -# -# ## こちらもupvoteお願いいたします。 -# -# https://www.kaggle.com/ryanholbrook/getting-started-with-mlb-player-digital-engagement - -# %% [code] {"papermill":{"duration":1.060051,"end_time":"2021-06-11T18:42:34.209063","exception":false,"start_time":"2021-06-11T18:42:33.149012","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:06:08.741917Z","iopub.execute_input":"2021-06-14T09:06:08.742307Z","iopub.status.idle":"2021-06-14T09:06:09.566612Z","shell.execute_reply.started":"2021-06-14T09:06:08.742273Z","shell.execute_reply":"2021-06-14T09:06:09.565664Z"}} -import pprint -def display(*args, **kwargs): - pprint.pprint( - dict( - args=args, - kwargs=kwargs, - ), - ) - -import gc -import sys -import warnings -from pathlib import Path - -import os - -import ipywidgets as widgets -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sns -warnings.simplefilter("ignore") - -# %% [markdown] {"papermill":{"duration":0.099753,"end_time":"2021-06-11T18:42:34.425178","exception":false,"start_time":"2021-06-11T18:42:34.325425","status":"completed"},"tags":[]} -# # 0. 何を予測するか (submissionファイルから見ちゃいます) - -# %% [code] {"papermill":{"duration":0.148821,"end_time":"2021-06-11T18:42:34.687273","exception":false,"start_time":"2021-06-11T18:42:34.538452","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:06:13.75186Z","iopub.execute_input":"2021-06-14T09:06:13.752423Z","iopub.status.idle":"2021-06-14T09:06:13.804719Z","shell.execute_reply.started":"2021-06-14T09:06:13.752383Z","shell.execute_reply":"2021-06-14T09:06:13.803356Z"}} -example_sample_submission = pd.read_csv("../input/mlb-player-digital-engagement-forecasting/example_sample_submission.csv") -example_sample_submission - -# %% [markdown] {"papermill":{"duration":0.092961,"end_time":"2021-06-11T18:42:34.873875","exception":false,"start_time":"2021-06-11T18:42:34.780914","status":"completed"},"tags":[]} -# playeridごとに、次の日(将来)にファンがデジタルコンテンツへのエンゲージメント(「反応」「行動」みたいなもの)をどれくらい起こすかというのを数値化したもの(target)を予測するコンペ。 -# -# targetは1~4で、それぞれ異なる指標で4つあって、0-100のスケールで数値化したものだそうです。 -# - -# %% [markdown] {"papermill":{"duration":0.09515,"end_time":"2021-06-11T18:42:35.062356","exception":false,"start_time":"2021-06-11T18:42:34.967206","status":"completed"},"tags":[]} -# ## 0.1 どの情報から推測 ? 
(先にテストデータを見ちゃいます) - -# %% [code] {"papermill":{"duration":0.895014,"end_time":"2021-06-11T18:42:36.051171","exception":false,"start_time":"2021-06-11T18:42:35.156157","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:07:01.149956Z","iopub.execute_input":"2021-06-14T09:07:01.150331Z","iopub.status.idle":"2021-06-14T09:07:02.04348Z","shell.execute_reply.started":"2021-06-14T09:07:01.1503Z","shell.execute_reply":"2021-06-14T09:07:02.042485Z"}} -example_test = pd.read_csv("../input/mlb-player-digital-engagement-forecasting/example_test.csv") -example_test - -# %% [markdown] {"papermill":{"duration":0.093244,"end_time":"2021-06-11T18:42:36.239027","exception":false,"start_time":"2021-06-11T18:42:36.145783","status":"completed"},"tags":[]} -# パッと見て、submissionに出てくるplayer IDとかがすぐわかる感じではなさそう。json形式でいろいろな情報が入っていそう。 -# -# -# テストデータは1日に1行のデータからなっている。 -# -# -# 例えば、starterコードからの関数を使用すると、以下のように展開できる。 - -# %% [code] {"papermill":{"duration":0.105661,"end_time":"2021-06-11T18:42:36.437921","exception":false,"start_time":"2021-06-11T18:42:36.33226","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:07:46.275371Z","iopub.execute_input":"2021-06-14T09:07:46.275752Z","iopub.status.idle":"2021-06-14T09:07:46.280719Z","shell.execute_reply.started":"2021-06-14T09:07:46.275719Z","shell.execute_reply":"2021-06-14T09:07:46.279482Z"}} -# Helper function to unpack json found in daily data -def unpack_json(json_str): - return np.nan if pd.isna(json_str) else pd.read_json(json_str) - -# %% [code] {"papermill":{"duration":0.211983,"end_time":"2021-06-11T18:42:36.743198","exception":false,"start_time":"2021-06-11T18:42:36.531215","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:07:52.045349Z","iopub.execute_input":"2021-06-14T09:07:52.045719Z","iopub.status.idle":"2021-06-14T09:07:52.170803Z","shell.execute_reply.started":"2021-06-14T09:07:52.045686Z","shell.execute_reply":"2021-06-14T09:07:52.169701Z"}} -example_test.head(3) - -# %% [markdown] {"papermill":{"duration":0.093974,"end_time":"2021-06-11T18:42:36.931685","exception":false,"start_time":"2021-06-11T18:42:36.837711","status":"completed"},"tags":[]} -# example_test["games"].iloc[0] の中身を見てみる - -# %% [code] {"papermill":{"duration":0.165624,"end_time":"2021-06-11T18:42:37.192619","exception":false,"start_time":"2021-06-11T18:42:37.026995","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:08:00.433132Z","iopub.execute_input":"2021-06-14T09:08:00.43354Z","iopub.status.idle":"2021-06-14T09:08:00.50561Z","shell.execute_reply.started":"2021-06-14T09:08:00.433495Z","shell.execute_reply":"2021-06-14T09:08:00.50448Z"}} -unpack_json(example_test["games"].iloc[0]) - -# %% [markdown] {"papermill":{"duration":0.094383,"end_time":"2021-06-11T18:42:37.381562","exception":false,"start_time":"2021-06-11T18:42:37.287179","status":"completed"},"tags":[]} -# example_test["rosters"].iloc[0] の中身を見てみる - -# %% [code] {"papermill":{"duration":0.126356,"end_time":"2021-06-11T18:42:37.603497","exception":false,"start_time":"2021-06-11T18:42:37.477141","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:09:08.225885Z","iopub.execute_input":"2021-06-14T09:09:08.226269Z","iopub.status.idle":"2021-06-14T09:09:08.255073Z","shell.execute_reply.started":"2021-06-14T09:09:08.226235Z","shell.execute_reply":"2021-06-14T09:09:08.254404Z"}} -unpack_json(example_test["rosters"].iloc[0]) - -# %% [markdown] 
{"papermill":{"duration":0.094364,"end_time":"2021-06-11T18:42:37.794102","exception":false,"start_time":"2021-06-11T18:42:37.699738","status":"completed"},"tags":[]} -# この辺の情報から、player idごとに次の日のtarget1~4という評価項目の期待値を推測するコンペだと思います。 - -# %% [code] {"papermill":{"duration":0.097753,"end_time":"2021-06-11T18:42:37.988036","exception":false,"start_time":"2021-06-11T18:42:37.890283","status":"completed"},"tags":[]} - - -# %% [markdown] {"papermill":{"duration":0.095565,"end_time":"2021-06-11T18:42:38.18146","exception":false,"start_time":"2021-06-11T18:42:38.085895","status":"completed"},"tags":[]} -# ---------以上を踏まえて、trainデータなど他のデータを見ていきます--------- - -# %% [markdown] {"papermill":{"duration":0.094539,"end_time":"2021-06-11T18:42:38.370792","exception":false,"start_time":"2021-06-11T18:42:38.276253","status":"completed"},"tags":[]} -# # 1. train.csv - -# %% [code] {"papermill":{"duration":73.740436,"end_time":"2021-06-11T18:43:52.206935","exception":false,"start_time":"2021-06-11T18:42:38.466499","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:09:47.628029Z","iopub.execute_input":"2021-06-14T09:09:47.628644Z","iopub.status.idle":"2021-06-14T09:11:03.816877Z","shell.execute_reply.started":"2021-06-14T09:09:47.6286Z","shell.execute_reply":"2021-06-14T09:11:03.815822Z"}} -# 読み込みに少し時間かかります。 -training = pd.read_csv("../input/mlb-player-digital-engagement-forecasting/train.csv") -training - -# %% [code] {"papermill":{"duration":0.108401,"end_time":"2021-06-11T18:43:52.411026","exception":false,"start_time":"2021-06-11T18:43:52.302625","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:12:42.628153Z","iopub.execute_input":"2021-06-14T09:12:42.628536Z","iopub.status.idle":"2021-06-14T09:12:42.64072Z","shell.execute_reply.started":"2021-06-14T09:12:42.628496Z","shell.execute_reply":"2021-06-14T09:12:42.639545Z"}} -# dateはdatetimeに変換 -training['date'] = pd.to_datetime(training['date'], format="%Y%m%d") - -# %% [code] {"papermill":{"duration":0.124469,"end_time":"2021-06-11T18:43:52.63163","exception":false,"start_time":"2021-06-11T18:43:52.507161","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:12:46.690978Z","iopub.execute_input":"2021-06-14T09:12:46.691391Z","iopub.status.idle":"2021-06-14T09:12:46.716353Z","shell.execute_reply.started":"2021-06-14T09:12:46.691356Z","shell.execute_reply":"2021-06-14T09:12:46.715114Z"}} -training.info() - -# %% [markdown] {"papermill":{"duration":0.096956,"end_time":"2021-06-11T18:43:52.824788","exception":false,"start_time":"2021-06-11T18:43:52.727832","status":"completed"},"tags":[]} -# 1216日分のデータ。nullデータは無し。nanデータがところどころにある。 - -# %% [markdown] {"papermill":{"duration":0.09861,"end_time":"2021-06-11T18:43:53.01957","exception":false,"start_time":"2021-06-11T18:43:52.92096","status":"completed"},"tags":[]} -# --------------------------------------------------------------------- - -# %% [markdown] {"papermill":{"duration":0.095137,"end_time":"2021-06-11T18:43:53.213022","exception":false,"start_time":"2021-06-11T18:43:53.117885","status":"completed"},"tags":[]} -# ## ここから**カラムごとにデータがあるところのjsonを事例として1つ見てみます**。 -# -# 上述したように、train.csvの中身も1つのセルの中にjsonファイル形式で、dataframeがさらにそれぞれ入っているような複雑な形をしています。 -# -# (結果から言うと、全部で1216日分のデータの1日に対して、約11個(nanもあるのでも少し少ないですが)のDataFrameが情報量としてぶら下がっているイメージで、かなりの情報量です。 -# -# ので、ここから少し長いです。イメージだけつかんで、読み流しても良いかもです。) -# - -# %% [code] 
{"papermill":{"duration":0.105297,"end_time":"2021-06-11T18:43:53.414068","exception":false,"start_time":"2021-06-11T18:43:53.308771","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:13:37.94206Z","iopub.execute_input":"2021-06-14T09:13:37.942469Z","iopub.status.idle":"2021-06-14T09:13:37.94928Z","shell.execute_reply.started":"2021-06-14T09:13:37.942422Z","shell.execute_reply":"2021-06-14T09:13:37.948339Z"}} -training.columns - -# %% [markdown] {"papermill":{"duration":0.098188,"end_time":"2021-06-11T18:43:53.612128","exception":false,"start_time":"2021-06-11T18:43:53.51394","status":"completed"},"tags":[]} -# 1つ1つ入力するのが、めんどくさいので、naを抜いて、n番目(0だと一番上)のサンプルをdataframeにしてcolumn名と中身を見る関数を作っちゃいます。 - -# %% [code] {"papermill":{"duration":0.1263,"end_time":"2021-06-11T18:43:53.851813","exception":false,"start_time":"2021-06-11T18:43:53.725513","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:14:01.593291Z","iopub.execute_input":"2021-06-14T09:14:01.593682Z","iopub.status.idle":"2021-06-14T09:14:01.598603Z","shell.execute_reply.started":"2021-06-14T09:14:01.593649Z","shell.execute_reply":"2021-06-14T09:14:01.597731Z"}} -def exshow(col,n): - tmp = training[col] - tmp = tmp.dropna() - tmpdf = unpack_json(tmp.iloc[n]) - print(tmpdf.columns) - return tmpdf - -# %% [markdown] {"papermill":{"duration":0.112367,"end_time":"2021-06-11T18:43:54.094326","exception":false,"start_time":"2021-06-11T18:43:53.981959","status":"completed"},"tags":[]} -# ## 1.1 nextDayPlayerEngagement (train.csvのcolumn1番目) -# 翌日以降のすべてのモデリング ターゲットを含むネストされた JSON。 - -# %% [code] {"papermill":{"duration":0.13531,"end_time":"2021-06-11T18:43:54.391578","exception":false,"start_time":"2021-06-11T18:43:54.256268","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:14:10.526008Z","iopub.execute_input":"2021-06-14T09:14:10.526394Z","iopub.status.idle":"2021-06-14T09:14:10.552638Z","shell.execute_reply.started":"2021-06-14T09:14:10.526362Z","shell.execute_reply":"2021-06-14T09:14:10.551683Z"}} -training.head(3) - -# %% [code] {"papermill":{"duration":0.15348,"end_time":"2021-06-11T18:43:54.656058","exception":false,"start_time":"2021-06-11T18:43:54.502578","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:14:19.888531Z","iopub.execute_input":"2021-06-14T09:14:19.888888Z","iopub.status.idle":"2021-06-14T09:14:19.928418Z","shell.execute_reply.started":"2021-06-14T09:14:19.888857Z","shell.execute_reply":"2021-06-14T09:14:19.927415Z"}} -tmpdf = exshow("nextDayPlayerEngagement",0) -tmpdf - -# %% [markdown] {"papermill":{"duration":0.101315,"end_time":"2021-06-11T18:43:54.857283","exception":false,"start_time":"2021-06-11T18:43:54.755968","status":"completed"},"tags":[]} -# * engagementMetricsDate - 米国太平洋時間に基づくプレーヤーエンゲージメント指標の日付(前日のゲーム、名簿、フィールド統計、トランザクション、賞などと一致します)。 -# * playerId -# * target1 -# * target2 -# * target3 -# * target4 -# -# -# target1-target4は、0から100のスケールでのデジタルエンゲージメントの毎日のインデックスです。 - -# %% [markdown] {"papermill":{"duration":0.099862,"end_time":"2021-06-11T18:43:55.056252","exception":false,"start_time":"2021-06-11T18:43:54.95639","status":"completed"},"tags":[]} -# ここから、plyaerIdと次の日以降のtarget1~4を抜くんですね。 - -# %% [markdown] {"papermill":{"duration":0.108076,"end_time":"2021-06-11T18:43:55.265858","exception":false,"start_time":"2021-06-11T18:43:55.157782","status":"completed"},"tags":[]} -# ## 1.2 games(train.csvのcolumn2番目) -# 特定の日のすべてのゲーム情報を含むネストされた JSON。レギュラー シーズン、ポストシーズン、オールスター ゲームに加えて、スプリング トレーニングとエキシビション 
ゲームが含まれています。 - -# %% [code] {"papermill":{"duration":0.138443,"end_time":"2021-06-11T18:43:55.51297","exception":false,"start_time":"2021-06-11T18:43:55.374527","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:14:48.471833Z","iopub.execute_input":"2021-06-14T09:14:48.472245Z","iopub.status.idle":"2021-06-14T09:14:48.498208Z","shell.execute_reply.started":"2021-06-14T09:14:48.472214Z","shell.execute_reply":"2021-06-14T09:14:48.497415Z"}} -training.head(3) - -# %% [code] {"papermill":{"duration":0.153552,"end_time":"2021-06-11T18:43:55.769473","exception":false,"start_time":"2021-06-11T18:43:55.615921","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:14:56.122131Z","iopub.execute_input":"2021-06-14T09:14:56.122758Z","iopub.status.idle":"2021-06-14T09:14:56.169907Z","shell.execute_reply.started":"2021-06-14T09:14:56.122703Z","shell.execute_reply":"2021-06-14T09:14:56.168899Z"}} -tmpdf = exshow("games",1) # 0番目(1番上はデータが一行しかなかったので、1にしました。) -tmpdf - -# %% [markdown] {"papermill":{"duration":0.105256,"end_time":"2021-06-11T18:43:55.985414","exception":false,"start_time":"2021-06-11T18:43:55.880158","status":"completed"},"tags":[]} -# カラムの意味の翻訳は↓を開いてください。(長いので、hideしています。) - -# %% [markdown] {"_kg_hide-input":true,"papermill":{"duration":0.109485,"end_time":"2021-06-11T18:43:56.199254","exception":false,"start_time":"2021-06-11T18:43:56.089769","status":"completed"},"tags":[]} -# * gamePk : ゲームの一意の識別子。 -# * gameType : ゲームの種類、さまざまな種類がここにあります。 -# * season : -# * gameDate : -# * gameTimeUTC : UTCでの始球式。 -# * resumeDate : タイムゲームが再開されました(放棄された場合、それ以外の場合はnull)。 -# * resumedFrom : タイムゲームは元々放棄されていました(放棄された場合、それ以外の場合はnull)。 -# * codedGameState : ゲームのステータスコード、さまざまなタイプがここにあります。 -# * detailedGameState : ゲームのステータス、さまざまな種類がここにあります。 -# * isTie : ブール値。ゲームが引き分けで終了した場合はtrue。 -# * gameNumber : ダブルヘッダーを区別するためのゲーム番号フラグ -# * doubleHeader : YはDH、Nはシングルゲーム、Sはスプリット -# * dayNight : スケジュールされた開始時間の昼または夜のフラグ。 -# * scheduledInnings : 予定イニング数。 -# * gamesInSeries : 現在のシリーズのゲーム数。 -# * seriesDescription : 現在のシリーズのテキスト説明。 -# * homeId : ホームチームの一意の識別子。 -# * homeName : ホームチーム名。 -# * homeAbbrev : ホームチームの略語。 -# * homeWins : ホームチームのシーズンの現在の勝利数。 -# * homeLosses : ホームチームのシーズンでの現在の損失数。 -# * homeWinPct : ホームチームの現在の勝率。 -# * homeWinner : ブール値。ホームチームが勝った場合はtrue。 -# * homeScore : ホームチームが得点するラン。 -# * awayId : アウェイチームの一意の識別子。 -# * awayName : アウェイチームの一意の識別子。 -# * awayAbbrev : アウェイチームの略。 -# * awayWins : アウェイチームのシーズンの現在の勝利数。 -# * awayLosses : アウェイチームのシーズン中の現在の敗北数。 -# * awayWinPct : アウェイチームの現在の勝率。 -# * awayWinner : ブール値。離れたチームが勝った場合はtrue。 -# * awayScore : アウェイチームが得点したラン。 - -# %% [markdown] {"papermill":{"duration":0.099984,"end_time":"2021-06-11T18:43:56.401399","exception":false,"start_time":"2021-06-11T18:43:56.301415","status":"completed"},"tags":[]} -# ## 1.3 rosters(train.csvのcolumn3番目) -# 特定の日のすべての名簿情報を含むネストされた JSON。インシーズンとオフシーズンのチーム名簿が含まれます。 - -# %% [code] {"papermill":{"duration":0.133667,"end_time":"2021-06-11T18:43:56.635608","exception":false,"start_time":"2021-06-11T18:43:56.501941","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:15:42.681691Z","iopub.execute_input":"2021-06-14T09:15:42.682297Z","iopub.status.idle":"2021-06-14T09:15:42.710025Z","shell.execute_reply.started":"2021-06-14T09:15:42.682226Z","shell.execute_reply":"2021-06-14T09:15:42.708629Z"}} -training.head(3) - -# %% [code] 
{"papermill":{"duration":0.136156,"end_time":"2021-06-11T18:43:56.876362","exception":false,"start_time":"2021-06-11T18:43:56.740206","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:15:47.818485Z","iopub.execute_input":"2021-06-14T09:15:47.818837Z","iopub.status.idle":"2021-06-14T09:15:47.849949Z","shell.execute_reply.started":"2021-06-14T09:15:47.818807Z","shell.execute_reply":"2021-06-14T09:15:47.848977Z"}} -tmpdf = exshow("rosters",0) -tmpdf - -# %% [markdown] {"papermill":{"duration":0.110068,"end_time":"2021-06-11T18:43:57.094794","exception":false,"start_time":"2021-06-11T18:43:56.984726","status":"completed"},"tags":[]} -# * playerId-プレーヤーの一意の識別子。 -# * gameDate -# * teamId-そのプレーヤーがその日にいるteamId。 -# * statusCode-名簿ステータスの略語。 -# * status-説明的な名簿のステータス。 - -# %% [markdown] {"papermill":{"duration":0.10842,"end_time":"2021-06-11T18:43:57.309461","exception":false,"start_time":"2021-06-11T18:43:57.201041","status":"completed"},"tags":[]} -# ## 1.4 playerBoxScores(train.csvのcolumn4番目) -# 特定の日のプレイヤー ゲーム レベルで集計されたゲーム統計を含むネストされた JSON。レギュラーシーズン、ポストシーズン、オールスターゲームが含まれます。 - -# %% [code] {"papermill":{"duration":0.131819,"end_time":"2021-06-11T18:43:57.543572","exception":false,"start_time":"2021-06-11T18:43:57.411753","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:15:51.122412Z","iopub.execute_input":"2021-06-14T09:15:51.122822Z","iopub.status.idle":"2021-06-14T09:15:51.150918Z","shell.execute_reply.started":"2021-06-14T09:15:51.122791Z","shell.execute_reply":"2021-06-14T09:15:51.149868Z"}} -training.head(3) - -# %% [code] {"papermill":{"duration":0.193827,"end_time":"2021-06-11T18:43:57.843961","exception":false,"start_time":"2021-06-11T18:43:57.650134","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:15:52.259031Z","iopub.execute_input":"2021-06-14T09:15:52.259431Z","iopub.status.idle":"2021-06-14T09:15:52.344774Z","shell.execute_reply.started":"2021-06-14T09:15:52.259394Z","shell.execute_reply":"2021-06-14T09:15:52.343636Z"}} -tmpdf = exshow("playerBoxScores",0) -tmpdf.head(5) - -# %% [markdown] {"papermill":{"duration":0.107718,"end_time":"2021-06-11T18:43:58.056077","exception":false,"start_time":"2021-06-11T18:43:57.948359","status":"completed"},"tags":[]} -# カラムの意味の翻訳は↓を開いてください。(長いので、hideしています。) - -# %% [markdown] {"_kg_hide-input":true,"papermill":{"duration":0.106169,"end_time":"2021-06-11T18:43:58.272551","exception":false,"start_time":"2021-06-11T18:43:58.166382","status":"completed"},"tags":[]} -# * home : バイナリ、ホームチームの場合は1、離れている場合は0。 -# * gamePk : ゲームの一意の識別子。 -# * gameDate : -# * gameTimeUTC : UTCでの始球式。 -# * teamId : チームの一意の識別子。 -# * teamName : -# * playerId : プレーヤーの一意の識別子。 -# * playerName : -# * jerseyNum : -# * positionCode : 番号の位置コード、詳細はこちらです。 -# * positionName : テキスト位置の表示、詳細はこちらです。 -# * positionType : ポジショングループ、詳細はこちらです。 -# * battingOrder : 形式:「###」。最初の桁は打順スポットを示し、次の2桁はそのプレーヤーがその打順スポットを占めた順序を示します。例:「300」は、打順の3番目のスポットのスターターを示します。 4人目(900、901、902以降)が打順9位を占めることを示す「903」。ゲームに登場した場合にのみ入力されます。 -# * gamesPlayedBatting : プレーヤーが打者、ランナー、または野手としてゲームに参加した場合は1。 -# * flyOuts : ゲームの合計フライアウト。 -# * groundOuts : ゲームのトータルグラウンドアウト。 -# * runsScored : ゲームの合計ランが記録されました。 -# * doubles : ゲームの合計は2倍です。 -# * triples : ゲームの合計トリプル。 -# * homeRuns : ゲームの総本塁打。 -# * strikeOuts : ゲームの合計三振。 -# * baseOnBalls : ゲームの合計ウォーク。 -# * intentionalWalks : ゲームの故意四球。 -# * hits : ゲームの総ヒット数。 -# * hitByPitch : ピッチによるゲームの合計ヒット。 -# * atBats : でのゲーム合計 -# * caughtStealing : ゲームの合計が盗塁をキャッチしました。 -# * stolenBases : ゲームの盗塁総数。 -# * 
groundIntoDoublePlay : ゲームの合計併殺はに基づいています。 -# * groundIntoTriplePlay : ゲームの合計 3 回プレイが基礎になります。 -# * plateAppearances : ゲームの総打席。 -# * totalBases : ゲームの総拠点数。 -# * rbi : ゲームの合計打点。 -# * leftOnBase : ゲームの総ランナーはベースに残った。 -# * sacBunts : ゲームの合計犠牲バント。 -# * sacFlies : ゲームの総犠牲フライ。 -# * catchersInterference : ゲームのトータルキャッチャーの干渉が発生しました。 -# * pickoffs : ゲームの合計回数がベースから外れました。 -# * gamesPlayedPitching : バイナリ、プレーヤーが投手としてゲームに参加した場合は 1。 -# * gamesStartedPitching : バイナリ、プレーヤーがゲームの先発投手だった場合は1。 -# * completeGamesPitching : バイナリ、完投でクレジットされている場合は1。 -# * shutoutsPitching : バイナリ、完封でクレジットされている場合は1。 -# * winsPitching : バイナリ、勝利でクレジットされている場合は 1。 -# * lossesPitching : バイナリ、損失がクレジットされている場合は1。 -# * flyOutsPitching : 許可されたフライアウトのゲーム合計。 -# * airOutsPitching : エアアウト(フライアウト+ポップアウト)のゲーム合計が許可されます。 -# * groundOutsPitching : ゲームの合計グラウンドアウトが許可されます。 -# * runsPitching : ゲームの合計実行が許可されます。 -# * doublesPitching : ゲームの合計は2倍になります。 -# * triplesPitching : ゲームの合計トリプルが許可されます。 -# * homeRunsPitching : ゲームの合計ホームランが許可されます。 -# * strikeOutsPitching : ゲームの合計三振が許可されます。 -# * baseOnBallsPitching : ゲームの合計歩行が許可されます。 -# * intentionalWalksPitching : ゲームの故意四球の合計が許可されます。 -# * hitsPitching : 許可されるゲームの合計ヒット数。 -# * hitByPitchPitching : 許可されたピッチによるゲームの合計ヒット。 -# * atBatsPitching : でのゲーム合計 -# * caughtStealingPitching : ゲームの合計は、盗みをキャッチしました。 -# * stolenBasesPitching : ゲームの盗塁の合計は許可されます。 -# * inningsPitched : ゲームの総投球回。 -# * saveOpportunities : バイナリ、保存の機会がある場合は1。 -# * earnedRuns : ゲームの合計自責点は許可されています。 -# * battersFaced : 直面したゲームの総打者。 -# * outsPitching : ゲームの合計アウトが記録されました。 -# * pitchesThrown : 投げられた投球のゲーム総数。 -# * balls : 投げられたゲームの合計ボール。 -# * strikes : スローされたゲームの合計ストライク。 -# * hitBatsmen : ゲームの総死球打者。 -# * balks : ゲームの合計はボークします。 -# * wildPitches : 投げられた暴投のゲーム総数。 -# * pickoffsPitching : ゲームのピックオフの総数。 -# * rbiPitching : 打点のゲーム総数は許可されています。 -# * inheritedRunners : 継承されたランナーのゲーム合計を想定。 -# * inheritedRunnersScored : 得点した継承されたランナーのゲーム合計。 -# * catchersInterferencePitching : キャッチャーの干渉のゲーム合計はバッテリーによって発生しました。 -# * sacBuntsPitching : ゲームの犠牲バントの合計が許可されます。 -# * sacFliesPitching : ゲームの犠牲フライは許可されています。 -# * saves : バイナリ、保存でクレジットされている場合は1。 -# * holds : バイナリ、保留がクレジットされている場合は1。 -# * blownSaves : バイナリ、ブローセーブでクレジットされている場合は1。 -# * assists : ゲームのアシスト総数。 -# * putOuts : ゲームの刺殺の総数。 -# * errors : ゲームのエラーの総数。 -# * chances : ゲームのトータルフィールディングチャンス。 - -# %% [code] {"papermill":{"duration":0.102554,"end_time":"2021-06-11T18:43:58.478581","exception":false,"start_time":"2021-06-11T18:43:58.376027","status":"completed"},"tags":[]} - - -# %% [markdown] {"papermill":{"duration":0.108051,"end_time":"2021-06-11T18:43:58.692413","exception":false,"start_time":"2021-06-11T18:43:58.584362","status":"completed"},"tags":[]} -# ## 1.5 teamBoxScores(train.csvのcolumn5番目) -# 特定の日のチーム ゲーム レベルで集計されたゲーム統計を含むネストされた JSON。レギュラーシーズン、ポストシーズン、オールスターゲームが含まれます。 - -# %% [code] {"papermill":{"duration":0.131738,"end_time":"2021-06-11T18:43:58.930844","exception":false,"start_time":"2021-06-11T18:43:58.799106","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:16:00.738588Z","iopub.execute_input":"2021-06-14T09:16:00.738972Z","iopub.status.idle":"2021-06-14T09:16:00.765648Z","shell.execute_reply.started":"2021-06-14T09:16:00.738941Z","shell.execute_reply":"2021-06-14T09:16:00.764551Z"}} -training.head(3) - -# %% [code] 
{"papermill":{"duration":0.15405,"end_time":"2021-06-11T18:43:59.189049","exception":false,"start_time":"2021-06-11T18:43:59.034999","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:16:01.366547Z","iopub.execute_input":"2021-06-14T09:16:01.367082Z","iopub.status.idle":"2021-06-14T09:16:01.4172Z","shell.execute_reply.started":"2021-06-14T09:16:01.36705Z","shell.execute_reply":"2021-06-14T09:16:01.415972Z"}} -tmpdf = exshow("teamBoxScores",0) -tmpdf.head(5) - -# %% [markdown] {"papermill":{"duration":0.121084,"end_time":"2021-06-11T18:43:59.427565","exception":false,"start_time":"2021-06-11T18:43:59.306481","status":"completed"},"tags":[]} -# カラムの意味の翻訳は↓を開いてください。(長いので、hideしています。) - -# %% [markdown] {"_kg_hide-input":true,"papermill":{"duration":0.120566,"end_time":"2021-06-11T18:43:59.660476","exception":false,"start_time":"2021-06-11T18:43:59.53991","status":"completed"},"tags":[]} -# * home : バイナリ、ホームチームの場合は1、離れている場合は0。 -# * teamId : チームの一意の識別子。 -# * gamePk : ゲームの一意の識別子。 -# * gameDate : -# * gameTimeUTC : UTCでの始球式。 -# * flyOuts : ゲームの合計フライアウト。 -# * groundOuts : ゲームのトータルグラウンドアウト。 -# * runsScored : ゲームの合計ランが記録されました。 -# * doubles : ゲームの合計は2倍です。 -# * triples : ゲームの合計トリプル。 -# * homeRuns : ゲームの総本塁打。 -# * strikeOuts : ゲームの合計三振。 -# * baseOnBalls : ゲームの合計ウォーク。 -# * intentionalWalks : ゲームの故意四球。 -# * hits : ゲームの総ヒット数。 -# * hitByPitch : ピッチによるゲームの合計ヒット。 -# * atBats : でのゲーム合計 -# * caughtStealing : ゲームの合計が盗塁をキャッチしました。 -# * stolenBases : ゲームの盗塁総数。 -# * groundIntoDoublePlay : ゲームの合計併殺はに基づいています。 -# * groundIntoTriplePlay : ゲームの合計 3 回プレイが基礎になります。 -# * plateAppearances : ゲームの総打席。 -# * totalBases : ゲームの総拠点数。 -# * rbi : ゲームの合計打点。 -# * leftOnBase : ゲームの総ランナーはベースに残った。 -# * sacBunts : ゲームの合計犠牲バント。 -# * sacFlies : ゲームの総犠牲フライ。 -# * catchersInterference : ゲームのトータルキャッチャーの干渉が発生しました。 -# * pickoffs : ゲームの合計回数がベースから外れました。 -# * airOutsPitching : エアアウト(フライアウト+ポップアウト)のゲーム合計が許可されます。 -# * groundOutsPitching : ゲームの合計グラウンドアウトが許可されます。 -# * runsPitching : ゲームの合計実行が許可されます。 -# * doublesPitching : ゲームの合計は2倍になります。 -# * triplesPitching : ゲームの合計トリプルが許可されます。 -# * homeRunsPitching : ゲームの合計ホームランが許可されます。 -# * strikeOutsPitching : ゲームの合計三振が許可されます。 -# * baseOnBallsPitching : ゲームの合計歩行が許可されます。 -# * intentionalWalksPitching : ゲームの故意四球の合計が許可されます。 -# * hitsPitching : 許可されるゲームの合計ヒット数。 -# * hitByPitchPitching : 許可されたピッチによるゲームの合計ヒット。 -# * atBatsPitching : でのゲーム合計 -# * caughtStealingPitching : ゲームの合計は、盗みをキャッチしました。 -# * stolenBasesPitching : ゲームの盗塁の合計は許可されます。 -# * inningsPitched : ゲームの総投球回。 -# * earnedRuns : ゲームの合計自責点は許可されています。 -# * battersFaced : 直面したゲームの総打者。 -# * outsPitching : ゲームの合計アウトが記録されました。 -# * hitBatsmen : ゲームの総死球打者。 -# * balks : ゲームの合計はボークします。 -# * wildPitches : 投げられた暴投のゲーム総数。 -# * pickoffsPitching : ゲームのピックオフの総数。 -# * rbiPitching : 打点のゲーム総数は許可されています。 -# * inheritedRunners : 継承されたランナーのゲーム合計を想定。 -# * inheritedRunnersScored : 得点した継承されたランナーのゲーム合計。 -# * catchersInterferencePitching : キャッチャーの干渉のゲーム合計はバッテリーによって発生しました。 -# * sacBuntsPitching : ゲームの犠牲バントの合計が許可されます。 -# * sacFliesPitching : ゲームの犠牲フライは許可されています。 - -# %% [code] {"papermill":{"duration":0.120236,"end_time":"2021-06-11T18:43:59.90294","exception":false,"start_time":"2021-06-11T18:43:59.782704","status":"completed"},"tags":[]} - - -# %% [markdown] {"papermill":{"duration":0.124656,"end_time":"2021-06-11T18:44:00.150824","exception":false,"start_time":"2021-06-11T18:44:00.026168","status":"completed"},"tags":[]} -# ## 1.6 transactions(train.csvのcolumn6番目) -# 特定の日の MLB チームに関係するすべてのトランザクション情報を含むネストされた JSON。 - -# %% [code] 
{"papermill":{"duration":0.271551,"end_time":"2021-06-11T18:44:00.569742","exception":false,"start_time":"2021-06-11T18:44:00.298191","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:16:04.232745Z","iopub.execute_input":"2021-06-14T09:16:04.233092Z","iopub.status.idle":"2021-06-14T09:16:04.26048Z","shell.execute_reply.started":"2021-06-14T09:16:04.233063Z","shell.execute_reply":"2021-06-14T09:16:04.25898Z"}} -training.head(3) - -# %% [code] {"papermill":{"duration":0.164462,"end_time":"2021-06-11T18:44:00.857988","exception":false,"start_time":"2021-06-11T18:44:00.693526","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:16:04.801025Z","iopub.execute_input":"2021-06-14T09:16:04.801379Z","iopub.status.idle":"2021-06-14T09:16:04.834846Z","shell.execute_reply.started":"2021-06-14T09:16:04.801351Z","shell.execute_reply":"2021-06-14T09:16:04.833668Z"}} -tmpdf = exshow("transactions",1) -tmpdf - -# %% [markdown] {"papermill":{"duration":0.108288,"end_time":"2021-06-11T18:44:01.074241","exception":false,"start_time":"2021-06-11T18:44:00.965953","status":"completed"},"tags":[]} -# * transactionId : トランザクションの一意の識別子。 -# * playerId : プレーヤーの一意の識別子。 -# * playerName : -# * date : -# * fromTeamId : プレーヤーの出身チームの一意の識別子。 -# * fromTeamName : -# * toTeamId : プレーヤーが行くチームの一意の識別子。 -# * toTeamName : -# * effectiveDate : -# * resolutionDate : -# * typeCode : トランザクションステータスの略語。 -# * typeDesc : トランザクションステータスの説明。 -# * description : トランザクションのテキスト説明。 - -# %% [markdown] {"papermill":{"duration":0.112168,"end_time":"2021-06-11T18:44:01.294478","exception":false,"start_time":"2021-06-11T18:44:01.18231","status":"completed"},"tags":[]} -# ## 1.7 standings(train.csvのcolumn7番目) -# 特定の日の MLB チームに関するすべての順位情報を含むネストされた JSON。 - -# %% [code] {"papermill":{"duration":0.140989,"end_time":"2021-06-11T18:44:01.541726","exception":false,"start_time":"2021-06-11T18:44:01.400737","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:16:06.386009Z","iopub.execute_input":"2021-06-14T09:16:06.386417Z","iopub.status.idle":"2021-06-14T09:16:06.415615Z","shell.execute_reply.started":"2021-06-14T09:16:06.386382Z","shell.execute_reply":"2021-06-14T09:16:06.414552Z"}} -training.head(3) - -# %% [code] {"papermill":{"duration":0.164115,"end_time":"2021-06-11T18:44:01.812397","exception":false,"start_time":"2021-06-11T18:44:01.648282","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:16:07.341281Z","iopub.execute_input":"2021-06-14T09:16:07.341726Z","iopub.status.idle":"2021-06-14T09:16:07.391635Z","shell.execute_reply.started":"2021-06-14T09:16:07.34169Z","shell.execute_reply":"2021-06-14T09:16:07.390416Z"}} -tmpdf = exshow("standings",0) -tmpdf.head(5) - -# %% [markdown] {"papermill":{"duration":0.112066,"end_time":"2021-06-11T18:44:02.037524","exception":false,"start_time":"2021-06-11T18:44:01.925458","status":"completed"},"tags":[]} -# カラムの意味の翻訳は↓を開いてください。(長いので、hideしています。) - -# %% [markdown] {"_kg_hide-input":true,"papermill":{"duration":0.106878,"end_time":"2021-06-11T18:44:02.255318","exception":false,"start_time":"2021-06-11T18:44:02.14844","status":"completed"},"tags":[]} -# * season : -# * gameDate : -# * divisionId : このチームが所属する部門を表す一意識別子。 -# * teamId : チームの一意の識別子。 -# * teamName : -# * streakCode : チームの現在の勝ち負けの連続の略語。最初の文字は勝ち負けを示し、数字はゲームの数です。 -# * divisionRank : チームの部門における現在のランク。 -# * leagueRank : リーグでのチームの現在のランク。 -# * wildCardRank : ワイルドカードバースのチームの現在のランク。 -# * leagueGamesBack : ゲームはチームのリーグに戻ります。 -# * sportGamesBack : 
MLBのすべてに戻ってゲーム。 -# * divisionGamesBack : チームの部門にゲームが戻ってきました。 -# * wins : 現在の勝利。 -# * losses : 現在の損失。 -# * pct : 現在の勝率。 -# * runsAllowed : シーズン中に許可された実行。 -# * runsScored : シーズンに得点したラン。 -# * divisionChamp : チームが部門タイトルを獲得した場合はtrue。 -# * divisionLeader : チームがディビジョンレースをリードしている場合はtrue。 -# * wildCardLeader : チームがワイルドカードリーダーの場合はtrue。 -# * eliminationNumber : ディビジョンレースから排除されるまでのゲーム数(チームの敗北+対戦相手の勝利)。 -# * wildCardEliminationNumber : ワイルドカードレースから排除されるまでのゲーム数(チームの敗北+対戦相手の勝利)。 -# * homeWins : ホームはシーズンに勝ちます。 -# * homeLosses : シーズン中のホームロス。 -# * awayWins : アウェイはシーズンに勝ちます。 -# * awayLosses : シーズンのアウェイロス。 -# * lastTenWins : 過去10試合で勝ちました。 -# * lastTenLosses : 過去10試合で負けました。 -# * extraInningWins : シーズンの追加イニングで勝ちます。 -# * extraInningLosses : シーズンの追加イニングでの損失。 -# * oneRunWins : シーズン中に1ランで勝ちます。 -# * oneRunLosses : シーズン中に1ランで負けます。 -# * dayWins : デイゲームはシーズンに勝ちます。 -# * dayLosses Day game losses on the season. : -# * nightWins : ナイトゲームはシーズンに勝ちます。 -# * nightLosses : シーズン中のナイトゲームの敗北。 -# * grassWins : 芝生のフィールドがシーズンに勝ちます。 -# * grassLosses : 季節の草地の損失。 -# * turfWins : 芝フィールドはシーズンに勝ちます。 -# * turfLosses : シーズン中の芝フィールドの損失。 -# * divWins : シーズン中にディビジョンの対戦相手に勝ちます。 -# * divLosses : シーズン中のディビジョンの対戦相手に対する敗北。 -# * alWins : シーズン中にALチームに勝ちます。 -# * alLosses : シーズン中のALチームに対する敗北。 -# * nlWins : シーズン中にNLチームに勝ちます。 -# * nlLosses : シーズン中のNLチームに対する敗北。 -# * xWinLossPct : スコアリングおよび許可されたランに基づく予想勝率. - -# %% [code] {"papermill":{"duration":0.106775,"end_time":"2021-06-11T18:44:02.471542","exception":false,"start_time":"2021-06-11T18:44:02.364767","status":"completed"},"tags":[]} - - -# %% [markdown] {"papermill":{"duration":0.107466,"end_time":"2021-06-11T18:44:02.687027","exception":false,"start_time":"2021-06-11T18:44:02.579561","status":"completed"},"tags":[]} -# ## 1.8 awards(train.csvのcolumn8番目) -# 特定の日に配られたすべての賞または栄誉を含むネストされた JSON。 - -# %% [code] {"papermill":{"duration":0.134686,"end_time":"2021-06-11T18:44:02.93004","exception":false,"start_time":"2021-06-11T18:44:02.795354","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:16:11.266915Z","iopub.execute_input":"2021-06-14T09:16:11.267266Z","iopub.status.idle":"2021-06-14T09:16:11.294052Z","shell.execute_reply.started":"2021-06-14T09:16:11.267232Z","shell.execute_reply":"2021-06-14T09:16:11.29274Z"}} -training.head(3) - -# %% [code] {"papermill":{"duration":0.135331,"end_time":"2021-06-11T18:44:03.177168","exception":false,"start_time":"2021-06-11T18:44:03.041837","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:16:12.039564Z","iopub.execute_input":"2021-06-14T09:16:12.039924Z","iopub.status.idle":"2021-06-14T09:16:12.061189Z","shell.execute_reply.started":"2021-06-14T09:16:12.039892Z","shell.execute_reply":"2021-06-14T09:16:12.060285Z"}} -tmpdf = exshow("awards",0) -tmpdf - -# %% [markdown] {"papermill":{"duration":0.109992,"end_time":"2021-06-11T18:44:03.399011","exception":false,"start_time":"2021-06-11T18:44:03.289019","status":"completed"},"tags":[]} -# * awardId : -# * awardName : -# * awardDate : 日付賞が与えられました。 -# * awardSeason : シーズンアワードはからでした。 -# * playerId : プレーヤーの一意の識別子。 -# * playerName : -# * awardPlayerTeamId : - -# %% [markdown] {"papermill":{"duration":0.1095,"end_time":"2021-06-11T18:44:03.617125","exception":false,"start_time":"2021-06-11T18:44:03.507625","status":"completed"},"tags":[]} -# ## 1.9 events(train.csvのcolumn9番目) -# 特定の日のすべてのオンフィールド ゲーム イベントを含むネストされた JSON。レギュラーシーズンとポストシーズンの試合が含まれます。 - -# %% [code] 
{"papermill":{"duration":0.137444,"end_time":"2021-06-11T18:44:03.868309","exception":false,"start_time":"2021-06-11T18:44:03.730865","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:16:15.023498Z","iopub.execute_input":"2021-06-14T09:16:15.023833Z","iopub.status.idle":"2021-06-14T09:16:15.049979Z","shell.execute_reply.started":"2021-06-14T09:16:15.023804Z","shell.execute_reply":"2021-06-14T09:16:15.048934Z"}} -training.head(3) - -# %% [code] {"papermill":{"duration":0.464152,"end_time":"2021-06-11T18:44:04.44451","exception":false,"start_time":"2021-06-11T18:44:03.980358","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:16:15.920155Z","iopub.execute_input":"2021-06-14T09:16:15.920493Z","iopub.status.idle":"2021-06-14T09:16:16.20824Z","shell.execute_reply.started":"2021-06-14T09:16:15.920465Z","shell.execute_reply":"2021-06-14T09:16:16.206929Z"}} -tmpdf = exshow("events",0) -tmpdf.head(5) - -# %% [markdown] {"papermill":{"duration":0.110823,"end_time":"2021-06-11T18:44:04.666523","exception":false,"start_time":"2021-06-11T18:44:04.5557","status":"completed"},"tags":[]} -# カラムの意味の翻訳は↓を開いてください。(長いので、hideしています。) - -# %% [markdown] {"_kg_hide-input":true,"papermill":{"duration":0.110798,"end_time":"2021-06-11T18:44:04.89031","exception":false,"start_time":"2021-06-11T18:44:04.779512","status":"completed"},"tags":[]} -# * gamePk : ゲームの一意の識別子。 -# * gameDate : -# * gameTimeUTC : UTCでの始球式。 -# * season : -# * gameType : ゲームの種類、さまざまな種類がここにあります。 -# * playId : スタットキャストのプレイガイド。 -# * eventId : -# * inning : イニングABが発生しました。 -# * halfInning : 「上」または「下」のイニングインジケーター。 -# * homeScore : イベント開始時のホームスコア。 -# * awayScore : イベント開始時のアウェイスコア。 -# * menOnBase : 走者がベースにいる場合に使用されるスプリット–すなわち(RISP、空)。 -# * atBatIndex : で -# * atBatDesc : 演奏する -# * atBatEvent : atBatのイベントタイプの結果。さまざまなタイプがここにあります。 -# * hasOut : バイナリ、ランナーが場に出ている場合は1。 -# * pitcherTeamId : ピッチングチームの一意の識別子。 -# * isPitcherHome : バイナリ、投手がホームチームの場合は1。 -# * pitcherTeam : ピッチングチームのチーム名。 -# * hitterTeamId : 打撃チームの一意の識別子。 -# * hitterTeam : 打撃チームのチーム名。 -# * pitcherId : -# * pitcherName : -# * isStarter : バイナリ、プレーヤーがゲームの先発投手だった場合は1。 -# * pitcherHand : プレーヤーが手を投げる:「L」、「R」。 -# * hitterId : -# * hitterName : -# * batSide : プレーヤーのバット側:「L」、「R」。 -# * pitchNumber : ABのピッチシーケンス番号。 -# * balls : イベント後のボール数。 -# * strikes : イベント後のストライクカウント。 -# * isGB : バイナリ、打席がグラウンドボールの場合は1。 -# * isLD : バイナリ、打席がラインドライブの場合は1。 -# * isFB : バイナリ、打席が飛球の場合は1。 -# * isPU : バイナリ、打席がポップアップの場合は1。 -# * launchSpeed : 打球の測定速度。 -# * launchAngle : ヒットが開始された地平線に対する垂直角度。 -# * totalDistance : ボールが移動した合計距離。 -# * event : で発生する可能性のあるイベント -# * description : イベントのテキスト説明。 -# * rbi : AB中に打点を打った回数。 -# * pitchType : ピッチタイプ分類コード。さまざまなタイプがここにあります。 -# * call : 投球または投球の結果分類コード。さまざまなタイプがここにあります。 -# * outs : ABの現在/最終アウト。 -# * inPlay : ボールが場に出た場合は真/偽。 -# * isPaOver : バイナリ、このイベントがプレートの外観の終わりである場合は1。 -# * startSpeed : ホームプレートの前50フィートでのボールのMPHでの速度。 -# * endSpeed : ボールがホームプレートの前端(x軸で0,0)を横切るときのボールのMPHでの速度。 -# * nastyFactor : 各ピッチのいくつかのプロパティを評価し、ピッチの「不快感」を0からのスケールで評価します -# * breakAngle : ピッチの平面が垂直から外れる時計回り(打者の視点)の角度。 -# * breakLength : ピッチがピッチ開始とピッチ終了の間の直線から離れる最大距離。 -# * breakY : ブレークが最大のホームプレートからの距離。 -# * spinRate : ピッチャーによってRPMでリリースされた後のボールのスピン率。 -# * spinDirection : スピンがボールの弾道にどのように影響するかを反映する角度として与えられる、リリース時のボールの回転軸。ピュアバック -# * pX : ボールがホームプレートの前軸と交差するときのボールのフィート単位の水平位置。 -# * pZ : ボールがホームプレートの前軸と交差するときの、ボールのホームプレートからのフィート単位の垂直位置。 -# * aX : z軸のボール加速度。 -# * aY : y軸のボール加速度。 -# * aZ : z 軸上のボールの加速度。 -# * pfxX : インチ単位のボールの水平方向の動き。 -# * pfxZ : インチ単位のボールの垂直方向の動き。 -# * vX0 : 
x軸からのボールの速度。 -# * vY0 : y軸からのボールの速度。 0,0,0 はバッターの後ろにあり、ボールはピッチャー マウンドから 0,0,0 に向かって移動するため、これは負です。 -# * vZ0 : z軸からのボールの速度。 -# * x : ピッチがホームプレートの前を横切ったX座標。 -# * y : ピッチがホームプレートの前面と交差するY座標。 -# * x0 : ピッチャーの手を離したときのボールの x 軸上の座標位置 (時間 = 0)。 -# * y0 : y軸上でピッチャーの手からボールがリリースされたポイントでのボールの座標位置(時間= 0)。 -# * z0 : z軸上でピッチャーの手からボールがリリースされたポイントでのボールの座標位置(時間= 0)。 -# * type : 「ピッチ」または「アクション」のいずれかのイベントのタイプ -# * zone : ゾーンロケーション番号.下を参照 -# -# ![image.png](attachment:1ad951bc-0f08-4424-83c4-6ff88a557d7d.png) -# - -# %% [markdown] {"papermill":{"duration":0.114746,"end_time":"2021-06-11T18:44:05.117226","exception":false,"start_time":"2021-06-11T18:44:05.00248","status":"completed"},"tags":[]} -# ## 1.10 playerTwitterFollowers(train.csvのcolumn10番目) -# その日の一部のプレイヤーの Twitter フォロワー数を含むネストされた JSON。 - -# %% [code] {"papermill":{"duration":0.142856,"end_time":"2021-06-11T18:44:05.374626","exception":false,"start_time":"2021-06-11T18:44:05.23177","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:16:19.415876Z","iopub.execute_input":"2021-06-14T09:16:19.416249Z","iopub.status.idle":"2021-06-14T09:16:19.445478Z","shell.execute_reply.started":"2021-06-14T09:16:19.416219Z","shell.execute_reply":"2021-06-14T09:16:19.444498Z"}} -training.head(3) - -# %% [code] {"papermill":{"duration":0.148668,"end_time":"2021-06-11T18:44:05.64077","exception":false,"start_time":"2021-06-11T18:44:05.492102","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:16:20.69924Z","iopub.execute_input":"2021-06-14T09:16:20.699607Z","iopub.status.idle":"2021-06-14T09:16:20.734173Z","shell.execute_reply.started":"2021-06-14T09:16:20.699576Z","shell.execute_reply":"2021-06-14T09:16:20.732938Z"}} -tmpdf = exshow("playerTwitterFollowers",0) -tmpdf.head(3) - -# %% [markdown] {"papermill":{"duration":0.115258,"end_time":"2021-06-11T18:44:05.87703","exception":false,"start_time":"2021-06-11T18:44:05.761772","status":"completed"},"tags":[]} -# Twitterのフォローデータは、MLBによってメジャーリーグプレーヤーのTwitter APIから毎月1日に収集され、2018年1月1日までさかのぼります。 すべてのプレーヤーがTwitterアカウントを持っている/持っているわけではない、プレーヤーがランダムにアカウントを作成/削除/復元する、または特定の日にフォロワーデータを収集できないその他のシナリオがあるため、このデータセットはすべての月にわたってすべてのプレーヤーを網羅しているわけではありません。 - -# %% [markdown] {"papermill":{"duration":0.1152,"end_time":"2021-06-11T18:44:06.109077","exception":false,"start_time":"2021-06-11T18:44:05.993877","status":"completed"},"tags":[]} -# * date : フォロワー数の日付。 -# * playerId : プレーヤーの一意の識別子。 -# * playerName : -# * accountName : プレイヤーのTwitterアカウントの名前。 -# * twitterHandle : プレイヤーのツイッターハンドル。 -# * numberOfFollowers : フォロワー数 - -# %% [markdown] {"papermill":{"duration":0.117931,"end_time":"2021-06-11T18:44:06.340356","exception":false,"start_time":"2021-06-11T18:44:06.222425","status":"completed"},"tags":[]} -# ## 1.11 teamTwitterFollowers(train.csvのcolumn11番目) -# その日の各チームの Twitter フォロワー数を含むネストされた JSON。 - -# %% [code] {"papermill":{"duration":0.144909,"end_time":"2021-06-11T18:44:06.602116","exception":false,"start_time":"2021-06-11T18:44:06.457207","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:17:20.07115Z","iopub.execute_input":"2021-06-14T09:17:20.071555Z","iopub.status.idle":"2021-06-14T09:17:20.098844Z","shell.execute_reply.started":"2021-06-14T09:17:20.071521Z","shell.execute_reply":"2021-06-14T09:17:20.097876Z"}} -training.head(3) - -# %% [code] 
{"papermill":{"duration":0.140245,"end_time":"2021-06-11T18:44:06.858481","exception":false,"start_time":"2021-06-11T18:44:06.718236","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:17:21.031613Z","iopub.execute_input":"2021-06-14T09:17:21.03204Z","iopub.status.idle":"2021-06-14T09:17:21.057308Z","shell.execute_reply.started":"2021-06-14T09:17:21.032005Z","shell.execute_reply":"2021-06-14T09:17:21.056119Z"}} -tmpdf = exshow("teamTwitterFollowers",0) -tmpdf.head(3) - -# %% [markdown] {"papermill":{"duration":0.119312,"end_time":"2021-06-11T18:44:07.09097","exception":false,"start_time":"2021-06-11T18:44:06.971658","status":"completed"},"tags":[]} -# Twitterのフォローデータは、2018年1月1日までさかのぼって、毎月1日に、メジャーリーグの30チームすべてのTwitterAPIからMLBによって収集されました。 - -# %% [markdown] {"papermill":{"duration":0.11246,"end_time":"2021-06-11T18:44:07.317214","exception":false,"start_time":"2021-06-11T18:44:07.204754","status":"completed"},"tags":[]} -# * date : フォロワー数の日付。 -# * teamId : チームの一意の識別子。 -# * teamName : -# * accountName : チームのTwitterアカウントの名前。 -# * twitterHandle : チームのツイッターハンドル。 - -# %% [markdown] {"papermill":{"duration":0.119547,"end_time":"2021-06-11T18:44:07.551012","exception":false,"start_time":"2021-06-11T18:44:07.431465","status":"completed"},"tags":[]} -# やっとこ中身確認完了。おつかれさまでした。。。 - -# %% [code] {"papermill":{"duration":0.120035,"end_time":"2021-06-11T18:44:07.783273","exception":false,"start_time":"2021-06-11T18:44:07.663238","status":"completed"},"tags":[]} - - -# %% [markdown] {"papermill":{"duration":0.112525,"end_time":"2021-06-11T18:44:08.009126","exception":false,"start_time":"2021-06-11T18:44:07.896601","status":"completed"},"tags":[]} -# # 2. 他のadditional data ( awards.csv, players.csv, seasons.csv, teams.csv) - -# %% [markdown] {"papermill":{"duration":0.12237,"end_time":"2021-06-11T18:44:08.250799","exception":false,"start_time":"2021-06-11T18:44:08.128429","status":"completed"},"tags":[]} -# ## 2.1 starterにあったwidgetの練習(こんなことできるんだーと思いましたので・・・) - -# %% [code] {"papermill":{"duration":0.120605,"end_time":"2021-06-11T18:44:08.489884","exception":false,"start_time":"2021-06-11T18:44:08.369279","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:17:55.345428Z","iopub.execute_input":"2021-06-14T09:17:55.345843Z","iopub.status.idle":"2021-06-14T09:17:55.35036Z","shell.execute_reply.started":"2021-06-14T09:17:55.345812Z","shell.execute_reply":"2021-06-14T09:17:55.349032Z"}} -df_names = ['seasons', 'teams', 'players', 'awards'] - -path = "../input/mlb-player-digital-engagement-forecasting" - -# %% [code] {"papermill":{"duration":0.155209,"end_time":"2021-06-11T18:44:08.759151","exception":false,"start_time":"2021-06-11T18:44:08.603942","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:17:58.414493Z","iopub.execute_input":"2021-06-14T09:17:58.414844Z","iopub.status.idle":"2021-06-14T09:17:58.448828Z","shell.execute_reply.started":"2021-06-14T09:17:58.414816Z","shell.execute_reply":"2021-06-14T09:17:58.447643Z"}} -kaggle_data_tabs = widgets.Tab() -# widgetsにそれぞれのDataFrameをchildrenの中にタブで表示 -kaggle_data_tabs.children = list([widgets.Output() for df_name in df_names]) - -# %% [code] 
{"papermill":{"duration":0.295868,"end_time":"2021-06-11T18:44:09.169213","exception":false,"start_time":"2021-06-11T18:44:08.873345","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:17:59.564008Z","iopub.execute_input":"2021-06-14T09:17:59.564414Z","iopub.status.idle":"2021-06-14T09:17:59.734201Z","shell.execute_reply.started":"2021-06-14T09:17:59.564381Z","shell.execute_reply":"2021-06-14T09:17:59.733044Z"}} -for index in range(len(df_names)): - # タブのタイトルを設定 - kaggle_data_tabs.set_title(index, df_names[index]) - - df = pd.read_csv(os.path.join(path,df_names[index]) + ".csv") - - # それぞれのタブにDataFrameを埋め込む - with kaggle_data_tabs.children[index]: - display(df) - -# %% [code] {"papermill":{"duration":0.127693,"end_time":"2021-06-11T18:44:09.419366","exception":false,"start_time":"2021-06-11T18:44:09.291673","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:18:01.219466Z","iopub.execute_input":"2021-06-14T09:18:01.221633Z","iopub.status.idle":"2021-06-14T09:18:01.229718Z","shell.execute_reply.started":"2021-06-14T09:18:01.22159Z","shell.execute_reply":"2021-06-14T09:18:01.228737Z"}} -display(kaggle_data_tabs) - -# %% [markdown] {"papermill":{"duration":0.112987,"end_time":"2021-06-11T18:44:09.648047","exception":false,"start_time":"2021-06-11T18:44:09.53506","status":"completed"},"tags":[]} -# -----------細かく一つ一つ見ていきます----------- - -# %% [markdown] {"papermill":{"duration":0.128132,"end_time":"2021-06-11T18:44:09.894007","exception":false,"start_time":"2021-06-11T18:44:09.765875","status":"completed"},"tags":[]} -# ## 2.2 Seasons.csv - -# %% [code] {"papermill":{"duration":0.150781,"end_time":"2021-06-11T18:44:10.190127","exception":false,"start_time":"2021-06-11T18:44:10.039346","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:19:00.781853Z","iopub.execute_input":"2021-06-14T09:19:00.782327Z","iopub.status.idle":"2021-06-14T09:19:00.807238Z","shell.execute_reply.started":"2021-06-14T09:19:00.782296Z","shell.execute_reply":"2021-06-14T09:19:00.806039Z"}} -seasons = pd.read_csv("../input/mlb-player-digital-engagement-forecasting/seasons.csv") -seasons - -# %% [markdown] {"papermill":{"duration":0.116735,"end_time":"2021-06-11T18:44:10.442337","exception":false,"start_time":"2021-06-11T18:44:10.325602","status":"completed"},"tags":[]} -# * seasonId : シーズンID -# * seasonStartDate : シーズンスタート日 -# * seasonEndDate : シーズン終了日 -# * preSeasonStartDate : 1つ前のシーズンスタート日 -# * preSeasonEndDate : 1つ前のシーズンの終わりの日 -# * regularSeasonStartDate : レギュラーシーズンのスタートの日 -# * regularSeasonEndDate : レギュラーシーズンの終わりの日 -# * lastDate1stHalf : 1st halfの最終日 -# * allStarDate : オールスター戦の日付 -# * firstDate2ndHalf : 2nd halfの始まり日 -# * postSeasonStartDate : 次のシーズンのスタート日 -# * postSeasonEndDate : 次のシーズンの終わり日 - -# %% [code] {"papermill":{"duration":0.118911,"end_time":"2021-06-11T18:44:10.677851","exception":false,"start_time":"2021-06-11T18:44:10.55894","status":"completed"},"tags":[]} - - -# %% [markdown] {"papermill":{"duration":0.11553,"end_time":"2021-06-11T18:44:10.912934","exception":false,"start_time":"2021-06-11T18:44:10.797404","status":"completed"},"tags":[]} -# ## 2.3 teams.csv - -# %% [code] 
{"papermill":{"duration":0.138966,"end_time":"2021-06-11T18:44:11.167983","exception":false,"start_time":"2021-06-11T18:44:11.029017","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:19:09.180321Z","iopub.execute_input":"2021-06-14T09:19:09.180697Z","iopub.status.idle":"2021-06-14T09:19:09.204548Z","shell.execute_reply.started":"2021-06-14T09:19:09.180668Z","shell.execute_reply":"2021-06-14T09:19:09.203438Z"}} -teams = pd.read_csv("../input/mlb-player-digital-engagement-forecasting/teams.csv") -teams.head(3) - -# %% [markdown] {"papermill":{"duration":0.125126,"end_time":"2021-06-11T18:44:11.428349","exception":false,"start_time":"2021-06-11T18:44:11.303223","status":"completed"},"tags":[]} -# ## teams.csv -# * id - チームID -# * name : 名前 -# * teamName : チームの名前 -# * teamCode : チームのコード -# * shortName : 短い名前 -# * abbreviation : 略語 -# * locationName : 場所の名前 -# * leagueId : リーグのid -# * leagueName : リーグの名前 -# * divisionId : 部門id -# * divisionName : 部門名 -# * venueId : 会場id -# * venueName : 会場名 - -# %% [markdown] {"papermill":{"duration":0.115273,"end_time":"2021-06-11T18:44:11.660746","exception":false,"start_time":"2021-06-11T18:44:11.545473","status":"completed"},"tags":[]} -# ## 2.4 players.csv - -# %% [code] {"papermill":{"duration":0.146866,"end_time":"2021-06-11T18:44:11.923004","exception":false,"start_time":"2021-06-11T18:44:11.776138","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:19:17.018473Z","iopub.execute_input":"2021-06-14T09:19:17.019073Z","iopub.status.idle":"2021-06-14T09:19:17.050961Z","shell.execute_reply.started":"2021-06-14T09:19:17.019024Z","shell.execute_reply":"2021-06-14T09:19:17.049924Z"}} -players = pd.read_csv("../input/mlb-player-digital-engagement-forecasting/players.csv") -players.head(3) - -# %% [markdown] {"papermill":{"duration":0.116271,"end_time":"2021-06-11T18:44:12.158173","exception":false,"start_time":"2021-06-11T18:44:12.041902","status":"completed"},"tags":[]} -# * playerId - Unique identifier for a player. : プレーヤーID-プレーヤーの一意の識別子。 -# * playerName : プレーヤの名前 -# * DOB - Player’s date of birth. 
: DOB-プレーヤーの生年月日。 -# * mlbDebutDate : MLBデビュー日 -# * birthCity : 生まれた町 -# * birthStateProvince : 出生州 -# * birthCountry : 生まれた国 -# * heightInches : 身長(inch) -# * weight : 体重 -# * primaryPositionCode - Player’s primary position code : 主要ポジションコード -# * primaryPositionName - player’s primary position : 主要ポジション名 -# * playerForTestSetAndFuturePreds - Boolean, true if player is among those for whom predictions are to be made in test data -# -# : **ブール値、プレーヤーがテストデータで予測が行われる対象の1人である場合はtrue** - -# %% [code] {"papermill":{"duration":0.11651,"end_time":"2021-06-11T18:44:12.393967","exception":false,"start_time":"2021-06-11T18:44:12.277457","status":"completed"},"tags":[]} - - -# %% [markdown] {"papermill":{"duration":0.118357,"end_time":"2021-06-11T18:44:12.629827","exception":false,"start_time":"2021-06-11T18:44:12.51147","status":"completed"},"tags":[]} -# ## 2.5 awards.csv - -# %% [code] {"papermill":{"duration":0.148565,"end_time":"2021-06-11T18:44:12.897385","exception":false,"start_time":"2021-06-11T18:44:12.74882","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:19:57.113713Z","iopub.execute_input":"2021-06-14T09:19:57.114402Z","iopub.status.idle":"2021-06-14T09:19:57.152653Z","shell.execute_reply.started":"2021-06-14T09:19:57.114352Z","shell.execute_reply":"2021-06-14T09:19:57.151616Z"}} -awards = pd.read_csv("../input/mlb-player-digital-engagement-forecasting/awards.csv") -awards.head(3) - -# %% [markdown] {"papermill":{"duration":0.118007,"end_time":"2021-06-11T18:44:13.133412","exception":false,"start_time":"2021-06-11T18:44:13.015405","status":"completed"},"tags":[]} -# このファイルには、日次データの開始前(つまり、2018年以前)にトレーニングセットのプレーヤーが獲得した賞が含まれています。 -# -# * awardDate - Date award was given. : 授与日 - 授与された日付。 -# * awardSeason - Season award was from. : アワードシーズン-シーズンアワードはからでした。 -# * awardId : アワードid -# * awardName : アワード名 -# * playerId - Unique identifier for a player. : プレーヤーID-プレーヤーの一意の識別子。 -# * playerName : プレーヤーの名前 -# * awardPlayerTeamId : プレイヤーのチームID - -# %% [code] {"papermill":{"duration":0.116296,"end_time":"2021-06-11T18:44:13.36787","exception":false,"start_time":"2021-06-11T18:44:13.251574","status":"completed"},"tags":[]} - - -# %% [markdown] {"papermill":{"duration":0.115578,"end_time":"2021-06-11T18:44:13.601453","exception":false,"start_time":"2021-06-11T18:44:13.485875","status":"completed"},"tags":[]} -# # 3. 
Data Merge - -# %% [markdown] {"papermill":{"duration":0.120359,"end_time":"2021-06-11T18:44:13.841012","exception":false,"start_time":"2021-06-11T18:44:13.720653","status":"completed"},"tags":[]} -# とりあえず、スターターhttps://www.kaggle.com/ryanholbrook/getting-started-with-mlb-player-digital-engagement  -# -# のコピーです。けっこう時間かかります。 - -# %% [code] {"papermill":{"duration":0.164068,"end_time":"2021-06-11T18:44:14.121247","exception":false,"start_time":"2021-06-11T18:44:13.957179","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:20:09.176809Z","iopub.execute_input":"2021-06-14T09:20:09.177183Z","iopub.status.idle":"2021-06-14T09:20:09.220929Z","shell.execute_reply.started":"2021-06-14T09:20:09.177151Z","shell.execute_reply":"2021-06-14T09:20:09.2202Z"}} -for name in df_names: - globals()[name] = pd.read_csv(os.path.join(path,name)+ ".csv") - -# %% [code] {"papermill":{"duration":306.845173,"end_time":"2021-06-11T18:49:21.088167","exception":false,"start_time":"2021-06-11T18:44:14.242994","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:20:10.410562Z","iopub.execute_input":"2021-06-14T09:20:10.411092Z","iopub.status.idle":"2021-06-14T09:25:36.408897Z","shell.execute_reply.started":"2021-06-14T09:20:10.411043Z","shell.execute_reply":"2021-06-14T09:25:36.40774Z"}} -#### Unnest various nested data within training (daily) data #### -daily_data_unnested_dfs = pd.DataFrame(data = { - 'dfName': training.drop('date', axis = 1).columns.values.tolist() - }) - -# Slow from this point !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1 - -daily_data_unnested_dfs['df'] = [pd.DataFrame() for row in - daily_data_unnested_dfs.iterrows()] - -for df_index, df_row in daily_data_unnested_dfs.iterrows(): - nestedTableName = str(df_row['dfName']) - - date_nested_table = training[['date', nestedTableName]] - - date_nested_table = (date_nested_table[ - ~pd.isna(date_nested_table[nestedTableName]) - ]. 
- reset_index(drop = True) - ) - - daily_dfs_collection = [] - - for date_index, date_row in date_nested_table.iterrows(): - daily_df = unpack_json(date_row[nestedTableName]) - - daily_df['dailyDataDate'] = date_row['date'] - - daily_dfs_collection = daily_dfs_collection + [daily_df] - - unnested_table = pd.concat(daily_dfs_collection, - ignore_index = True).set_index('dailyDataDate').reset_index() - - # Creates 1 pandas df per unnested df from daily data read in, with same name - globals()[df_row['dfName']] = unnested_table - - daily_data_unnested_dfs['df'][df_index] = unnested_table - -del training -gc.collect() - - - -#### Get some information on each date in daily data (using season dates of interest) #### -dates = pd.DataFrame(data = - {'dailyDataDate': nextDayPlayerEngagement['dailyDataDate'].unique()}) - -dates['date'] = pd.to_datetime(dates['dailyDataDate'].astype(str)) - -dates['year'] = dates['date'].dt.year -dates['month'] = dates['date'].dt.month - -dates_with_info = pd.merge( - dates, - seasons, - left_on = 'year', - right_on = 'seasonId' - ) - -dates_with_info['inSeason'] = ( - dates_with_info['date'].between( - dates_with_info['regularSeasonStartDate'], - dates_with_info['postSeasonEndDate'], - inclusive = True - ) - ) - -dates_with_info['seasonPart'] = np.select( - [ - dates_with_info['date'] < dates_with_info['preSeasonStartDate'], - dates_with_info['date'] < dates_with_info['regularSeasonStartDate'], - dates_with_info['date'] <= dates_with_info['lastDate1stHalf'], - dates_with_info['date'] < dates_with_info['firstDate2ndHalf'], - dates_with_info['date'] <= dates_with_info['regularSeasonEndDate'], - dates_with_info['date'] < dates_with_info['postSeasonStartDate'], - dates_with_info['date'] <= dates_with_info['postSeasonEndDate'], - dates_with_info['date'] > dates_with_info['postSeasonEndDate'] - ], - [ - 'Offseason', - 'Preseason', - 'Reg Season 1st Half', - 'All-Star Break', - 'Reg Season 2nd Half', - 'Between Reg and Postseason', - 'Postseason', - 'Offseason' - ], - default = np.nan - ) - -#### Add some pitching stats/pieces of info to player game level stats #### - -player_game_stats = (playerBoxScores.copy(). - # Change team Id/name to reflect these come from player game, not roster - rename(columns = {'teamId': 'gameTeamId', 'teamName': 'gameTeamName'}) - ) - -# Adds in field for innings pitched as fraction (better for aggregation) -player_game_stats['inningsPitchedAsFrac'] = np.where( - pd.isna(player_game_stats['inningsPitched']), - np.nan, - np.floor(player_game_stats['inningsPitched']) + - (player_game_stats['inningsPitched'] - - np.floor(player_game_stats['inningsPitched'])) * 10/3 - ) - -# Add in Tom Tango pitching game score (https://www.mlb.com/glossary/advanced-stats/game-score) -player_game_stats['pitchingGameScore'] = (40 -# + 2 * player_game_stats['outs'] - + 1 * player_game_stats['strikeOutsPitching'] - - 2 * player_game_stats['baseOnBallsPitching'] - - 2 * player_game_stats['hitsPitching'] - - 3 * player_game_stats['runsPitching'] - - 6 * player_game_stats['homeRunsPitching'] - ) - -# Add in criteria for no-hitter by pitcher (individual, not multiple pitchers) -player_game_stats['noHitter'] = np.where( - (player_game_stats['gamesStartedPitching'] == 1) & - (player_game_stats['inningsPitched'] >= 9) & - (player_game_stats['hitsPitching'] == 0), - 1, 0 - ) - -player_date_stats_agg = pd.merge( - (player_game_stats. - groupby(['dailyDataDate', 'playerId'], as_index = False). 
- # Some aggregations that are not simple sums - agg( - numGames = ('gamePk', 'nunique'), - # Should be 1 team per player per day, but adding here for 1 exception: - # playerId 518617 (Jake Diekman) had 2 games for different teams marked - # as played on 5/19/19, due to resumption of game after he was traded - numTeams = ('gameTeamId', 'nunique'), - # Should be only 1 team for almost all player-dates, taking min to simplify - gameTeamId = ('gameTeamId', 'min') - ) - ), - # Merge with a bunch of player stats that can be summed at date/player level - (player_game_stats. - groupby(['dailyDataDate', 'playerId'], as_index = False) - [['runsScored', 'homeRuns', 'strikeOuts', 'baseOnBalls', 'hits', - 'hitByPitch', 'atBats', 'caughtStealing', 'stolenBases', - 'groundIntoDoublePlay', 'groundIntoTriplePlay', 'plateAppearances', - 'totalBases', 'rbi', 'leftOnBase', 'sacBunts', 'sacFlies', - 'gamesStartedPitching', 'runsPitching', 'homeRunsPitching', - 'strikeOutsPitching', 'baseOnBallsPitching', 'hitsPitching', - 'inningsPitchedAsFrac', 'earnedRuns', - 'battersFaced','saves', 'blownSaves', 'pitchingGameScore', - 'noHitter' - ]]. - sum() - ), - on = ['dailyDataDate', 'playerId'], - how = 'inner' - ) - -#### Turn games table into 1 row per team-game, then merge with team box scores #### -# Filter to regular or Postseason games w/ valid scores for this part -games_for_stats = games[ - np.isin(games['gameType'], ['R', 'F', 'D', 'L', 'W', 'C', 'P']) & - ~pd.isna(games['homeScore']) & - ~pd.isna(games['awayScore']) - ] - -# Get games table from home team perspective -games_home_perspective = games_for_stats.copy() - -# Change column names so that "team" is "home", "opp" is "away" -games_home_perspective.columns = [ - col_value.replace('home', 'team').replace('away', 'opp') for - col_value in games_home_perspective.columns.values] - -games_home_perspective['isHomeTeam'] = 1 - -# Get games table from away team perspective -games_away_perspective = games_for_stats.copy() - -# Change column names so that "opp" is "home", "team" is "away" -games_away_perspective.columns = [ - col_value.replace('home', 'opp').replace('away', 'team') for - col_value in games_away_perspective.columns.values] - -games_away_perspective['isHomeTeam'] = 0 - -# Put together games from home/away perspective to get df w/ 1 row per team game -team_games = (pd.concat([ - games_home_perspective, - games_away_perspective - ], - ignore_index = True) - ) - -# Copy over team box scores data to modify -team_game_stats = teamBoxScores.copy() - -# Change column names to reflect these are all "team" stats - helps -# to differentiate from individual player stats if/when joining later -team_game_stats.columns = [ - (col_value + 'Team') - if (col_value not in ['dailyDataDate', 'home', 'teamId', 'gamePk', - 'gameDate', 'gameTimeUTC']) - else col_value - for col_value in team_game_stats.columns.values - ] - -# Merge games table with team game stats -team_games_with_stats = pd.merge( - team_games, - team_game_stats. - # Drop some fields that are already present in team_games table - drop(['home', 'gameDate', 'gameTimeUTC'], axis = 1), - on = ['dailyDataDate', 'gamePk', 'teamId'], - # Doing this as 'inner' join excludes spring training games, postponed games, - # etc. from original games table, but this may be fine for purposes here - how = 'inner' - ) - -team_date_stats_agg = (team_games_with_stats. - groupby(['dailyDataDate', 'teamId', 'gameType', 'oppId', 'oppName'], - as_index = False). 
- agg( - numGamesTeam = ('gamePk', 'nunique'), - winsTeam = ('teamWinner', 'sum'), - lossesTeam = ('oppWinner', 'sum'), - runsScoredTeam = ('teamScore', 'sum'), - runsAllowedTeam = ('oppScore', 'sum') - ) - ) - -# Prepare standings table for merge w/ player digital engagement data -# Pick only certain fields of interest from standings for merge -standings_selected_fields = (standings[['dailyDataDate', 'teamId', - 'streakCode', 'divisionRank', 'leagueRank', 'wildCardRank', 'pct' - ]]. - rename(columns = {'pct': 'winPct'}) - ) - -# Change column names to reflect these are all "team" standings - helps -# to differentiate from player-related fields if/when joining later -standings_selected_fields.columns = [ - (col_value + 'Team') - if (col_value not in ['dailyDataDate', 'teamId']) - else col_value - for col_value in standings_selected_fields.columns.values - ] - -standings_selected_fields['streakLengthTeam'] = ( - standings_selected_fields['streakCodeTeam']. - str.replace('W', ''). - str.replace('L', ''). - astype(float) - ) - -# Add fields to separate winning and losing streak from streak code -standings_selected_fields['winStreakTeam'] = np.where( - standings_selected_fields['streakCodeTeam'].str[0] == 'W', - standings_selected_fields['streakLengthTeam'], - np.nan - ) - -standings_selected_fields['lossStreakTeam'] = np.where( - standings_selected_fields['streakCodeTeam'].str[0] == 'L', - standings_selected_fields['streakLengthTeam'], - np.nan - ) - -standings_for_digital_engagement_merge = (pd.merge( - standings_selected_fields, - dates_with_info[['dailyDataDate', 'inSeason']], - on = ['dailyDataDate'], - how = 'left' - ). - # Limit down standings to only in season version - query("inSeason"). - # Drop fields no longer necessary (in derived values, etc.) - drop(['streakCodeTeam', 'streakLengthTeam', 'inSeason'], axis = 1). - reset_index(drop = True) - ) - -#### Merge together various data frames to add date, player, roster, and team info #### -# Copy over player engagement df to add various pieces to it -player_engagement_with_info = nextDayPlayerEngagement.copy() - -# Take "row mean" across targets to add (helps with studying all 4 targets at once) -player_engagement_with_info['targetAvg'] = np.mean( - player_engagement_with_info[['target1', 'target2', 'target3', 'target4']], - axis = 1) - -# Merge in date information -player_engagement_with_info = pd.merge( - player_engagement_with_info, - dates_with_info[['dailyDataDate', 'date', 'year', 'month', 'inSeason', - 'seasonPart']], - on = ['dailyDataDate'], - how = 'left' - ) - -# Merge in some player information -player_engagement_with_info = pd.merge( - player_engagement_with_info, - players[['playerId', 'playerName', 'DOB', 'mlbDebutDate', 'birthCity', - 'birthStateProvince', 'birthCountry', 'primaryPositionName']], - on = ['playerId'], - how = 'left' - ) - -# Merge in some player roster information by date -player_engagement_with_info = pd.merge( - player_engagement_with_info, - (rosters[['dailyDataDate', 'playerId', 'statusCode', 'status', 'teamId']]. - rename(columns = { - 'statusCode': 'rosterStatusCode', - 'status': 'rosterStatus', - 'teamId': 'rosterTeamId' - }) - ), - on = ['dailyDataDate', 'playerId'], - how = 'left' - ) - -# Merge in team name from player's roster team -player_engagement_with_info = pd.merge( - player_engagement_with_info, - (teams[['id', 'teamName']]. 
-        rename(columns = {
-            'id': 'rosterTeamId',
-            'teamName': 'rosterTeamName'
-            })
-        ),
-    on = ['rosterTeamId'],
-    how = 'left'
-    )
-
-# Merge in some player game stats (previously aggregated) from that date
-player_engagement_with_info = pd.merge(
-    player_engagement_with_info,
-    player_date_stats_agg,
-    on = ['dailyDataDate', 'playerId'],
-    how = 'left'
-    )
-
-# Merge in team name from player's game team
-player_engagement_with_info = pd.merge(
-    player_engagement_with_info,
-    (teams[['id', 'teamName']].
-        rename(columns = {
-            'id': 'gameTeamId',
-            'teamName': 'gameTeamName'
-            })
-        ),
-    on = ['gameTeamId'],
-    how = 'left'
-    )
-
-# Merge in some team game stats/results (previously aggregated) from that date
-player_engagement_with_info = pd.merge(
-    player_engagement_with_info,
-    team_date_stats_agg.rename(columns = {'teamId': 'gameTeamId'}),
-    on = ['dailyDataDate', 'gameTeamId'],
-    how = 'left'
-    )
-
-# Merge in player transactions of note on that date
-
-# Merge in some pieces of team standings (previously filtered/processed) from that date
-player_engagement_with_info = pd.merge(
-    player_engagement_with_info,
-    standings_for_digital_engagement_merge.
-        rename(columns = {'teamId': 'gameTeamId'}),
-    on = ['dailyDataDate', 'gameTeamId'],
-    how = 'left'
-    )
-
-display(player_engagement_with_info)
-
-# %% [code]
-player_engagement_with_info.info()
-
-# %% [markdown]
-# Save the output as a pickle so the result can be referenced later.
-
-# %% [code]
-player_engagement_with_info.to_pickle("player_engagement_with_info.pkl")
-
-# %% [code]
-
-
-# %% [markdown]
-# #### From this point on, the starter builds a simple Keras model; if you are interested, please have a look there.
-
-# %% [code]
-
-
-# %% [markdown]
-# ### Below, as a sanity-check submission, target1-4 are all filled with their median values (changed from mean to median in v8 because it improves the score).
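One plausible reason the median scores better than the mean here: the leaderboard is scored on an absolute-error metric (column-wise MAE), and the constant that minimizes mean absolute error is the median, not the mean, while the engagement targets are heavily right-skewed. A small self-contained illustration on toy data (the exponential shape is only an assumption chosen to mimic a skewed target, not competition data):

import numpy as np

rng = np.random.default_rng(0)
# Toy target with a long right tail, loosely mimicking a skewed engagement score.
y = rng.exponential(scale=5.0, size=100_000)

mae_of_mean = np.abs(y - y.mean()).mean()
mae_of_median = np.abs(y - np.median(y)).mean()
print(mae_of_mean, mae_of_median)  # the median constant gives the lower MAE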
{"papermill":{"duration":0.163708,"end_time":"2021-06-11T18:49:30.885597","exception":false,"start_time":"2021-06-11T18:49:30.721889","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:27:36.099392Z","iopub.execute_input":"2021-06-14T09:27:36.099805Z","iopub.status.idle":"2021-06-14T09:27:36.314269Z","shell.execute_reply.started":"2021-06-14T09:27:36.09976Z","shell.execute_reply":"2021-06-14T09:27:36.313164Z"}} -t1_median = player_engagement_with_info["target1"].median() -t2_median = player_engagement_with_info["target2"].median() -t3_median = player_engagement_with_info["target3"].median() -t4_median = player_engagement_with_info["target4"].median() - -# %% [code] {"papermill":{"duration":0.131632,"end_time":"2021-06-11T18:49:31.142954","exception":false,"start_time":"2021-06-11T18:49:31.011322","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:27:37.123159Z","iopub.execute_input":"2021-06-14T09:27:37.12362Z","iopub.status.idle":"2021-06-14T09:27:37.130077Z","shell.execute_reply.started":"2021-06-14T09:27:37.123585Z","shell.execute_reply":"2021-06-14T09:27:37.128856Z"}} -print(t1_median,t2_median,t3_median,t4_median) - -# %% [markdown] {"papermill":{"duration":0.11749,"end_time":"2021-06-11T18:49:33.163029","exception":false,"start_time":"2021-06-11T18:49:33.045539","status":"completed"},"tags":[]} -# # 4. submitの形式 -# riiidの https://www.kaggle.com/chumajin/eda-for-biginner で解説したのと同じく、1部のtest dataをget → 1部を予測 → 1部を提出 をどんどん繰り返していく方式です。 -# 今回は1日分のtest data→次の日を予測提出、んで、次の日のtest data→その次の日を予測、提出 の流れです。 -# -# -# ## **↓のmake_envは1回しか実行できません。** -# ## **失敗したら、データをrestart(上の方のFactory resetボタンを押す)して、再度やることになりますので、注意が必要です!** - -# %% [markdown] {"papermill":{"duration":0.118225,"end_time":"2021-06-11T18:49:33.398072","exception":false,"start_time":"2021-06-11T18:49:33.279847","status":"completed"},"tags":[]} -# #### 最終形はこんな感じです(スターターから抜粋。解説用に少し細かくやっていきます - -# %% [code] {"execution":{"iopub.execute_input":"2021-06-11T18:49:33.63826Z","iopub.status.busy":"2021-06-11T18:49:33.637334Z","iopub.status.idle":"2021-06-11T18:49:33.641655Z","shell.execute_reply":"2021-06-11T18:49:33.642107Z","shell.execute_reply.started":"2021-06-11T18:29:31.450494Z"},"papermill":{"duration":0.127157,"end_time":"2021-06-11T18:49:33.642278","exception":false,"start_time":"2021-06-11T18:49:33.515121","status":"completed"},"tags":[]} -""" -if 'kaggle_secrets' in sys.modules: # only run while on Kaggle - import mlb - - env = mlb.make_env() - iter_test = env.iter_test() - - for (test_df, sample_prediction_df) in iter_test: - - # Example: unpack a dataframe from a json column - today_games = unpack_json(test_df['games'].iloc[0]) - - # Make your predictions for the next day's engagement - sample_prediction_df['target1'] = 100.00 - - # Submit your predictions - env.predict(sample_prediction_df) - - -""" - -# %% [markdown] {"papermill":{"duration":0.118402,"end_time":"2021-06-11T18:49:33.878823","exception":false,"start_time":"2021-06-11T18:49:33.760421","status":"completed"},"tags":[]} -# #### ここから↑のサンプルコードを少し解説 - -# %% [markdown] {"papermill":{"duration":0.117862,"end_time":"2021-06-11T18:49:34.114753","exception":false,"start_time":"2021-06-11T18:49:33.996891","status":"completed"},"tags":[]} -# mlbのダウンロード - -# %% [code] 
{"papermill":{"duration":0.157437,"end_time":"2021-06-11T18:49:34.38967","exception":false,"start_time":"2021-06-11T18:49:34.232233","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:28:19.253637Z","iopub.execute_input":"2021-06-14T09:28:19.254017Z","iopub.status.idle":"2021-06-14T09:28:19.298491Z","shell.execute_reply.started":"2021-06-14T09:28:19.253987Z","shell.execute_reply":"2021-06-14T09:28:19.297258Z"}} -if 'kaggle_secrets' in sys.modules: # only run while on Kaggle - import mlb - -# %% [markdown] {"papermill":{"duration":0.124681,"end_time":"2021-06-11T18:49:34.711578","exception":false,"start_time":"2021-06-11T18:49:34.586897","status":"completed"},"tags":[]} -# envとiter_testの定義 (お決まりの作業と思ってもらえれば) - -# %% [code] {"papermill":{"duration":0.125932,"end_time":"2021-06-11T18:49:34.955809","exception":false,"start_time":"2021-06-11T18:49:34.829877","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:28:24.550083Z","iopub.execute_input":"2021-06-14T09:28:24.550516Z","iopub.status.idle":"2021-06-14T09:28:24.55605Z","shell.execute_reply.started":"2021-06-14T09:28:24.550468Z","shell.execute_reply":"2021-06-14T09:28:24.554671Z"}} -env = mlb.make_env() -iter_test = env.iter_test() - -# %% [markdown] {"papermill":{"duration":0.118437,"end_time":"2021-06-11T18:49:35.193711","exception":false,"start_time":"2021-06-11T18:49:35.075274","status":"completed"},"tags":[]} -# iter_testの中身を見てみる (とりあえずbreakで1個だけ見る。break外すとエラーでます。理由はそのあと解説しています) - -# %% [code] {"papermill":{"duration":0.991035,"end_time":"2021-06-11T18:49:36.301953","exception":false,"start_time":"2021-06-11T18:49:35.310918","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:28:40.080548Z","iopub.execute_input":"2021-06-14T09:28:40.081101Z","iopub.status.idle":"2021-06-14T09:28:40.942286Z","shell.execute_reply.started":"2021-06-14T09:28:40.081052Z","shell.execute_reply":"2021-06-14T09:28:40.941273Z"}} -for (test_df, sample_prediction_df) in iter_test: - display(test_df) - display(sample_prediction_df) - break - -# %% [markdown] {"papermill":{"duration":0.121287,"end_time":"2021-06-11T18:49:36.544242","exception":false,"start_time":"2021-06-11T18:49:36.422955","status":"completed"},"tags":[]} -# 1日分のtest dataと、submissionファイルが出てくるのがわかる -# -# -# ここで、submissionファイルに予測値を記入して、提出しないと、次の日のtest dataを受け取ることができないというエラーが出る(以下のように、もう一度走らせると怒られる) - -# %% [code] {"papermill":{"duration":0.226649,"end_time":"2021-06-11T18:49:36.890061","exception":false,"start_time":"2021-06-11T18:49:36.663412","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:29:31.022394Z","iopub.execute_input":"2021-06-14T09:29:31.022795Z","iopub.status.idle":"2021-06-14T09:29:31.080859Z","shell.execute_reply.started":"2021-06-14T09:29:31.022764Z","shell.execute_reply":"2021-06-14T09:29:31.079283Z"}} -for (test_df, sample_prediction_df) in iter_test: - display(test_df) - display(sample_prediction_df) - break - -# %% [markdown] {"papermill":{"duration":0.121178,"end_time":"2021-06-11T18:49:37.134545","exception":false,"start_time":"2021-06-11T18:49:37.013367","status":"completed"},"tags":[]} -# なので、一度submissionファイルに記入して提出してあげないといけません。ここではお試しに、先ほどのtargetのそれぞれの中間値を全部入れてみます。 - -# %% [code] 
{"papermill":{"duration":0.145229,"end_time":"2021-06-11T18:49:37.401468","exception":false,"start_time":"2021-06-11T18:49:37.256239","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:29:51.650122Z","iopub.execute_input":"2021-06-14T09:29:51.650494Z","iopub.status.idle":"2021-06-14T09:29:51.671854Z","shell.execute_reply.started":"2021-06-14T09:29:51.650465Z","shell.execute_reply":"2021-06-14T09:29:51.670792Z"}} -sample_prediction_df["target1"] = t1_median -sample_prediction_df["target2"] = t2_median -sample_prediction_df["target3"] = t3_median -sample_prediction_df["target4"] = t4_median - - -sample_prediction_df - -# %% [markdown] {"papermill":{"duration":0.121948,"end_time":"2021-06-11T18:49:37.64384","exception":false,"start_time":"2021-06-11T18:49:37.521892","status":"completed"},"tags":[]} -# 予測値を入れたらこの時点で一度下記のコードでsubmitします - -# %% [code] {"papermill":{"duration":0.129576,"end_time":"2021-06-11T18:49:37.895614","exception":false,"start_time":"2021-06-11T18:49:37.766038","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:29:59.007068Z","iopub.execute_input":"2021-06-14T09:29:59.007417Z","iopub.status.idle":"2021-06-14T09:29:59.012104Z","shell.execute_reply.started":"2021-06-14T09:29:59.00739Z","shell.execute_reply":"2021-06-14T09:29:59.010732Z"}} -env.predict(sample_prediction_df) - -# %% [markdown] {"papermill":{"duration":0.123149,"end_time":"2021-06-11T18:49:38.142959","exception":false,"start_time":"2021-06-11T18:49:38.01981","status":"completed"},"tags":[]} -# そうすると、次の日のデータが受け取れるようになります。(以下のように先ほどと同じコードを流してもエラーで怒られません) - -# %% [code] {"papermill":{"duration":0.383184,"end_time":"2021-06-11T18:49:38.648033","exception":false,"start_time":"2021-06-11T18:49:38.264849","status":"completed"},"tags":[],"execution":{"iopub.status.busy":"2021-06-14T09:30:09.450715Z","iopub.execute_input":"2021-06-14T09:30:09.45107Z","iopub.status.idle":"2021-06-14T09:30:09.707916Z","shell.execute_reply.started":"2021-06-14T09:30:09.451042Z","shell.execute_reply":"2021-06-14T09:30:09.707035Z"}} -for (test_df, sample_prediction_df) in iter_test: - display(test_df) - display(sample_prediction_df) - break - -# %% [markdown] {"papermill":{"duration":0.121984,"end_time":"2021-06-11T18:49:38.89173","exception":false,"start_time":"2021-06-11T18:49:38.769746","status":"completed"},"tags":[]} -# 以下のスターターのコードをもう一度みて見ると、for文の中でこれを繰り返してsubmitしていることがわかります。 -# -# 基本的には以下のfor文の中身を、test dfの前処理と、predictionして、sample_prediction_dfの書き換え、env.predictで提出していく流れですね。 -# -# riiidコンペの場合は、一つ前の情報の正解が流れてきてましたので、それを使って次の予測のためのデータとして使用していました(今回もおそらくそうなのかな??)。 - -# %% [code] {"papermill":{"duration":0.121709,"end_time":"2021-06-11T18:49:39.135494","exception":false,"start_time":"2021-06-11T18:49:39.013785","status":"completed"},"tags":[]} - - -# %% [code] {"execution":{"iopub.execute_input":"2021-06-11T18:49:39.386397Z","iopub.status.busy":"2021-06-11T18:49:39.385345Z","iopub.status.idle":"2021-06-11T18:49:39.390058Z","shell.execute_reply":"2021-06-11T18:49:39.389555Z"},"papermill":{"duration":0.133027,"end_time":"2021-06-11T18:49:39.390209","exception":false,"start_time":"2021-06-11T18:49:39.257182","status":"completed"},"tags":[]} -""" -if 'kaggle_secrets' in sys.modules: # only run while on Kaggle - import mlb - - env = mlb.make_env() - iter_test = env.iter_test() - - for (test_df, sample_prediction_df) in iter_test: - - # Example: unpack a dataframe from a json column - today_games = unpack_json(test_df['games'].iloc[0]) - - # Make your predictions for the next day's engagement 
-
-# %% [code]
-"""
-if 'kaggle_secrets' in sys.modules: # only run while on Kaggle
-    import mlb
-
-    env = mlb.make_env()
-    iter_test = env.iter_test()
-
-    for (test_df, sample_prediction_df) in iter_test:
-
-        # Example: unpack a dataframe from a json column
-        today_games = unpack_json(test_df['games'].iloc[0])
-
-        # Make your predictions for the next day's engagement
-        sample_prediction_df['target1'] = 100.00
-
-        # Submit your predictions
-        env.predict(sample_prediction_df)
-
-
-"""
-
-# %% [markdown]
-# ## Since this is submitted as is, after the second submission the for loop just runs through to the end.
-
-# %% [code]
-# Second submission
-
-sample_prediction_df["target1"] = t1_median
-sample_prediction_df["target2"] = t2_median
-sample_prediction_df["target3"] = t3_median
-sample_prediction_df["target4"] = t4_median
-env.predict(sample_prediction_df)
-
-
-# %% [code]
-# Loop through the remaining days to the end
-
-for (test_df, sample_prediction_df) in iter_test:
-
-    # Example: unpack a dataframe from a json column
-    #today_games = unpack_json(test_df['games'].iloc[0])
-
-    # Make your predictions for the next day's engagement
-    sample_prediction_df["target1"] = t1_median
-    sample_prediction_df["target2"] = t2_median
-    sample_prediction_df["target3"] = t3_median
-    sample_prediction_df["target4"] = t4_median
-
-    # Submit your predictions
-    env.predict(sample_prediction_df)
-
-# %% [markdown]
-# For reference: code requirements
-#
-# * This is a code competition
-# * Submissions to this competition must be made through notebooks. For the [Submit] button to be active after a commit, the following conditions must be met:
-# *
-# * CPU notebook <= 6 hours run-time
-# * GPU notebook <= 6 hours run-time
-# * **Internet access disabled**
-# * Freely and publicly available external data, including pre-trained models, is allowed
-# * Submissions must be made using the mlb Python module
-
-# %% [code]
-
-
-# %% [code]
-
-
-# %% [markdown]
-# # Thank you for reading this far!
-# # If it helps, an upvote/follow would be much appreciated!
-# # Thank you very much!!
- -# %% [code] {"papermill":{"duration":0.123659,"end_time":"2021-06-11T18:49:42.033425","exception":false,"start_time":"2021-06-11T18:49:41.909766","status":"completed"},"tags":[]} diff --git a/d1/mlb_player_v3.py b/d1/mlb_player_v3.py deleted file mode 100644 index 8372f21..0000000 --- a/d1/mlb_player_v3.py +++ /dev/null @@ -1,168 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Overview -# The kernel shows how to use the [tf_pose_estimation](https://github.com/ildoonet/tf-pose-estimation) package in Python on a series of running videos. - -# ## Libraries we need -# Install tf_pose and pycocotools - -# In[1]: - - -import os -def get_ipython(): - return os - -get_ipython().system('pip install -qq https://www.github.com/ildoonet/tf-pose-estimation') - - -# In[2]: - - -get_ipython().system('pip install -qq pycocotools') - - -# In[3]: - - -get_ipython().run_line_magic('load_ext', 'autoreload') -get_ipython().run_line_magic('autoreload', '2') -import seaborn as sns -import matplotlib.pyplot as plt -plt.rcParams["figure.figsize"] = (8, 8) -plt.rcParams["figure.dpi"] = 125 -plt.rcParams["font.size"] = 14 -plt.rcParams['font.family'] = ['sans-serif'] -plt.rcParams['font.sans-serif'] = ['DejaVu Sans'] -plt.style.use('ggplot') -sns.set_style("whitegrid", {'axes.grid': False}) - - -# In[4]: - - -get_ipython().run_line_magic('matplotlib', 'inline') -import tf_pose -import cv2 -from glob import glob -from tqdm import tqdm_notebook -from PIL import Image -import numpy as np -import os -def video_gen(in_path): - c_cap = cv2.VideoCapture(in_path) - while c_cap.isOpened(): - ret, frame = c_cap.read() - if not ret: - break - yield c_cap.get(cv2.CAP_PROP_POS_MSEC), frame[:, :, ::-1] - c_cap.release() - - -# In[5]: - - -video_paths = glob('../input/*.mp4') -c_video = video_gen(video_paths[0]) -for _ in range(300): - c_ts, c_frame = next(c_video) -plt.imshow(c_frame) - - -# In[6]: - - -from tf_pose.estimator import TfPoseEstimator -from tf_pose.networks import get_graph_path, model_wh -tfpe = tf_pose.get_estimator() - - -# In[7]: - - -humans = tfpe.inference(npimg=c_frame, upsample_size=4.0) -print(humans) - - -# In[8]: - - -new_image = TfPoseEstimator.draw_humans(c_frame[:, :, ::-1], humans, imgcopy=False) -fig, ax1 = plt.subplots(1, 1, figsize=(10, 10)) -ax1.imshow(new_image[:, :, ::-1]) - - -# In[9]: - - -body_to_dict = lambda c_fig: {'bp_{}_{}'.format(k, vec_name): vec_val - for k, part_vec in c_fig.body_parts.items() - for vec_name, vec_val in zip(['x', 'y', 'score'], - (part_vec.x, 1-part_vec.y, part_vec.score))} -c_fig = humans[0] -body_to_dict(c_fig) - - -# In[10]: - - -MAX_FRAMES = 200 -body_pose_list = [] -for vid_path in tqdm_notebook(video_paths, desc='Files'): - c_video = video_gen(vid_path) - c_ts, c_frame = next(c_video) - out_path = '{}_out.avi'.format(os.path.split(vid_path)[1]) - out = cv2.VideoWriter(out_path, - cv2.VideoWriter_fourcc('M','J','P','G'), - 10, - (c_frame.shape[1], c_frame.shape[0])) - for (c_ts, c_frame), _ in zip(c_video, - tqdm_notebook(range(MAX_FRAMES), desc='Frames')): - bgr_frame = c_frame[:,:,::-1] - humans = tfpe.inference(npimg=bgr_frame, upsample_size=4.0) - for c_body in humans: - body_pose_list += [dict(video=out_path, time=c_ts, **body_to_dict(c_body))] - new_image = TfPoseEstimator.draw_humans(bgr_frame, humans, imgcopy=False) - out.write(new_image) - out.release() - - -# In[11]: - - -import pandas as pd -body_pose_df = pd.DataFrame(body_pose_list) -body_pose_df.describe() - - -# In[12]: - - -fig, m_axs = plt.subplots(1, 2, figsize=(15, 5)) -for c_ax, 
(c_name, c_rows) in zip(m_axs, body_pose_df.groupby('video')): - for i in range(17): - c_ax.plot(c_rows['time'], c_rows['bp_{}_y'.format(i)], label='x {}'.format(i)) - c_ax.legend() - c_ax.set_title(c_name) - - -# In[13]: - - -fig, m_axs = plt.subplots(1, 2, figsize=(15, 5)) -for c_ax, (c_name, n_rows) in zip(m_axs, body_pose_df.groupby('video')): - for i in range(17): - c_rows = n_rows.query('bp_{}_score>0.6'.format(i)) # only keep confident results - c_ax.plot(c_rows['bp_{}_x'.format(i)], c_rows['bp_{}_y'.format(i)], label='BP {}'.format(i)) - c_ax.legend() - c_ax.set_title(c_name) - - -# In[14]: - - -body_pose_df.to_csv('body_pose.csv', index=False) - - -# In[15]: diff --git a/d1/mlb_player_v4.py b/d1/mlb_player_v4.py deleted file mode 100644 index feaf276..0000000 --- a/d1/mlb_player_v4.py +++ /dev/null @@ -1,576 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# -# -# NOTE: Turn on Internet and GPU - -# The code hidden below handles all the imports and function definitions (the heavy lifting). If you're a beginner I'd advice you skip this for now. When you are able to understand the rest of the code, come back here and understand each function to get a deeper knowledge. - -# In[1]: - - -# !/usr/bin/env python3 -# coding=utf-8 -# author=dave.fang@outlook.com -# create=20171225 - -import os -import pprint -import cv2 -import sys -import math -import time -import tempfile -import numpy as np -import matplotlib.pyplot as plt - -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.backends.cudnn as cudnn -import torch.optim as optim -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import torchvision.models as models - -from torch.autograd import Variable - -from scipy.ndimage.filters import gaussian_filter - -#get_ipython().run_line_magic('matplotlib', 'inline') -#get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'retina'") - -# find connection in the specified sequence, center 29 is in the position 15 -limb_seq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], - [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], - [1, 16], [16, 18], [3, 17], [6, 18]] - -# the middle joints heatmap correpondence -map_ids = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], - [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], - [55, 56], [37, 38], [45, 46]] - -# these are the colours for the 18 body points -colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], - [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], - [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] - - -class PoseEstimation(nn.Module): - def __init__(self, model_dict): - super(PoseEstimation, self).__init__() - - self.model0 = model_dict['block_0'] - self.model1_1 = model_dict['block1_1'] - self.model2_1 = model_dict['block2_1'] - self.model3_1 = model_dict['block3_1'] - self.model4_1 = model_dict['block4_1'] - self.model5_1 = model_dict['block5_1'] - self.model6_1 = model_dict['block6_1'] - - self.model1_2 = model_dict['block1_2'] - self.model2_2 = model_dict['block2_2'] - self.model3_2 = model_dict['block3_2'] - self.model4_2 = model_dict['block4_2'] - self.model5_2 = model_dict['block5_2'] - self.model6_2 = model_dict['block6_2'] - - def forward(self, x): - out1 = self.model0(x) - - out1_1 = self.model1_1(out1) - out1_2 = self.model1_2(out1) - out2 = 
torch.cat([out1_1, out1_2, out1], 1) - - out2_1 = self.model2_1(out2) - out2_2 = self.model2_2(out2) - out3 = torch.cat([out2_1, out2_2, out1], 1) - - out3_1 = self.model3_1(out3) - out3_2 = self.model3_2(out3) - out4 = torch.cat([out3_1, out3_2, out1], 1) - - out4_1 = self.model4_1(out4) - out4_2 = self.model4_2(out4) - out5 = torch.cat([out4_1, out4_2, out1], 1) - - out5_1 = self.model5_1(out5) - out5_2 = self.model5_2(out5) - out6 = torch.cat([out5_1, out5_2, out1], 1) - - out6_1 = self.model6_1(out6) - out6_2 = self.model6_2(out6) - - return out6_1, out6_2 - - -def make_layers(layer_dict): - layers = [] - - for i in range(len(layer_dict) - 1): - layer = layer_dict[i] - for k in layer: - v = layer[k] - if 'pool' in k: - layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])] - else: - conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) - layers += [conv2d, nn.ReLU(inplace=True)] - layer = list(layer_dict[-1].keys()) - k = layer[0] - v = layer_dict[-1][k] - - conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) - layers += [conv2d] - - return nn.Sequential(*layers) - - -def get_pose_model(): - blocks = {} - - block_0 = [{'conv1_1': [3, 64, 3, 1, 1]}, {'conv1_2': [64, 64, 3, 1, 1]}, {'pool1_stage1': [2, 2, 0]}, - {'conv2_1': [64, 128, 3, 1, 1]}, {'conv2_2': [128, 128, 3, 1, 1]}, {'pool2_stage1': [2, 2, 0]}, - {'conv3_1': [128, 256, 3, 1, 1]}, {'conv3_2': [256, 256, 3, 1, 1]}, {'conv3_3': [256, 256, 3, 1, 1]}, - {'conv3_4': [256, 256, 3, 1, 1]}, {'pool3_stage1': [2, 2, 0]}, {'conv4_1': [256, 512, 3, 1, 1]}, - {'conv4_2': [512, 512, 3, 1, 1]}, {'conv4_3_CPM': [512, 256, 3, 1, 1]}, - {'conv4_4_CPM': [256, 128, 3, 1, 1]}] - - blocks['block1_1'] = [{'conv5_1_CPM_L1': [128, 128, 3, 1, 1]}, {'conv5_2_CPM_L1': [128, 128, 3, 1, 1]}, - {'conv5_3_CPM_L1': [128, 128, 3, 1, 1]}, {'conv5_4_CPM_L1': [128, 512, 1, 1, 0]}, - {'conv5_5_CPM_L1': [512, 38, 1, 1, 0]}] - - blocks['block1_2'] = [{'conv5_1_CPM_L2': [128, 128, 3, 1, 1]}, {'conv5_2_CPM_L2': [128, 128, 3, 1, 1]}, - {'conv5_3_CPM_L2': [128, 128, 3, 1, 1]}, {'conv5_4_CPM_L2': [128, 512, 1, 1, 0]}, - {'conv5_5_CPM_L2': [512, 19, 1, 1, 0]}] - - for i in range(2, 7): - blocks['block%d_1' % i] = [{'Mconv1_stage%d_L1' % i: [185, 128, 7, 1, 3]}, - {'Mconv2_stage%d_L1' % i: [128, 128, 7, 1, 3]}, - {'Mconv3_stage%d_L1' % i: [128, 128, 7, 1, 3]}, - {'Mconv4_stage%d_L1' % i: [128, 128, 7, 1, 3]}, - {'Mconv5_stage%d_L1' % i: [128, 128, 7, 1, 3]}, - {'Mconv6_stage%d_L1' % i: [128, 128, 1, 1, 0]}, - {'Mconv7_stage%d_L1' % i: [128, 38, 1, 1, 0]}] - blocks['block%d_2' % i] = [{'Mconv1_stage%d_L2' % i: [185, 128, 7, 1, 3]}, - {'Mconv2_stage%d_L2' % i: [128, 128, 7, 1, 3]}, - {'Mconv3_stage%d_L2' % i: [128, 128, 7, 1, 3]}, - {'Mconv4_stage%d_L2' % i: [128, 128, 7, 1, 3]}, - {'Mconv5_stage%d_L2' % i: [128, 128, 7, 1, 3]}, - {'Mconv6_stage%d_L2' % i: [128, 128, 1, 1, 0]}, - {'Mconv7_stage%d_L2' % i: [128, 19, 1, 1, 0]}] - - layers = [] - for block in block_0: - # print(block) - for key in block: - v = block[key] - if 'pool' in key: - layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])] - else: - conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) - layers += [conv2d, nn.ReLU(inplace=True)] - - models = { - 'block_0': nn.Sequential(*layers) - } - - for k in blocks: - v = blocks[k] - models[k] = make_layers(v) - - return PoseEstimation(models) - - -def get_paf_and_heatmap(model, img_raw, 
scale_search, param_stride=8, box_size=368): - multiplier = [scale * box_size / img_raw.shape[0] for scale in scale_search] - - heatmap_avg = torch.zeros((len(multiplier), 19, img_raw.shape[0], img_raw.shape[1])).cuda() - paf_avg = torch.zeros((len(multiplier), 38, img_raw.shape[0], img_raw.shape[1])).cuda() - - for i, scale in enumerate(multiplier): - img_test = cv2.resize(img_raw, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) - img_test_pad, pad = pad_right_down_corner(img_test, param_stride, param_stride) - img_test_pad = np.transpose(np.float32(img_test_pad[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5 - - feed = Variable(torch.from_numpy(img_test_pad)).cuda() - output1, output2 = model(feed) - - print(output1.size()) - print(output2.size()) - - heatmap = nn.UpsamplingBilinear2d((img_raw.shape[0], img_raw.shape[1])).cuda()(output2) - - paf = nn.UpsamplingBilinear2d((img_raw.shape[0], img_raw.shape[1])).cuda()(output1) - - heatmap_avg[i] = heatmap[0].data - paf_avg[i] = paf[0].data - - heatmap_avg = torch.transpose(torch.transpose(torch.squeeze(torch.mean(heatmap_avg, 0)), 0, 1), 1, 2).cuda() - heatmap_avg = heatmap_avg.cpu().numpy() - - paf_avg = torch.transpose(torch.transpose(torch.squeeze(torch.mean(paf_avg, 0)), 0, 1), 1, 2).cuda() - paf_avg = paf_avg.cpu().numpy() - - return paf_avg, heatmap_avg - - -def extract_heatmap_info(heatmap_avg, param_thre1=0.1): - all_peaks = [] - peak_counter = 0 - - for part in range(18): - map_ori = heatmap_avg[:, :, part] - map_gau = gaussian_filter(map_ori, sigma=3) - - map_left = np.zeros(map_gau.shape) - map_left[1:, :] = map_gau[:-1, :] - map_right = np.zeros(map_gau.shape) - map_right[:-1, :] = map_gau[1:, :] - map_up = np.zeros(map_gau.shape) - map_up[:, 1:] = map_gau[:, :-1] - map_down = np.zeros(map_gau.shape) - map_down[:, :-1] = map_gau[:, 1:] - - peaks_binary = np.logical_and.reduce( - (map_gau >= map_left, map_gau >= map_right, map_gau >= map_up, - map_gau >= map_down, map_gau > param_thre1)) - - peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse - peaks = list(peaks) - peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] - ids = range(peak_counter, peak_counter + len(peaks)) - peaks_with_score_and_id = [peaks_with_score[i] + (ids[i],) for i in range(len(ids))] - - all_peaks.append(peaks_with_score_and_id) - peak_counter += len(peaks) - - return all_peaks - - -def extract_paf_info(img_raw, paf_avg, all_peaks, param_thre2=0.05, param_thre3=0.5): - connection_all = [] - special_k = [] - mid_num = 10 - - for k in range(len(map_ids)): - score_mid = paf_avg[:, :, [x - 19 for x in map_ids[k]]] - candA = all_peaks[limb_seq[k][0] - 1] - candB = all_peaks[limb_seq[k][1] - 1] - nA = len(candA) - nB = len(candB) - if nA != 0 and nB != 0: - connection_candidate = [] - for i in range(nA): - for j in range(nB): - vec = np.subtract(candB[j][:2], candA[i][:2]) - norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1]) - vec = np.divide(vec, norm) - - startend = zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), - np.linspace(candA[i][1], candB[j][1], num=mid_num)) - startend = list(startend) - - vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] - for I in range(len(startend))]) - vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] - for I in range(len(startend))]) - - score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1]) - score_with_dist_prior = sum(score_midpts) / len(score_midpts) - score_with_dist_prior 
+= min(0.5 * img_raw.shape[0] / norm - 1, 0) - - criterion1 = len(np.nonzero(score_midpts > param_thre2)[0]) > 0.8 * len(score_midpts) - criterion2 = score_with_dist_prior > 0 - if criterion1 and criterion2: - connection_candidate.append( - [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]]) - - connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True) - connection = np.zeros((0, 5)) - for c in range(len(connection_candidate)): - i, j, s = connection_candidate[c][0:3] - if i not in connection[:, 3] and j not in connection[:, 4]: - connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]]) - if len(connection) >= min(nA, nB): - break - - connection_all.append(connection) - else: - special_k.append(k) - connection_all.append([]) - - return special_k, connection_all - - -def get_subsets(connection_all, special_k, all_peaks): - # last number in each row is the total parts number of that person - # the second last number in each row is the score of the overall configuration - subset = -1 * np.ones((0, 20)) - candidate = np.array([item for sublist in all_peaks for item in sublist]) - - for k in range(len(map_ids)): - if k not in special_k: - partAs = connection_all[k][:, 0] - partBs = connection_all[k][:, 1] - indexA, indexB = np.array(limb_seq[k]) - 1 - - for i in range(len(connection_all[k])): # = 1:size(temp,1) - found = 0 - subset_idx = [-1, -1] - for j in range(len(subset)): # 1:size(subset,1): - if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]: - subset_idx[found] = j - found += 1 - - if found == 1: - j = subset_idx[0] - if (subset[j][indexB] != partBs[i]): - subset[j][indexB] = partBs[i] - subset[j][-1] += 1 - subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] - elif found == 2: # if found 2 and disjoint, merge them - j1, j2 = subset_idx - print("found = 2") - membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2] - if len(np.nonzero(membership == 2)[0]) == 0: # merge - subset[j1][:-2] += (subset[j2][:-2] + 1) - subset[j1][-2:] += subset[j2][-2:] - subset[j1][-2] += connection_all[k][i][2] - subset = np.delete(subset, j2, 0) - else: # as like found == 1 - subset[j1][indexB] = partBs[i] - subset[j1][-1] += 1 - subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2] - - # if find no partA in the subset, create a new subset - elif not found and k < 17: - row = -1 * np.ones(20) - row[indexA] = partAs[i] - row[indexB] = partBs[i] - row[-1] = 2 - row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2] - subset = np.vstack([subset, row]) - return subset, candidate - - -def draw_key_point(subset, all_peaks, img_raw): - del_ids = [] - for i in range(len(subset)): - if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4: - del_ids.append(i) - subset = np.delete(subset, del_ids, axis=0) - - img_canvas = img_raw.copy() # B,G,R order - - for i in range(18): - for j in range(len(all_peaks[i])): - cv2.circle(img_canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1) - - return subset, img_canvas - - -def link_key_point(img_canvas, candidate, subset, stickwidth=4): - for i in range(17): - for n in range(len(subset)): - index = subset[n][np.array(limb_seq[i]) - 1] - if -1 in index: - continue - cur_canvas = img_canvas.copy() - Y = candidate[index.astype(int), 0] - X = candidate[index.astype(int), 1] - mX = np.mean(X) - mY = np.mean(Y) - length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 
- angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) - polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1) - cv2.fillConvexPoly(cur_canvas, polygon, colors[i]) - img_canvas = cv2.addWeighted(img_canvas, 0.4, cur_canvas, 0.6, 0) - - return img_canvas - -def pad_right_down_corner(img, stride, pad_value): - h = img.shape[0] - w = img.shape[1] - - pad = 4 * [None] - pad[0] = 0 # up - pad[1] = 0 # left - pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down - pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right - - img_padded = img - pad_up = np.tile(img_padded[0:1, :, :] * 0 + pad_value, (pad[0], 1, 1)) - img_padded = np.concatenate((pad_up, img_padded), axis=0) - pad_left = np.tile(img_padded[:, 0:1, :] * 0 + pad_value, (1, pad[1], 1)) - img_padded = np.concatenate((pad_left, img_padded), axis=1) - pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + pad_value, (pad[2], 1, 1)) - img_padded = np.concatenate((img_padded, pad_down), axis=0) - pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + pad_value, (1, pad[3], 1)) - img_padded = np.concatenate((img_padded, pad_right), axis=1) - - return img_padded, pad - - -if __name__ == '__main__': - print(get_pose_model()) - - -# First let's download the pre-trained model. - -# In[2]: - - -# Using gdown to download the model directly from Google Drive - -#assert os.system(' conda install -y gdown') == 0 -import gdown - - -# In[3]: - - -model = 'coco_pose_iter_440000.pth.tar' -if not os.path.exists(model): - url = 'https://drive.google.com/u/0/uc?export=download&confirm=f_Ix&id=0B1asvDK18cu_MmY1ZkpaOUhhRHM' - gdown.download( - url, - model, - quiet=False - ) - - -# In[4]: - - -state_dict = torch.load('./coco_pose_iter_440000.pth.tar')['state_dict'] # getting the pre-trained model's parameters -# A state_dict is simply a Python dictionary object that maps each layer to its parameter tensor. - -model_pose = get_pose_model() # building the model (see fn. defn. above). To see the architecture, see below cell. -model_pose.load_state_dict(state_dict) # Loading the parameters (weights, biases) into the model. - -model_pose.float() # I'm not sure why this is used. No difference if you remove it. - - -# In[5]: - - -arch_image = '../input/indonesian-traditional-dance/tgagrakanyar/tga_0000.jpg' -img_ori = cv2.imread(arch_image) -plt.figure(figsize=(15, 8)) -plt.imshow(img_ori[...,::-1]) - - -# Notice, the first 10 layers are from VGG-19. 
But here instead of downloading the model and loading the layers from there, we simply hardcoaded it in get_pose_model() - -# In[6]: - - -# Run this to view the model's architecture -#model_pose.eval() - - -# In[7]: - - -use_gpu = True - -if use_gpu: - model_pose.cuda() - model_pose = torch.nn.DataParallel(model_pose, device_ids=range(torch.cuda.device_count())) - cudnn.benchmark = True - - -# In[8]: - - -def estimate_pose(img_ori, name=None): - if name is None: - name = tempfile.mktemp( - dir='/kaggle/working', - suffix='.png', - ) - pprint.pprint( - ['estimate_pose', dict(name=name)], - ) - - # People might be at different scales in the image, perform inference at multiple scales to boost results - scale_param = [0.5, 1.0, 1.5, 2.0] - - # Predict Heatmaps for approximate joint position - # Use Part Affinity Fields (PAF's) as guidance to link joints to form skeleton - # PAF's are just unit vectors along the limb encoding the direction of the limb - # A dot product of possible joint connection will be high if actual limb else low - - paf_info, heatmap_info = get_paf_and_heatmap(model_pose, img_ori, scale_param) - peaks = extract_heatmap_info(heatmap_info) - sp_k, con_all = extract_paf_info(img_ori, paf_info, peaks) - - subsets, candidates = get_subsets(con_all, sp_k, peaks) - subsets, img_points = draw_key_point(subsets, peaks, img_ori) - - # After predicting Heatmaps and PAF's, proceeed to link joints correctly - img_canvas = link_key_point(img_points, candidates, subsets) - - - f = plt.figure(figsize=(15, 10)) - - plt.subplot(1, 2, 1) - plt.imshow(img_points[...,::-1]) - - plt.subplot(1, 2, 2) - plt.imshow(img_canvas[...,::-1]) - - f.savefig(name) - - -# In[9]: - - -test_image = '../input/indonesian-traditional-dance/tgagrakanyar/tga_0000.jpg' -img_ori = cv2.imread(test_image) -estimate_pose(img_ori) - - -# In[10]: - - -test_image = '../input/indonesian-traditional-dance/tgagrakanyar/tga_0010.jpg' -img_ori = cv2.imread(test_image) -estimate_pose(img_ori) - - -# In[11]: - - -test_image = '../input/indonesian-traditional-dance/tgagrakanyar/tga_0020.jpg' -img_ori = cv2.imread(test_image) -estimate_pose(img_ori) - - -# In[12]: - - -test_image = '../input/indonesian-traditional-dance/tgagrakanyar/tga_0030.jpg' -img_ori = cv2.imread(test_image) -estimate_pose(img_ori) - - -# In[13]: - - -test_image = '../input/indonesian-traditional-dance/tgagrakanyar/tga_0040.jpg' -img_ori = cv2.imread(test_image) -estimate_pose(img_ori) - - -# In[14]: - - -test_image = '../input/indonesian-traditional-dance/tgagrakanyar/tga_0050.jpg' -img_ori = cv2.imread(test_image) -estimate_pose(img_ori) - - -# In[ ]: diff --git a/d1/mlb_player_v5.py b/d1/mlb_player_v5.py deleted file mode 100644 index b296083..0000000 --- a/d1/mlb_player_v5.py +++ /dev/null @@ -1,56 +0,0 @@ -import os - -if os.system(r''' pip show alphapose''') != 0: - t1 = r''' - pip install pycocotools - rm -fr /kaggle/working/AlphaPose - pip install pyyaml==5.2 - pip install scipy==1.1.0 - git clone https://github.com/WildflowerSchools/AlphaPose - python -m pip install cython gdown - apt-get install libyaml-dev - cd /kaggle/working/AlphaPose && python setup.py build develop - ''' - - for o in t1.splitlines(): - print(o) - assert os.system(o) == 0 - -import os -#!git clone https://github.com/MVIG-SJTU/AlphaPose.git - -import torch -print(torch.__version__) -import yaml, scipy -print(yaml.__version__) -print(scipy.__version__) - -import gdown -import os -for o1, o2 in [ - ( - '1D47msNOOiJKvPOXlnpyzdKA3k6E97NTC', - 
'/kaggle/working/AlphaPose/detector/yolo/data/yolov3-spp.weights', - ), - ( - '1nlnuYfGNuHWZztQHXwVZSL_FvfE551pA', - '/kaggle/working/AlphaPose/detector/tracker/data/JDE-1088x608-uncertainty', - ), - ( - '1kQhnMRURFiy7NsdS8EFL-8vtqEXOgECn', - '/kaggle/working/AlphaPose/pretrained_models/fast_res50_256x192.pth' - ), -]: - os.makedirs(os.path.split(o2)[0], exist_ok=True) - if not os.path.exists(o2): - gdown.download( - 'https://drive.google.com/u/0/uc?export=download&confirm=f_Ix&id=%s' % o1, - o2, - quiet=False - ) - - -assert os.system(r''' -mkdir -p /kaggle/working/test-input && mkdir -p /kaggle/working/test-output && cp /kaggle/working/AlphaPose/examples/demo/*.jpg /kaggle/working/test-input -cd /kaggle/working/AlphaPose && python3 scripts/demo_inference.py --cfg configs/coco/resnet/256x192_res50_lr1e-3_1x.yaml --checkpoint pretrained_models/fast_res50_256x192.pth --indir /kaggle/working/test-input --outdir /kaggle/working/test-output --save_img -''') == 0 diff --git a/d1/mlb_player_v6.py b/d1/mlb_player_v6.py deleted file mode 100644 index ba47ac8..0000000 --- a/d1/mlb_player_v6.py +++ /dev/null @@ -1,172 +0,0 @@ -# https://raw.githubusercontent.com/hafizas101/Real-time-human-pose-estimation-and-classification/master/main.py -# From Python -# It requires OpenCV installed for Python -import sys -import cv2 -import os -from sys import platform -import argparse -from math import sqrt, acos, degrees, atan, degrees -import numpy as np - -# ----------------------------------------- Arslan Part ---------------------------------------------------------------------------------- -def get_angle(a,b): - #print(a) - #print(b) - del_y = a[1]-b[1] - del_x = b[0]-a[0] - if del_x == 0: - del_x = 0.1 - #print("Del_X : "+str(del_x)+"-----Del_Y: "+str(del_y)) - angle = 0 - - if del_x > 0 and del_y > 0: - angle = degrees(atan(del_y / del_x)) - elif del_x < 0 and del_y > 0: - angle = degrees(atan(del_y / del_x)) + 180 - - return angle - -# ------------------------------------------------------------------------------------------------------------------------------------------ - -# ----------------------------------------- Maksim Part ---------------------------------------------------------------------------------- - -def angle_gor(a,b,c,d): - ab=[a[0]-b[0],a[1]-b[1]] - ab1=[c[0]-d[0],c[1]-d[1]] - cos=abs(ab[0]*ab1[0]+ab[1]*ab1[1])/(sqrt(ab[0]**2+ab[1]**2)*sqrt(ab1[0]**2+ab1[1]**2)) - ang = acos(cos) - return ang*180/np.pi - - -def sit_ang(a,b,c,d): - ang=angle_gor(a,b,c,d) - s1=0 - if ang != None: - #print("Angle",ang) - if ang < 120 and ang>40: - s1=1 - return s1 - -def sit_rec(a,b,c,d): - ab = [a[0] - b[0], a[1] - b[1]] - ab1 = [c[0] - d[0], c[1] - d[1]] - l1=sqrt(ab[0]**2+ab[1]**2) - l2=sqrt(ab1[0]**2+ab1[1]**2) - s=0 - if l1!=0 and l2!=0: - #print(l1,l2, "---------->>>") - if l2/l1>=1.5: - s=1 - return s - -# ------------------------------------------------------------------------------------------------------------------------------------------ - -# ----------------------------------------------------------- OpenPose Example Code ---------------------------------------------------------- - -# Import Openpose (Windows/Ubuntu/OSX) -dir_path = os.path.dirname(os.path.realpath(__file__)) -try: - # Windows Import - if platform == "win32": - # Change these variables to point to the correct folder (Release/x64 etc.) 
- sys.path.append(dir_path + '/../../python/openpose/Release'); - os.environ['PATH'] = os.environ['PATH'] + ';' + dir_path + '/../../x64/Release;' + dir_path + '/../../bin;' - import pyopenpose as op - else: - # Change these variables to point to the correct folder (Release/x64 etc.) - sys.path.append('../../python'); - # If you run `make install` (default path is `/usr/local/python` for Ubuntu), you can also access the OpenPose/python module from there. This will install OpenPose and the python library at your desired installation path. Ensure that this is in your python path in order to use it. - # sys.path.append('/usr/local/python') - from openpose import pyopenpose as op -except ImportError as e: - print('Error: OpenPose library could not be found. Did you enable `BUILD_PYTHON` in CMake and have this Python script in the right folder?') - raise e - -# Flags -parser = argparse.ArgumentParser() -parser.add_argument("--image_path", default="../../../examples/media/COCO_val2014_000000000192.jpg", help="Process an image. Read all standard formats (jpg, png, bmp, etc.).") -args = parser.parse_known_args() - -# Custom Params (refer to include/openpose/flags.hpp for more parameters) -params = dict() -params["model_folder"] = "/home/nvidia/openpose/models/" - -# Add others in path? -for i in range(0, len(args[1])): - curr_item = args[1][i] - if i != len(args[1])-1: next_item = args[1][i+1] - else: next_item = "1" - if "--" in curr_item and "--" in next_item: - key = curr_item.replace('-','') - if key not in params: params[key] = "1" - elif "--" in curr_item and "--" not in next_item: - key = curr_item.replace('-','') - if key not in params: params[key] = next_item - -# Construct it from system arguments -# op.init_argv(args[1]) -# oppython = op.OpenposePython() - -c=0 -# Starting OpenPose -opWrapper = op.WrapperPython() -opWrapper.configure(params) -opWrapper.start() - -# ------------------------------------------------------- OUR CONTRIBUTIONS ---------------------------------------------------------------- - -cam = cv2.VideoCapture(1) -for i in range(1000): - # Process Image - datum = op.Datum() - s, im = cam.read() # captures image - #cv2.imshow("Test Picture", im) # displays captured image - #im=cv2.resize(im,(480,270), interpolation = cv2.INTER_AREA) - image1 = im - #imageToProcess = cv2.imread(args[0].image_path) - c+=1 - if c==8: - c=0 - datum.cvInputData = image1 - opWrapper.emplaceAndPop([datum]) # OpenPose being applied to the frame image. 
- # Display Image - #print("Body keypoints: \n" + str(datum.poseKeypoints)) - #print(datum.poseKeypoints.shape) - if len(datum.poseKeypoints.shape)>=2: - x1=0 - x2=0 - - for j in range(len(datum.poseKeypoints)): - x1=0 - x2=0 - s=0 - s1=0 - ang1 = get_angle(datum.poseKeypoints[j][3], datum.poseKeypoints[j][4]) - ang2 = get_angle(datum.poseKeypoints[j][6], datum.poseKeypoints[j][7]) - if (30 < ang1 < 150): - x1 = 1 - if (30 < ang2 < 150): - x2 = 1 - x3 = x1+x2 - if (x3 == 1): - print("The {} person says: HELLO !".format(j+1)) - #cv2.putText(datum.cvOutputData,'OpenPose using Python-OpenCV',(20,30), cv2.FONT_HERSHEY_SIMPLEX, 1,(255,255,255),1,cv2.LINE_AA) - elif (x3 == 2): - print("The {} person says: STOP PLEASE !".format(j+1)) - s += sit_rec(datum.poseKeypoints[j][9], datum.poseKeypoints[j][10],datum.poseKeypoints[j][10],datum.poseKeypoints[j][11]) - s += sit_rec(datum.poseKeypoints[j][12], datum.poseKeypoints[j][13],datum.poseKeypoints[j][13],datum.poseKeypoints[j][14]) - s1+=sit_ang(datum.poseKeypoints[j][9], datum.poseKeypoints[j][10],datum.poseKeypoints[j][10],datum.poseKeypoints[j][11]) - s1+=sit_ang(datum.poseKeypoints[j][12], datum.poseKeypoints[j][13],datum.poseKeypoints[j][13],datum.poseKeypoints[j][14]) - if s > 0 or s1>0: - print("The {} person is sitting".format(j+1)) - if s == 0 and s1 == 0: - print("The {} person is standing".format(j+1)) - print("___________________________") - print(" ") - im=cv2.resize(datum.cvOutputData,(960,540), interpolation = cv2.INTER_AREA) - cv2.imshow("OpenPose 1.4.0 - Tutorial Python API", im) - cv2.waitKey(1) - - -# ------------------------------------------------------------------------------------------------------------------------------------------ diff --git a/m.py b/m.py deleted file mode 100755 index 31656d3..0000000 --- a/m.py +++ /dev/null @@ -1,338 +0,0 @@ -#!/usr/bin/env python3 -import glob -import io -import tempfile -import dataclasses -import pathlib -import sys -import subprocess -import os -import logging - - -from typing import (Optional, Any,) -from typing_extensions import ( - Self, BinaryIO, -) - -logger = logging.getLogger(__name__) - -def toml_load(f: BinaryIO) -> Any: - try: - import tomllib - return tomllib.load(f) - except: - pass - - try: - import tomli - return tomli.load(f) - except: - pass - - raise NotImplementedError - -@dataclasses.dataclass -class PyProject: - path: pathlib.Path - dependencies: dict[str, list[str]] - early_features: Optional[list[str]] = None - pip_find_links: Optional[list[pathlib.Path]] = None - runtime_libdirs: Optional[list[pathlib.Path]] = None - runtime_preload: Optional[list[pathlib.Path]] = None - requirements: dict[str, pathlib.Path] = dataclasses.field(default_factory=lambda : dict()) - -def pyproject_load( - d: pathlib.Path, -) -> PyProject: - with io.open(d, 'rb') as f: - content = toml_load(f) - - assert isinstance(content, dict) - - dependencies : dict[str, list[str]] = dict() - - dependencies['default'] = content['project']['dependencies'] - - if ( - 'optional-dependencies' in content['project'] - ): - assert isinstance( - content['project']['optional-dependencies'], - dict - ) - - for k, v in content['project']['optional-dependencies'].items(): - assert isinstance(v, list) - assert isinstance(k, str) - - dependencies[k] = v - - - res = PyProject( - path=d, - dependencies=dependencies, - ) - - tool_name = 'online.fxreader.pr34'.replace('.', '-') - - if ( - 'tool' in content and - isinstance( - content['tool'], dict - ) and - tool_name in content['tool'] and - isinstance( - 
content['tool'][tool_name], - dict - ) - ): - if 'early_features' in content['tool'][tool_name]: - res.early_features = content['tool'][tool_name]['early_features'] - - if 'pip_find_links' in content['tool'][tool_name]: - res.pip_find_links = [ - d.parent / pathlib.Path(o) - for o in content['tool'][tool_name]['pip_find_links'] - ] - - if 'runtime_libdirs' in content['tool'][tool_name]: - res.runtime_libdirs = [ - d.parent / pathlib.Path(o) - # pathlib.Path(o) - for o in content['tool'][tool_name]['runtime_libdirs'] - ] - - if 'runtime_preload' in content['tool'][tool_name]: - res.runtime_preload = [ - d.parent / pathlib.Path(o) - # pathlib.Path(o) - for o in content['tool'][tool_name]['runtime_preload'] - ] - - if 'requirements' in content['tool'][tool_name]: - assert isinstance(content['tool'][tool_name]['requirements'], dict) - - res.requirements = { - k : d.parent / pathlib.Path(v) - # pathlib.Path(o) - for k, v in content['tool'][tool_name]['requirements'].items() - } - - return res - -@dataclasses.dataclass -class BootstrapSettings: - env_path: pathlib.Path - python_path: pathlib.Path - base_dir: pathlib.Path - python_version: Optional[str] = dataclasses.field( - default_factory=lambda : os.environ.get( - 'PYTHON_VERSION', - '%d.%d' % ( - sys.version_info.major, - sys.version_info.minor, - ), - ).strip() - ) - uv_args: list[str] = dataclasses.field( - default_factory=lambda : os.environ.get( - 'UV_ARGS', - '--offline', - ).split(), - ) - - @classmethod - def get( - cls, - base_dir: Optional[pathlib.Path] = None, - ) -> Self: - if base_dir is None: - base_dir = pathlib.Path.cwd() - - env_path = base_dir / '.venv' - python_path = env_path / 'bin' / 'python3' - - return cls( - base_dir=base_dir, - env_path=env_path, - python_path=python_path, - ) - -def env_bootstrap( - bootstrap_settings: BootstrapSettings, - pyproject: PyProject, -) -> None: - pip_find_links : list[pathlib.Path] = [] - - if not pyproject.pip_find_links is None: - pip_find_links.extend(pyproject.pip_find_links) - - pip_find_links_args = sum([ - ['-f', str(o),] - for o in pip_find_links - ], []) - - features : list[str] = [] - - if pyproject.early_features: - features.extend(pyproject.early_features) - - requirements_python_version: Optional[str] = None - if not bootstrap_settings.python_version is None: - requirements_python_version = bootstrap_settings.python_version.replace('.', '_') - - - requirements_name = '_'.join(sorted(features)) - - if requirements_python_version: - requirements_name += '_' + requirements_python_version - - requirements_path : Optional[pathlib.Path] = None - - if requirements_name in pyproject.requirements: - requirements_path = pyproject.requirements[requirements_name] - else: - requirements_path = pyproject.path.parent / 'requirements.txt' - - requirements_in : list[str] = [] - - requirements_in.extend([ - 'uv', 'pip', 'build', 'setuptools', 'meson-python', 'pybind11' - ]) - - if pyproject.early_features: - early_dependencies = sum([ - pyproject.dependencies[o] - for o in pyproject.early_features - ], []) - - logger.info(dict( - early_dependencies=early_dependencies, - )) - - requirements_in.extend(early_dependencies) - # if len(early_dependencies) > 0: - # subprocess.check_call([ - # bootstrap_settings.python_path, - # '-m', - # 'uv', 'pip', 'install', - # *pip_find_links_args, - # # '-f', str(pathlib.Path(__file__).parent / 'deps' / 'dist'), - # *bootstrap_settings.uv_args, - # *early_dependencies, - # ]) - - if not requirements_path.exists(): - with tempfile.NamedTemporaryFile( - 
mode='w', - prefix='requirements', - suffix='.in', - ) as f: - f.write( - '\n'.join(requirements_in) - ) - f.flush() - - subprocess.check_call([ - 'uv', - 'pip', - 'compile', - '--generate-hashes', - *pip_find_links_args, - # '-p', - # bootstrap_settings.python_path, - *bootstrap_settings.uv_args, - '-o', str(requirements_path), - f.name, - ]) - - uv_python_version: list[str] = [] - - if not bootstrap_settings.python_version is None: - uv_python_version.extend([ - '-p', bootstrap_settings.python_version, - ]) - - subprocess.check_call([ - 'uv', 'venv', - *uv_python_version, - *pip_find_links_args, - # '--seed', - *bootstrap_settings.uv_args, - str(bootstrap_settings.env_path) - ]) - - subprocess.check_call([ - 'uv', - 'pip', - 'install', - *pip_find_links_args, - '-p', - bootstrap_settings.python_path, - '--require-hashes', - *bootstrap_settings.uv_args, - '-r', str(requirements_path), - ]) - - -def paths_equal( - a: pathlib.Path | str, - b: pathlib.Path | str -) -> bool: - return ( - os.path.abspath(str(a)) == - os.path.abspath(str(b)) - ) - -def run( - d: Optional[pathlib.Path] = None, - cli_path: Optional[pathlib.Path] = None, -) -> None: - if cli_path is None: - cli_path = pathlib.Path(__file__).parent / 'cli.py' - - if d is None: - d = pathlib.Path(__file__).parent / 'pyproject.toml' - - bootstrap_settings = BootstrapSettings.get() - - pyproject : PyProject = pyproject_load( - d - ) - - logging.basicConfig(level=logging.INFO) - - if not bootstrap_settings.env_path.exists(): - env_bootstrap( - bootstrap_settings=bootstrap_settings, - pyproject=pyproject, - ) - - logger.info([sys.executable, sys.argv, bootstrap_settings.python_path]) - - if not paths_equal(sys.executable, bootstrap_settings.python_path): - os.execv( - str(bootstrap_settings.python_path), - [ - str(bootstrap_settings.python_path), - *sys.argv, - ] - ) - - os.execv( - str(bootstrap_settings.python_path), - [ - str(bootstrap_settings.python_path), - str( - cli_path - ), - *sys.argv[1:], - ] - ) - -if __name__ == '__main__': - run( - d=pathlib.Path(__file__).parent / 'python' / 'pyproject.toml', - cli_path=pathlib.Path(__file__).parent / 'python' / 'cli.py', - ) diff --git a/meson/toolchains/wasm-cross-meson.ini b/meson/toolchains/wasm-cross-meson.ini new file mode 100644 index 0000000..da0c2e1 --- /dev/null +++ b/meson/toolchains/wasm-cross-meson.ini @@ -0,0 +1,13 @@ +[binaries] +cpp = 'em++' +c = 'emcc' +ar = 'emar' +windres = '/usr/bin/false' +; exe_wrapper = '/usr/bin/false' +exe_wrapper = 'node' + +[host_machine] +system = 'linux' +cpu_family = 'x86_64' +cpu = 'x86_64' +endian = 'little' diff --git a/python/.mypy.ini b/python/.mypy.ini new file mode 100644 index 0000000..7b470c3 --- /dev/null +++ b/python/.mypy.ini @@ -0,0 +1,18 @@ +[mypy] +mypy_path = + ../mypy-stubs, + ../deps/com.github.aiortc.aiortc/src, + ../mypy-stubs/marisa-trie-types, + ../mypy-stubs/types-debugpy, + . 
+ +exclude = + tmp, + build + +plugins = + numpy.typing.mypy_plugin, + pydantic.mypy + +explicit_package_bases = true +namespace_packages = true diff --git a/python/cli.py b/python/cli.py index 4c9bc47..95a470e 100644 --- a/python/cli.py +++ b/python/cli.py @@ -28,8 +28,11 @@ logger = logging.getLogger(__name__) class Command(enum.StrEnum): mypy = 'mypy' + pyright = 'pyright' + ruff = 'ruff' deploy_wheel = 'deploy:wheel' tests = 'tests' + meson_setup = 'meson:setup' @dataclasses.dataclass @@ -39,8 +42,8 @@ class Settings( base_dir: pathlib.Path = pathlib.Path(__file__).parent.parent build_dir: pathlib.Path = base_dir / 'tmp' / 'build' wheel_dir: pathlib.Path = base_dir / 'deps' / 'dist' - env_path: pathlib.Path = cli_bootstrap.BootstrapSettings.get(base_dir).env_path - python_path: pathlib.Path = cli_bootstrap.BootstrapSettings.get(base_dir).python_path + env_path: pathlib.Path = cli_bootstrap.BootstrapSettings.get().env_path + python_path: pathlib.Path = pathlib.Path(sys.executable) class CLI(_cli.CLI): @@ -51,6 +54,7 @@ class CLI(_cli.CLI): source_dir=self.settings.base_dir / 'python', build_dir=self.settings.base_dir / 'tmp' / 'online' / 'fxreader' / 'pr34' / 'build', dest_dir=self.settings.base_dir / 'tmp' / 'online' / 'fxreader' / 'pr34' / 'install', + meson_path=self.settings.base_dir / 'python' / 'meson.build', ) } @@ -83,16 +87,17 @@ class CLI(_cli.CLI): project.source_dir / '_m.py', project.source_dir / 'online', project.source_dir / 'cli.py', - self.settings.base_dir / 'm.py', + project.source_dir / 'm.py', # Settings.settings().project_root / 'deps/com.github.aiortc.aiortc/src', # Settings.settings().project_root / 'm.py', ], max_errors={ - 'python/online/fxreader/pr34/commands_typed': 0, - 'python/cli.py': 0, + 'online/fxreader/pr34/commands_typed': 0, + # 'online/fxreader/pr34/commands': 0, + 'cli.py': 0, 'm.py': 0, - 'deps/com.github.aiortc.aiortc/src/online_fxreader': 0, - 'deps/com.github.aiortc.aiortc/src/aiortc/contrib/signaling': 0, + '../deps/com.github.aiortc.aiortc/src/online_fxreader': 0, + '../deps/com.github.aiortc.aiortc/src/aiortc/contrib/signaling': 0, }, ), ) @@ -125,6 +130,23 @@ class CLI(_cli.CLI): options, args = parser.parse_known_args(argv[1:]) + default_project: Optional[str] = None + + for k, v in self.projects.items(): + if cli_bootstrap.paths_equal( + v.source_dir.resolve(), + # pathlib.Path(__file__).parent.resolve(), + pathlib.Path.cwd(), + ): + default_project = k + + if options.project is None: + if not default_project is None: + options.project = default_project + else: + logger.error(dict(msg='not provided project name')) + raise NotImplementedError + options.command = Command(options.command) if options.command is Command.deploy_wheel: @@ -135,6 +157,26 @@ class CLI(_cli.CLI): argv=args, output_dir=options.output_dir, mypy=True, + ruff=True, + pyright=True, + ) + elif options.command is Command.pyright: + self.pyright( + project_name=options.project, + argv=args, + ) + elif options.command is Command.ruff: + self.ruff( + project_name=options.project, + argv=args, + ) + elif options.command is Command.meson_setup: + assert not options.project is None + + self.meson_setup( + project_name=options.project, + argv=args, + force=options.force, ) elif options.command is Command.mypy: self.mypy( diff --git a/python/m.py b/python/m.py new file mode 100755 index 0000000..99da03a --- /dev/null +++ b/python/m.py @@ -0,0 +1,335 @@ +#!/usr/bin/env python3 +import glob +import io +import tempfile +import dataclasses +import pathlib +import sys +import 
subprocess +import os +import logging + + +from typing import ( + Optional, + Any, +) +from typing_extensions import ( + Self, + BinaryIO, +) + +logger = logging.getLogger(__name__) + + +def toml_load(f: BinaryIO) -> Any: + try: + import tomllib + + return tomllib.load(f) + except: + pass + + try: + import tomli + + return tomli.load(f) + except: + pass + + raise NotImplementedError + + +@dataclasses.dataclass +class PyProject: + path: pathlib.Path + dependencies: dict[str, list[str]] + early_features: Optional[list[str]] = None + pip_find_links: Optional[list[pathlib.Path]] = None + runtime_libdirs: Optional[list[pathlib.Path]] = None + runtime_preload: Optional[list[pathlib.Path]] = None + requirements: dict[str, pathlib.Path] = dataclasses.field(default_factory=lambda: dict()) + + +def pyproject_load( + d: pathlib.Path, +) -> PyProject: + with io.open(d, 'rb') as f: + content = toml_load(f) + + assert isinstance(content, dict) + + dependencies: dict[str, list[str]] = dict() + + dependencies['default'] = content['project']['dependencies'] + + if 'optional-dependencies' in content['project']: + assert isinstance(content['project']['optional-dependencies'], dict) + + for k, v in content['project']['optional-dependencies'].items(): + assert isinstance(v, list) + assert isinstance(k, str) + + dependencies[k] = v + + res = PyProject( + path=d, + dependencies=dependencies, + ) + + tool_name = 'online.fxreader.pr34'.replace('.', '-') + + if 'tool' in content and isinstance(content['tool'], dict) and tool_name in content['tool'] and isinstance(content['tool'][tool_name], dict): + if 'early_features' in content['tool'][tool_name]: + res.early_features = content['tool'][tool_name]['early_features'] + + if 'pip_find_links' in content['tool'][tool_name]: + res.pip_find_links = [d.parent / pathlib.Path(o) for o in content['tool'][tool_name]['pip_find_links']] + + if 'runtime_libdirs' in content['tool'][tool_name]: + res.runtime_libdirs = [ + d.parent / pathlib.Path(o) + # pathlib.Path(o) + for o in content['tool'][tool_name]['runtime_libdirs'] + ] + + if 'runtime_preload' in content['tool'][tool_name]: + res.runtime_preload = [ + d.parent / pathlib.Path(o) + # pathlib.Path(o) + for o in content['tool'][tool_name]['runtime_preload'] + ] + + if 'requirements' in content['tool'][tool_name]: + assert isinstance(content['tool'][tool_name]['requirements'], dict) + + res.requirements = { + k: d.parent / pathlib.Path(v) + # pathlib.Path(o) + for k, v in content['tool'][tool_name]['requirements'].items() + } + + return res + + +@dataclasses.dataclass +class BootstrapSettings: + env_path: pathlib.Path + python_path: pathlib.Path + base_dir: pathlib.Path + python_version: Optional[str] = dataclasses.field( + default_factory=lambda: os.environ.get( + 'PYTHON_VERSION', + '%d.%d' + % ( + sys.version_info.major, + sys.version_info.minor, + ), + ).strip() + ) + uv_args: list[str] = dataclasses.field( + default_factory=lambda: os.environ.get( + 'UV_ARGS', + '--offline', + ).split(), + ) + + @classmethod + def get( + cls, + base_dir: Optional[pathlib.Path] = None, + ) -> Self: + if base_dir is None: + base_dir = pathlib.Path.cwd() + + env_path = base_dir / '.venv' + python_path = env_path / 'bin' / 'python3' + + return cls( + base_dir=base_dir, + env_path=env_path, + python_path=python_path, + ) + + +def env_bootstrap( + bootstrap_settings: BootstrapSettings, + pyproject: PyProject, +) -> None: + pip_find_links: list[pathlib.Path] = [] + + if not pyproject.pip_find_links is None: + 
pip_find_links.extend(pyproject.pip_find_links) + + pip_find_links_args = sum( + [ + [ + '-f', + str(o), + ] + for o in pip_find_links + ], + [], + ) + + features: list[str] = [] + + if pyproject.early_features: + features.extend(pyproject.early_features) + + requirements_python_version: Optional[str] = None + if not bootstrap_settings.python_version is None: + requirements_python_version = bootstrap_settings.python_version.replace('.', '_') + + requirements_name = '_'.join(sorted(features)) + + if requirements_python_version: + requirements_name += '_' + requirements_python_version + + requirements_path: Optional[pathlib.Path] = None + + if requirements_name in pyproject.requirements: + requirements_path = pyproject.requirements[requirements_name] + else: + requirements_path = pyproject.path.parent / 'requirements.txt' + + requirements_in: list[str] = [] + + requirements_in.extend(['uv', 'pip', 'build', 'setuptools', 'meson-python', 'pybind11']) + + if pyproject.early_features: + early_dependencies = sum([pyproject.dependencies[o] for o in pyproject.early_features], []) + + logger.info( + dict( + early_dependencies=early_dependencies, + ) + ) + + requirements_in.extend(early_dependencies) + # if len(early_dependencies) > 0: + # subprocess.check_call([ + # bootstrap_settings.python_path, + # '-m', + # 'uv', 'pip', 'install', + # *pip_find_links_args, + # # '-f', str(pathlib.Path(__file__).parent / 'deps' / 'dist'), + # *bootstrap_settings.uv_args, + # *early_dependencies, + # ]) + + if not requirements_path.exists(): + with tempfile.NamedTemporaryFile( + mode='w', + prefix='requirements', + suffix='.in', + ) as f: + f.write('\n'.join(requirements_in)) + f.flush() + + subprocess.check_call( + [ + 'uv', + 'pip', + 'compile', + '--generate-hashes', + *pip_find_links_args, + # '-p', + # bootstrap_settings.python_path, + *bootstrap_settings.uv_args, + '-o', + str(requirements_path), + f.name, + ] + ) + + uv_python_version: list[str] = [] + + if not bootstrap_settings.python_version is None: + uv_python_version.extend( + [ + '-p', + bootstrap_settings.python_version, + ] + ) + + subprocess.check_call( + [ + 'uv', + 'venv', + *uv_python_version, + *pip_find_links_args, + # '--seed', + *bootstrap_settings.uv_args, + str(bootstrap_settings.env_path), + ] + ) + + subprocess.check_call( + [ + 'uv', + 'pip', + 'install', + *pip_find_links_args, + '-p', + bootstrap_settings.python_path, + '--require-hashes', + *bootstrap_settings.uv_args, + '-r', + str(requirements_path), + ] + ) + + +def paths_equal(a: pathlib.Path | str, b: pathlib.Path | str) -> bool: + return os.path.abspath(str(a)) == os.path.abspath(str(b)) + + +def run( + d: Optional[pathlib.Path] = None, + cli_path: Optional[pathlib.Path] = None, +) -> None: + if cli_path is None: + cli_path = pathlib.Path(__file__).parent / 'cli.py' + + if d is None: + d = pathlib.Path(__file__).parent / 'pyproject.toml' + + bootstrap_settings = BootstrapSettings.get() + + pyproject: PyProject = pyproject_load(d) + + logging.basicConfig(level=logging.INFO) + + if not bootstrap_settings.env_path.exists(): + env_bootstrap( + bootstrap_settings=bootstrap_settings, + pyproject=pyproject, + ) + + logger.info([sys.executable, sys.argv, bootstrap_settings.python_path]) + + if not paths_equal(sys.executable, bootstrap_settings.python_path): + os.execv( + str(bootstrap_settings.python_path), + [ + str(bootstrap_settings.python_path), + *sys.argv, + ], + ) + + os.execv( + str(bootstrap_settings.python_path), + [ + str(bootstrap_settings.python_path), + str(cli_path), + 
*sys.argv[1:], + ], + ) + + +if __name__ == '__main__': + run( + d=pathlib.Path(__file__).parent / 'pyproject.toml', + cli_path=pathlib.Path(__file__).parent / 'cli.py', + ) diff --git a/python/meson.build b/python/meson.build new file mode 100644 index 0000000..23cd8ba --- /dev/null +++ b/python/meson.build @@ -0,0 +1,87 @@ +project( + run_command( + 'tomlq', '-r', '.project.name', 'pyproject.toml', + check: true + ).stdout().strip('\n'), +# 'online.fxreader.uv', +# ['c', 'cpp'], + version: '0.1.5.16+27.21', +# default_options: [ +# 'cpp_std=c++23', +# # 'prefer_static=true', +# ], +) + +install_path = get_option('install_path') +message('install_path = ' + install_path) + +modes = get_option('modes') + +fs = import('fs') + +assert(modes.length() == 1, 'only one mode allowed') + +mode = modes[0] + + +project_root = '.' +source_dir = project_root +include_dir = project_root + + +if mode == 'meson' +# error() +endif + +if mode == 'pyproject' + py = import('python').find_installation(pure: false) + + namespace_path = meson.project_name().replace('.', '/') + + install_root = py.get_install_dir(pure: true) + install_root_binary = py.get_install_dir(pure: false) + + module_root = install_root / namespace_path + + python_sources = run_command( + 'find', namespace_path, '-iname', '*.py', + check: true + ).stdout().strip().split('\n') + + py.install_sources( + python_sources, + preserve_path: true, + pure: true, +# subdir: namespace_path, + ) + +# install_subdir( +# namespace_path, +# install_dir: py.get_install_dir(), +# install_tag: 'python-runtime', +# # python_sources, +# # subdir: namespace_path, +# ) + + install_data( + files( + [ + namespace_path / 'py.typed', + ], +# 'py.typed', + # '__init__.py', +# 'pyproject.toml', + ), + install_dir : install_root, + install_tag: 'python-runtime', + preserve_path: true, + ) +# + install_subdir( + project_root / '..' 
/ 'meson', + install_dir : module_root, + strip_directory: false, +# install_tag: 'devel', + install_tag: 'devel', + ) +endif diff --git a/python/meson_options.txt b/python/meson_options.txt new file mode 100644 index 0000000..bad457a --- /dev/null +++ b/python/meson_options.txt @@ -0,0 +1,2 @@ +option('modes', type: 'array', choices: ['meson', 'pyproject'], value: ['pyproject']) +option('install_path', type : 'string', value: '') diff --git a/python/online/fxreader/pr34/commands.py b/python/online/fxreader/pr34/commands.py index 9b78e20..723df9d 100644 --- a/python/online/fxreader/pr34/commands.py +++ b/python/online/fxreader/pr34/commands.py @@ -3953,6 +3953,30 @@ class Command(enum.StrEnum): vpn = 'vpn' backup = 'backup' pip_resolve = 'pip_resolve' + pip_check_conflicts = 'pip_check_conflicts' + + +def pip_check_conflicts( + args: list[str], +) -> None: + from .commands_typed.pip import pip_check_conflicts + from .commands_typed.argparse import parse_args as pr34_parse_args + + parser = argparse.ArgumentParser() + parser.add_argument( + '-p', + dest='venv_path', + type=pathlib.Path, + help='venv path', + default=None, + ) + + options, argv = pr34_parse_args(parser, args) + + res = pip_check_conflicts(options.venv_path) + logger.info(dict(res=res)) + + assert res.status == 'ok' def pip_resolve( @@ -4091,6 +4115,8 @@ def commands_cli(argv: Optional[list[str]] = None) -> int: desktop_services(args) elif options.command is Command.pip_resolve: pip_resolve(args) + elif options.command is Command.pip_check_conflicts: + pip_check_conflicts(args) elif options.command is Command.pm_service: pm_service(args) elif options.command is Command.backup: diff --git a/python/online/fxreader/pr34/commands_typed/cli.py b/python/online/fxreader/pr34/commands_typed/cli.py index 5b1f45c..a225f94 100644 --- a/python/online/fxreader/pr34/commands_typed/cli.py +++ b/python/online/fxreader/pr34/commands_typed/cli.py @@ -5,9 +5,11 @@ import os import pathlib import logging import sys +import pydantic import subprocess import shutil import abc +import argparse from .os import shutil_which @@ -15,6 +17,14 @@ from typing import ( Optional, Literal, Any, + MutableMapping, + Mapping, + IO, +) + +from typing_extensions import ( + cast, + Callable, ) logger = logging.getLogger(__name__) @@ -28,6 +38,36 @@ class Project: meson_path: Optional[pathlib.Path] = None +@dataclasses.dataclass +class PyProject: + @dataclasses.dataclass + class Tool: + @dataclasses.dataclass + class Meson: + @dataclasses.dataclass + class Args: + install: list[str] + + args: Args + + @dataclasses.dataclass + class MesonPython: + @dataclasses.dataclass + class Args: + install: list[str] + + args: Args + + meson: Optional[Meson] = None + + meson_python: Optional[MesonPython] = pydantic.Field( + alias='meson-python', + default=None, + ) + + tool: Optional[Tool] = None + + @dataclasses.dataclass class Dependency: name: str @@ -79,17 +119,23 @@ class CLI(abc.ABC): '.', ] - subprocess.check_call( - [ - self.dist_settings.python_path, - '-m', - 'ruff', - '--config', - str(project.source_dir / 'pyproject.toml'), - *argv, - ] + cmd = [ + str(self.dist_settings.python_path), + '-m', + 'ruff', + '--config', + str(project.source_dir / 'pyproject.toml'), + *argv, + ] + + logger.info( + dict( + cmd=cmd, + ) ) + subprocess.check_call(cmd) + def pyright( self, project_name: str, @@ -124,7 +170,7 @@ class CLI(abc.ABC): pyproject = cli_bootstrap.pyproject_load(self.projects[project].source_dir / 'pyproject.toml') - dependencies = sum([pyproject.dependencies[o] for 
o in features], []) + dependencies = sum([pyproject.dependencies[o] for o in features], cast(list[str], [])) pip_find_links: list[pathlib.Path] = [] @@ -153,7 +199,7 @@ class CLI(abc.ABC): ] for o in pip_find_links ], - [], + cast(list[str], []), ), # '-f', str(pathlib.Path(__file__).parent / 'deps' / 'dist'), '--offline', @@ -251,6 +297,8 @@ class CLI(abc.ABC): force: Optional[bool] = None, env: Optional[dict[str, str]] = None, mypy: bool = False, + ruff: bool = False, + pyright: bool = False, tests: bool = False, ) -> None: project = self.projects[project_name] @@ -278,9 +326,29 @@ class CLI(abc.ABC): force=force, ) + if ruff: + self.ruff( + project_name=project_name, + argv=[ + 'format', + '--check', + ], + ) + + self.ruff( + project_name=project_name, + argv=[], + ) + if mypy: self.mypy([]) + if pyright: + self.pyright( + project_name=project_name, + argv=[], + ) + if env is None: env = dict() @@ -331,6 +399,8 @@ class CLI(abc.ABC): force: Optional[bool] = None, argv: Optional[list[str]] = None, ) -> None: + from . import cli_bootstrap + project = self.projects[project_name] if force is None: @@ -342,21 +412,34 @@ class CLI(abc.ABC): if force and project.dest_dir.exists(): shutil.rmtree(project.dest_dir) - subprocess.check_call( - [ - shutil_which( - 'meson', - True, - ), - 'install', - '-C', - project.build_dir / 'meson', - '--destdir', - project.dest_dir, - *argv, - ] + pyproject = cli_bootstrap.pyproject_load(project.source_dir / 'pyproject.toml') + + pyproject_tool = pydantic.RootModel[PyProject.Tool].model_validate(pyproject.tool).root + + if pyproject_tool.meson and pyproject_tool.meson.args and pyproject_tool.meson.args.install: + argv = pyproject_tool.meson.args.install + argv + + cmd = [ + shutil_which( + 'meson', + True, + ), + 'install', + '-C', + str(project.build_dir / 'meson'), + '--destdir', + str(project.dest_dir), + *argv, + ] + + logger.info( + dict( + cmd=cmd, + ) ) + subprocess.check_call(cmd) + for o in glob.glob( str(project.dest_dir / 'lib' / 'pkgconfig' / '*.pc'), recursive=True, @@ -499,3 +582,195 @@ class CLI(abc.ABC): cmd, env=dict(list(os.environ.items())) | env, ) + + def venv_compile( + self, + project_name: str, + # force: bool, + argv: Optional[list[str]] = None, + ) -> None: + from . import cli_bootstrap + from . 
import argparse as pr34_argparse + + project = self.projects[project_name] + + bootstrap_settings = cli_bootstrap.BootstrapSettings.get() + + parser = argparse.ArgumentParser() + parser.add_argument( + '-f', + dest='features', + action='append', + default=[], + # type=pathlib.Path, + type=str, + ) + parser.add_argument( + '-p', + dest='python_version', + default=bootstrap_settings.python_version, + # type=pathlib.Path, + type=str, + ) + + options, args = pr34_argparse.parse_args( + parser, + argv, + ) + + pyproject = cli_bootstrap.pyproject_load(project.source_dir / 'pyproject.toml') + + dependencies = sum([pyproject.dependencies[o] for o in options.features], cast(list[str], [])) + + pip_find_links: list[pathlib.Path] = [] + + if not pyproject.pip_find_links is None: + pip_find_links.extend([o for o in pyproject.pip_find_links if o.exists()]) + + requirements_name_get_res = cli_bootstrap.requirements_name_get( + source_dir=project.source_dir, + features=options.features, + python_version=options.python_version, + requirements=pyproject.requirements, + ) + + logger.info( + dict( + dependencies=dependencies, + requirements_name_get_res=requirements_name_get_res, + ) + ) + + with io.open( + requirements_name_get_res.not_compiled, + 'w', + ) as f: + f.write( + '\n'.join(dependencies), + ) + f.flush() + + if len(dependencies) > 0: + cmd = [ + str(self.dist_settings.python_path), + '-m', + 'uv', + 'pip', + 'compile', + *args, + '--python-version', + options.python_version, + *sum( + [ + [ + '-f', + str(o), + ] + for o in pip_find_links + ], + cast(list[str], []), + ), + '--generate-hashes', + str(requirements_name_get_res.not_compiled), + '-o', + str(requirements_name_get_res.compiled), + ] + + logger.info( + dict( + cmd=cmd, + ) + ) + subprocess.check_call(cmd) + + def module_switch( + self, + project_name: str, + # force: bool, + argv: Optional[list[str]] = None, + ) -> None: + import tomlkit + import tomlkit.container + import tomlkit.items + + from . import cli_bootstrap + from . 
import argparse as pr34_argparse + + project = self.projects[project_name] + + parser = argparse.ArgumentParser() + parser.add_argument( + '-m', + dest='module', + # choices=[ + # o.name + # for o in project.modules + # ], + required=True, + # type=pathlib.Path, + type=str, + ) + + parser.add_argument( + '-f', + dest='file', + default=pathlib.Path('pyproject.common.toml'), + # type=pathlib.Path, + type=pathlib.Path, + ) + + options, args = pr34_argparse.parse_args( + parser, + argv, + ) + + if not options.file.is_absolute(): + options.file = project.source_dir / options.file + + pyproject = cli_bootstrap.pyproject_load( + options.file, + ) + + assert options.module in [o.name for o in pyproject.modules] + + modules: dict[str, cli_bootstrap.PyProject.Module] = {o.name: o for o in pyproject.modules} + + module = modules[options.module] + + with io.open(options.file, 'rb') as f: + pyproject2 = tomlkit.load(f) + + with io.open( + project.source_dir / 'pyproject.toml', + 'w', + ) as f: + p = pyproject2['project'] + assert isinstance(p, tomlkit.items.Table) + p['name'] = module.name + + if not pyproject2['tool']: + pyproject2['tool'] = [] + + if not 'tool' in pyproject2: + pyproject2['tool'] = dict() + + pyproject_tool = pyproject2['tool'] + # assert isinstance(pyproject_tool, tomlkit.items.Array) + assert isinstance(pyproject_tool, MutableMapping) + + for k, v in module.tool.items(): + assert not k in pyproject_tool + pyproject_tool[k] = v + + del p + del pyproject_tool + + cast( + Callable[[Mapping[Any, Any], IO[str] | IO[bytes]], None], + getattr(tomlkit, 'dump'), + )(pyproject2, f) + + del pyproject2 + del module + + # raise NotImplementedError diff --git a/python/online/fxreader/pr34/commands_typed/cli_bootstrap.py b/python/online/fxreader/pr34/commands_typed/cli_bootstrap.py index f282d75..087fcd5 100644 --- a/python/online/fxreader/pr34/commands_typed/cli_bootstrap.py +++ b/python/online/fxreader/pr34/commands_typed/cli_bootstrap.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import glob +import json import io import tempfile import dataclasses @@ -13,10 +14,14 @@ import logging from typing import ( Optional, Any, + cast, + Type, + TypeVar, ) from typing_extensions import ( Self, BinaryIO, + overload, ) logger = logging.getLogger(__name__) @@ -42,6 +47,12 @@ def toml_load(f: BinaryIO) -> Any: @dataclasses.dataclass class PyProject: + @dataclasses.dataclass + class Module: + name: str + meson: Optional[pathlib.Path] = None + tool: dict[str, Any] = dataclasses.field(default_factory=lambda: dict()) + path: pathlib.Path dependencies: dict[str, list[str]] early_features: Optional[list[str]] = None @@ -50,6 +61,89 @@ class PyProject: runtime_preload: Optional[list[pathlib.Path]] = None requirements: dict[str, pathlib.Path] = dataclasses.field(default_factory=lambda: dict()) + modules: list[Module] = dataclasses.field( + default_factory=lambda: [], + ) + + tool: dict[str, Any] = dataclasses.field( + default_factory=lambda: dict(), + ) + + +Key = TypeVar('Key') +Value = TypeVar('Value') + + +@overload +def check_dict( + value: Any, + KT: Type[Key], + VT: Type[Value], +) -> dict[Key, Value]: ... + + +@overload +def check_dict( + value: Any, + KT: Type[Key], +) -> dict[Key, Any]: ... 
+ + +def check_dict( + value: Any, + KT: Type[Key], + VT: Optional[Type[Value]] = None, +) -> dict[Key, Value]: + assert isinstance(value, dict) + value2 = cast(dict[Any, Any], value) + + assert all([isinstance(k, KT) and (VT is None or isinstance(v, VT)) for k, v in value2.items()]) + + if VT is None: + return cast( + dict[Key, Any], + value, + ) + else: + return cast( + dict[Key, Value], + value, + ) + + +@overload +def check_list( + value: Any, + VT: Type[Value], +) -> list[Value]: ... + + +@overload +def check_list( + value: Any, +) -> list[Any]: ... + + +def check_list( + value: Any, + VT: Optional[Type[Value]] = None, +) -> list[Value] | list[Any]: + assert isinstance(value, list) + value2 = cast(list[Any], value) + + assert all([(VT is None or isinstance(o, VT)) for o in value2]) + + if VT is None: + return cast( + list[Any], + value, + ) + else: + return cast( + list[Value], + value, + ) + def pyproject_load( d: pathlib.Path, @@ -66,9 +160,21 @@ def pyproject_load( if 'optional-dependencies' in content['project']: assert isinstance(content['project']['optional-dependencies'], dict) - for k, v in content['project']['optional-dependencies'].items(): - assert isinstance(v, list) - assert isinstance(k, str) + for k, v in check_dict( + check_dict( + check_dict( + content, + str, + # Any, + )['project'], + str, + # Any, + )['optional-dependencies'], + str, + list[Any], + ).items(): + # assert isinstance(v, list) + # assert isinstance(k, str) dependencies[k] = v @@ -79,36 +185,75 @@ def pyproject_load( tool_name = 'online.fxreader.pr34'.replace('.', '-') + if 'tool' in content: + res.tool = check_dict( + content['tool'], + str, + ) + if 'tool' in content and isinstance(content['tool'], dict) and tool_name in content['tool'] and isinstance(content['tool'][tool_name], dict): - if 'early_features' in content['tool'][tool_name]: - res.early_features = content['tool'][tool_name]['early_features'] + pr34_tool = check_dict( + check_dict( + content['tool'], + str, + )[tool_name], + str, + ) - if 'pip_find_links' in content['tool'][tool_name]: - res.pip_find_links = [d.parent / pathlib.Path(o) for o in content['tool'][tool_name]['pip_find_links']] + if 'early_features' in pr34_tool: + res.early_features = pr34_tool['early_features'] - if 'runtime_libdirs' in content['tool'][tool_name]: + if 'pip_find_links' in pr34_tool: + res.pip_find_links = [d.parent / pathlib.Path(o) for o in pr34_tool['pip_find_links']] + + if 'runtime_libdirs' in pr34_tool: res.runtime_libdirs = [ d.parent / pathlib.Path(o) # pathlib.Path(o) - for o in content['tool'][tool_name]['runtime_libdirs'] + for o in pr34_tool['runtime_libdirs'] ] - if 'runtime_preload' in content['tool'][tool_name]: + if 'runtime_preload' in pr34_tool: res.runtime_preload = [ d.parent / pathlib.Path(o) # pathlib.Path(o) - for o in content['tool'][tool_name]['runtime_preload'] + for o in pr34_tool['runtime_preload'] ] - if 'requirements' in content['tool'][tool_name]: - assert isinstance(content['tool'][tool_name]['requirements'], dict) - + if 'requirements' in pr34_tool: res.requirements = { k: d.parent / pathlib.Path(v) # pathlib.Path(o) - for k, v in content['tool'][tool_name]['requirements'].items() + for k, v in check_dict(pr34_tool['requirements'], str, str).items() } + if 'modules' in pr34_tool: + modules = check_list(pr34_tool['modules']) + # res.modules = [] + + for o in modules: + assert isinstance(o, dict) + assert 'name' in o and isinstance(o['name'], str) + + module = PyProject.Module( + name=o['name'], + ) + + if 'meson' in o: + assert 
'meson' in o and isinstance(o['meson'], str) + + module.meson = pathlib.Path(o['meson']) + + if 'tool' in o: + module.tool.update( + check_dict( + o['tool'], + str, + ) + ) + + res.modules.append(module) + return res @@ -127,6 +272,9 @@ class BootstrapSettings: ), ).strip() ) + pip_check_conflicts: Optional[bool] = dataclasses.field( + default_factory=lambda: os.environ.get('PIP_CHECK_CONFLICTS', json.dumps(True)) in [json.dumps(True)], + ) uv_args: list[str] = dataclasses.field( default_factory=lambda: os.environ.get( 'UV_ARGS', @@ -142,7 +290,12 @@ class BootstrapSettings: if base_dir is None: base_dir = pathlib.Path.cwd() - env_path = base_dir / '.venv' + env_path: Optional[pathlib.Path] = None + if 'ENV_PATH' in os.environ: + env_path = pathlib.Path(os.environ['ENV_PATH']) + else: + env_path = base_dir / '.venv' + python_path = env_path / 'bin' / 'python3' return cls( @@ -152,6 +305,47 @@ class BootstrapSettings: ) +class requirements_name_get_t: + @dataclasses.dataclass + class res_t: + not_compiled: pathlib.Path + compiled: pathlib.Path + name: str + + +def requirements_name_get( + source_dir: pathlib.Path, + python_version: Optional[str], + features: list[str], + requirements: dict[str, pathlib.Path], +) -> requirements_name_get_t.res_t: + requirements_python_version: Optional[str] = None + if not python_version is None: + requirements_python_version = python_version.replace('.', '_') + + requirements_name = '_'.join(sorted(features)) + + if requirements_python_version: + requirements_name += '_' + requirements_python_version + + requirements_path: Optional[pathlib.Path] = None + + if requirements_name in requirements: + requirements_path = requirements[requirements_name] + else: + requirements_path = source_dir / 'requirements.txt' + + requirements_path_in = requirements_path.parent / (requirements_path.stem + '.in') + + requirements_in: list[str] = [] + + return requirements_name_get_t.res_t( + not_compiled=requirements_path_in, + compiled=requirements_path, + name=requirements_name, + ) + + def env_bootstrap( bootstrap_settings: BootstrapSettings, pyproject: PyProject, @@ -169,7 +363,7 @@ def env_bootstrap( ] for o in pip_find_links ], - [], + cast(list[str], []), ) features: list[str] = [] @@ -177,32 +371,24 @@ def env_bootstrap( if pyproject.early_features: features.extend(pyproject.early_features) - requirements_python_version: Optional[str] = None - if not bootstrap_settings.python_version is None: - requirements_python_version = bootstrap_settings.python_version.replace('.', '_') - - requirements_name = '_'.join(sorted(features)) - - if requirements_python_version: - requirements_name += '_' + requirements_python_version - - requirements_path: Optional[pathlib.Path] = None - - if requirements_name in pyproject.requirements: - requirements_path = pyproject.requirements[requirements_name] - else: - requirements_path = pyproject.path.parent / 'requirements.txt' + requirements_name_get_res = requirements_name_get( + python_version=bootstrap_settings.python_version, + features=features, + requirements=pyproject.requirements, + source_dir=pyproject.path.parent, + ) + requirements_path = requirements_name_get_res.compiled requirements_in: list[str] = [] requirements_in.extend(['uv', 'pip', 'build', 'setuptools', 'meson-python', 'pybind11']) if pyproject.early_features: - early_dependencies = sum([pyproject.dependencies[o] for o in pyproject.early_features], []) + early_dependencies = sum([pyproject.dependencies[o] for o in pyproject.early_features], cast(list[str], [])) 
logger.info( dict( - requirements_name=requirements_name, + requirements_name_get_res=requirements_name_get_res, early_dependencies=early_dependencies, ) ) @@ -281,6 +467,16 @@ def env_bootstrap( ] ) + if bootstrap_settings.pip_check_conflicts: + subprocess.check_call( + [ + bootstrap_settings.python_path, + '-m', + 'online.fxreader.pr34.commands', + 'pip_check_conflicts', + ] + ) + def paths_equal(a: pathlib.Path | str, b: pathlib.Path | str) -> bool: return os.path.abspath(str(a)) == os.path.abspath(str(b)) diff --git a/python/online/fxreader/pr34/commands_typed/crypto.py b/python/online/fxreader/pr34/commands_typed/crypto.py index 9efbd8c..327d1cb 100644 --- a/python/online/fxreader/pr34/commands_typed/crypto.py +++ b/python/online/fxreader/pr34/commands_typed/crypto.py @@ -2,6 +2,7 @@ import base64 import os import cryptography.hazmat.primitives.kdf.scrypt +import cryptography.exceptions from typing import ( Literal, diff --git a/python/online/fxreader/pr34/commands_typed/mypy.py b/python/online/fxreader/pr34/commands_typed/mypy.py index 6defea2..82bb7d8 100644 --- a/python/online/fxreader/pr34/commands_typed/mypy.py +++ b/python/online/fxreader/pr34/commands_typed/mypy.py @@ -84,7 +84,7 @@ def run( argv = [] if settings is None: - settings = MypySettings() + settings = MypySettings.model_validate(dict()) parser = argparse.ArgumentParser() parser.add_argument( diff --git a/python/online/fxreader/pr34/commands_typed/pip.py b/python/online/fxreader/pr34/commands_typed/pip.py index 079f434..f1dca2f 100644 --- a/python/online/fxreader/pr34/commands_typed/pip.py +++ b/python/online/fxreader/pr34/commands_typed/pip.py @@ -1,4 +1,5 @@ import contextlib +import glob import pathlib import sys import enum @@ -10,6 +11,11 @@ import unittest.mock import logging import typing +from typing_extensions import ( + cast, + Protocol, +) + if typing.TYPE_CHECKING: import pip._internal.commands.show import pip._internal.commands.download @@ -21,6 +27,8 @@ if typing.TYPE_CHECKING: import pip._internal.resolution.base import pip._internal.resolution.resolvelib.resolver import pip._internal.operations.prepare + import pip._internal.index.package_finder + from pip._internal.req.req_install import InstallRequirement from typing import ( Literal, @@ -32,19 +40,48 @@ from typing import ( logger = logging.getLogger(__name__) +class pip_show_t: + class res_t: + class package_info_t: + pass + + def pip_show( argv: list[str], -) -> list['pip._internal.commands.show._PackageInfo']: +) -> list[ + # 'pip._internal.commands.show._PackageInfo' + pip_show_t.res_t.package_info_t, +]: import pip._internal.commands.show - return list( - pip._internal.commands.show.search_packages_info( - argv, - ) + return cast( + list[pip_show_t.res_t.package_info_t], + list( + pip._internal.commands.show.search_packages_info( + argv, + ) + ), ) class pip_resolve_t: + class build_package_finder_t(Protocol): + def __call__( + self, + options: Any, + session: Any, + target_python: Any, + ignore_requires_python: Any, + ) -> 'pip._internal.index.package_finder.PackageFinder': ... + + class complete_partial_requirements_t(Protocol): + def __call__( + self, + _self: 'pip._internal.resolution.resolvelib.resolver.Resolver', + partially_downloaded_reqs: Iterable['InstallRequirement',], + parallel_builds: bool = False, + ) -> None: ... 
+ class kwargs_t: class mode_t(enum.StrEnum): copy_paste = 'copy_paste' @@ -130,12 +167,13 @@ def pip_resolve( # t1._in_main_context = True session = t1.get_default_session(options) target_python = pip._internal.cli.cmdoptions.make_target_python(options) - finder = t1._build_package_finder( + finder = cast(pip_resolve_t.build_package_finder_t, getattr(t1, '_build_package_finder'))( options=options, session=session, target_python=target_python, ignore_requires_python=options.ignore_requires_python, ) + build_tracker = t1.enter_context(pip._internal.operations.build.build_tracker.get_build_tracker()) reqs = t1.get_requirements( [ @@ -220,6 +258,8 @@ def pip_resolve( from pip._internal.utils.hashes import Hashes from pip._internal.req.req_install import InstallRequirement + from . import cli_bootstrap + downloader_call_def = pip._internal.network.download.Downloader.__call__ def downloader_call( @@ -311,7 +351,13 @@ def pip_resolve( result_requirements.extend(reqs) raise NotImplementedError - _complete_partial_requirements_def = pip._internal.operations.prepare.RequirementPreparer._complete_partial_requirements + _complete_partial_requirements_def = cast( + pip_resolve_t.complete_partial_requirements_t, + getattr( + pip._internal.operations.prepare.RequirementPreparer, + '_complete_partial_requirements', + ), + ) def _complete_partial_requirements( _self: pip._internal.resolution.resolvelib.resolver.Resolver, @@ -391,7 +437,7 @@ def pip_resolve( # ] # for o in result_requirements # ], []) - logger.warn(result_requirements) + logger.warning(result_requirements) res = pip_resolve_t.res_t() @@ -400,7 +446,15 @@ def pip_resolve( for o in result_requirements: assert isinstance(o, InstallRequirement) - sha256_hashes = o.hashes()._allowed['sha256'] + sha256_hashes = cli_bootstrap.check_list( + cli_bootstrap.check_dict( + getattr(o.hashes(), '_allowed'), + str, + list[str], + )['sha256'], + str, + ) + assert len(sha256_hashes) == 1 assert not o.link is None @@ -478,3 +532,46 @@ def pip_resolve( ) else: raise NotImplementedError + + +class pip_check_conflicts_t: + @dataclasses.dataclass + class res_t: + status: Literal['ok', 'error'] + duplicates: list[str] + + +def pip_check_conflicts( + venv_path: Optional[pathlib.Path] = None, +) -> pip_check_conflicts_t.res_t: + assert sys.platform == 'linux' + if venv_path is None: + venv_path = ( + pathlib.Path( + sys.executable, + ).parent + / '..' 
+ ) + + # records = glob.glob( + # str(venv_path / '*' / 'site-packages' / '*.dist-info' / 'RECORD'), + # recursive=True, + # ) + + duplicates = [ + line + for line in subprocess.check_output( + r""" + cat $(find $VENV_PATH/lib/*/*/*.dist-info/RECORD) | sort | uniq -c | (grep -v -P '^\s+1\s'; true;) + """, + shell=True, + env=dict( + VENV_PATH=str(venv_path), + ), + ) + .decode('utf-8') + .splitlines() + if line.strip() != '' + ] + + return pip_check_conflicts_t.res_t(status=('error' if len(duplicates) > 0 else 'ok'), duplicates=duplicates) diff --git a/python/online/fxreader/pr34/tasks/mlb_player.py b/python/online/fxreader/pr34/tasks/mlb_player.py index 5a1fd8a..35701ef 100644 --- a/python/online/fxreader/pr34/tasks/mlb_player.py +++ b/python/online/fxreader/pr34/tasks/mlb_player.py @@ -5,12 +5,9 @@ import cv2 import re import json import io -import glob import xarray import numpy -import json import glob -import io import os import pandas import pickle @@ -325,7 +322,7 @@ def kernel_7( import torch.optim as optim import torchvision.transforms as transforms import torchvision.datasets as datasets - import torchvision.models as models + import torchvision.models from torch.autograd import Variable @@ -1735,7 +1732,13 @@ def kernel_28( --outdir %s """ % (t13, t2) if False: - pprint.pprint([t4, t2, t6]) + pprint.pprint( + [ + # t4, + t2, + t6, + ] + ) with subprocess.Popen(t6, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p: if False: pprint.pprint(p.communicate()) @@ -1781,7 +1784,7 @@ def kernel_29( t1 = json.load(f) t8 = sum([o['data'] for o in t1], []) - t10 = re.compile('frame-(\d+)\.jpg') + t10 = re.compile(r'frame-(\d+)\.jpg') for i, o in enumerate(t8): o['frame_id'] = int(t10.match(o['image_id'])[1]) @@ -2018,7 +2021,7 @@ def kernel_30( def kernel_31(image_id, image_size, keypoints): def get_angle(a, b): - from math import sqrt, acos, degrees, atan, degrees + from math import sqrt, acos, degrees, atan # print(a) # print(b) @@ -2038,7 +2041,7 @@ def kernel_31(image_id, image_size, keypoints): def angle_gor(a, b, c, d): import numpy as np - from math import sqrt, acos, degrees, atan, degrees + from math import sqrt, acos, degrees, atan ab = [a[0] - b[0], a[1] - b[1]] ab1 = [c[0] - d[0], c[1] - d[1]] @@ -2049,14 +2052,14 @@ def kernel_31(image_id, image_size, keypoints): def sit_ang(a, b, c, d): ang = angle_gor(a, b, c, d) s1 = 0 - if ang != None: + if not ang is None: # print("Angle",ang) if ang < 120 and ang > 40: s1 = 1 return s1 def sit_rec(a, b, c, d): - from math import sqrt, acos, degrees, atan, degrees + from math import sqrt, acos, degrees, atan ab = [a[0] - b[0], a[1] - b[1]] ab1 = [c[0] - d[0], c[1] - d[1]] @@ -2267,7 +2270,7 @@ def kernel_36(): """ - import os + # import os from os.path import exists, join, basename, splitext git_repo_url = 'https://github.com/CMU-Perceptual-Computing-Lab/openpose.git' @@ -2296,8 +2299,8 @@ def kernel_36(): """## From a Google Drive's folder""" - import os - from os.path import exists, join, basename, splitext + # import os + # from os.path import exists, join, basename, splitext folder_path = '/content/drive/My Drive/openpose/' files = os.listdir(folder_path) @@ -2345,9 +2348,8 @@ def kernel_36(): # video_folder = os.path.dirname(colab_video_path) # video_base_name = os.path.basename(colab_video_path) # print(video_base_name) - import os - from os.path import exists, join, basename, splitext - + # import os + # from os.path import exists, join, basename, splitext # colab_video_path = '/content/drive/My Drive/bachata.mp4' 
colab_video_path = '/content/output.mp4' colab_openpose_video_path = colab_video_path.replace('.mp4', '') + '-openpose.mp4' diff --git a/python/pyproject.toml b/python/pyproject.toml index 59a5fa8..02f4682 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,18 @@ [project] +description = 'set of tools for software development' +requires-python = '>= 3.10' +maintainers = [ + { name = 'Siarhei Siniak', email = 'siarheisiniak@gmail.com' }, +] +classifiers = [ + 'Programming Language :: Python', +] + name = 'online.fxreader.pr34' -version = '0.1.5.16' +# version = '0.1.5.16+27.7' +dynamic = [ + 'version', +] dependencies = [ #"-r requirements.txt", @@ -8,6 +20,7 @@ dependencies = [ 'marisa-trie', 'pydantic', 'pydantic-settings', + 'tomlkit', ] [project.optional-dependencies] @@ -18,40 +31,201 @@ crypto = [ early = [ 'numpy', 'cryptography', + # 'tomlkit', ] lint = [ 'tomli', + 'mypy', + 'pyright', + 'ruff', + # 'tomlkit', ] [tool.online-fxreader-pr34] early_features = ['default', 'early', 'lint',] [build-system] -requires = ['setuptools'] -build-backend = 'setuptools.build_meta' - -[tool.setuptools] -include-package-data = false - -[tool.setuptools.package-dir] -'online.fxreader.pr34' = 'online/fxreader/pr34' -#package_dir = '..' -#packages = ['online_fxreader'] -#[tool.setuptools.packages.find] -#where = ['../..'] -#include = ['../../online_fxreader/vpn'] -#exclude =['../../aiortc/*', '../../_cffi_src/*'] - -#[tool.setuptools.packages.find] -#exclude = ['*'] -#include = ['*.py'] - -# [tool.setuptools.exclude-package-data] -# 'online.fxreader.pr34' = ['online/fxreader/pr34/py.typed'] - -#[tool.setuptools.package-data] -#'online_fxreader.vpn' = ['requirements.txt'] +requires = ["meson-python", "pybind11"] +build-backend = "mesonpy" [project.scripts] online-fxreader-pr34-commands = 'online.fxreader.pr34.commands:commands_cli' + + +[tool.ruff] +line-length = 160 +target-version = 'py310' +# builtins = ['_', 'I', 'P'] +include = [ + # 'follow_the_leader/**/*.py', + #'*.py', + # '*.recipe', + '*.py', + 'online/**/*.py', + 'online/**/*.pyi', +] +exclude = [ + '.venv', +] + +[tool.ruff.format] +quote-style = 'single' +indent-style = 'tab' +skip-magic-trailing-comma = false + + +[tool.ruff.lint] +ignore = [ + 'E402', 'E722', 'E741', 'W191', 'E101', 'E501', 'I001', 'F401', 'E714', + 'E713', + # remove lambdas later on + 'E731', + # fix this too + 'E712', + 'E703', + # remove unused variables, or fix a bug + 'F841', + # fix * imports + 'F403', + # don't care about trailing new lines + 'W292', + +] +select = ['E', 'F', 'I', 'W', 'INT'] + + +[tool.ruff.lint.isort] +detect-same-package = true +# extra-standard-library = ["aes", "elementmaker", "encodings"] +# known-first-party = ["calibre_extensions", "calibre_plugins", "polyglot"] +# known-third-party = ["odf", "qt", "templite", "tinycss", "css_selectors"] +relative-imports-order = "closest-to-furthest" +split-on-trailing-comma = true +section-order = [ + # '__python__', + "future", + "standard-library", "third-party", "first-party", "local-folder" +] +force-wrap-aliases = true + +# [tool.ruff.lint.isort.sections] +# '__python__' = ['__python__'] + +[tool.pylsp-mypy] +enabled = false + +[tool.pyright] +include = [ + #'../../../../../follow_the_leader/views2/payments.py', + #'../../../../../follow_the_leader/logic/payments.py', + #'../../../../../follow_the_leader/logic/paypal.py', + 'online/fxreader/pr34/commands_typed/**/*.py', +] +# stubPath = '../mypy-stubs' +extraPaths = [ + '.', + '../mypy-stubs', + 
'../mypy-stubs/types-debugpy', + '../mypy-stubs/marisa-trie-types', + # '../../../../../', +] +#strict = ["src"] + +analyzeUnannotatedFunctions = true +disableBytesTypePromotions = true +strictParameterNoneValue = true +enableTypeIgnoreComments = true +enableReachabilityAnalysis = true +strictListInference = true +strictDictionaryInference = true +strictSetInference = true +deprecateTypingAliases = false +enableExperimentalFeatures = false +reportMissingTypeStubs ="error" +reportMissingModuleSource = "warning" +reportInvalidTypeForm = "error" +reportMissingImports = "error" +reportUndefinedVariable = "error" +reportAssertAlwaysTrue = "error" +reportInvalidStringEscapeSequence = "error" +reportInvalidTypeVarUse = "error" +reportSelfClsParameterName = "error" +reportUnsupportedDunderAll = "error" +reportUnusedExpression = "error" +reportWildcardImportFromLibrary = "error" +reportAbstractUsage = "error" +reportArgumentType = "error" +reportAssertTypeFailure = "error" +reportAssignmentType = "error" +reportAttributeAccessIssue = "error" +reportCallIssue = "error" +reportGeneralTypeIssues = "error" +reportInconsistentOverload = "error" +reportIndexIssue = "error" +reportInvalidTypeArguments = "error" +reportNoOverloadImplementation = "error" +reportOperatorIssue = "error" +reportOptionalSubscript = "error" +reportOptionalMemberAccess = "error" +reportOptionalCall = "error" +reportOptionalIterable = "error" +reportOptionalContextManager = "error" +reportOptionalOperand = "error" +reportRedeclaration = "error" +reportReturnType = "error" +reportTypedDictNotRequiredAccess = "error" +reportPrivateImportUsage = "error" +reportUnboundVariable = "error" +reportUnhashable = "error" +reportUnusedCoroutine = "error" +reportUnusedExcept = "error" +reportFunctionMemberAccess = "error" +reportIncompatibleMethodOverride = "error" +reportIncompatibleVariableOverride = "error" +reportOverlappingOverload = "error" +reportPossiblyUnboundVariable = "error" +reportConstantRedefinition = "error" +#reportDeprecated = "error" +reportDeprecated = "warning" +reportDuplicateImport = "error" +reportIncompleteStub = "error" +reportInconsistentConstructor = "error" +reportInvalidStubStatement = "error" +reportMatchNotExhaustive = "error" +reportMissingParameterType = "error" +reportMissingTypeArgument = "error" +reportPrivateUsage = "error" +reportTypeCommentUsage = "error" +reportUnknownArgumentType = "error" +reportUnknownLambdaType = "error" +reportUnknownMemberType = "error" +reportUnknownParameterType = "error" +reportUnknownVariableType = "error" +#reportUnknownVariableType = "warning" +reportUnnecessaryCast = "error" +reportUnnecessaryComparison = "error" +reportUnnecessaryContains = "error" +#reportUnnecessaryIsInstance = "error" +reportUnnecessaryIsInstance = "warning" +reportUnusedClass = "error" +#reportUnusedImport = "error" +reportUnusedImport = "none" +# reportUnusedFunction = "error" +reportUnusedFunction = "warning" +#reportUnusedVariable = "error" +reportUnusedVariable = "warning" +reportUntypedBaseClass = "error" +reportUntypedClassDecorator = "error" +reportUntypedFunctionDecorator = "error" +reportUntypedNamedTuple = "error" +reportCallInDefaultInitializer = "none" +reportImplicitOverride = "none" +reportImplicitStringConcatenation = "none" +reportImportCycles = "none" +reportMissingSuperCall = "none" +reportPropertyTypeMismatch = "none" +reportShadowedImports = "none" +reportUninitializedInstanceVariable = "none" +reportUnnecessaryTypeIgnoreComment = "none" +reportUnusedCallResult = "none" diff --git 
a/python/requirements.txt b/python/requirements.txt index 766bbbd..8bc513e 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,5 +1,5 @@ # This file was autogenerated by uv via the following command: -# uv pip compile --generate-hashes -o /home/nartes/Documents/current/freelance-project-34-marketing-blog/python/requirements.txt /tmp/requirementsmeh8aapn.in +# uv pip compile --generate-hashes --offline -o /home/nartes/Documents/current/freelance-project-34-marketing-blog/python/requirements.txt /tmp/requirementsguod07w5.in annotated-types==0.7.0 \ --hash=sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53 \ --hash=sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89 @@ -7,7 +7,7 @@ annotated-types==0.7.0 \ build==1.2.2.post1 \ --hash=sha256:1d61c0887fa860c01971625baae8bdd338e517b836a2f70dd1f7aa3a6b2fc5b5 \ --hash=sha256:b36993e92ca9375a219c99e606a122ff365a760a2d4bba0caa09bd5278b608b7 - # via -r /tmp/requirementsmeh8aapn.in + # via -r /tmp/requirementsguod07w5.in cffi==1.17.1 \ --hash=sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8 \ --hash=sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2 \ @@ -77,43 +77,45 @@ cffi==1.17.1 \ --hash=sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87 \ --hash=sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b # via cryptography -cryptography==44.0.2 \ - --hash=sha256:04abd71114848aa25edb28e225ab5f268096f44cf0127f3d36975bdf1bdf3390 \ - --hash=sha256:0529b1d5a0105dd3731fa65680b45ce49da4d8115ea76e9da77a875396727b41 \ - --hash=sha256:1bc312dfb7a6e5d66082c87c34c8a62176e684b6fe3d90fcfe1568de675e6688 \ - --hash=sha256:268e4e9b177c76d569e8a145a6939eca9a5fec658c932348598818acf31ae9a5 \ - --hash=sha256:29ecec49f3ba3f3849362854b7253a9f59799e3763b0c9d0826259a88efa02f1 \ - --hash=sha256:2bf7bf75f7df9715f810d1b038870309342bff3069c5bd8c6b96128cb158668d \ - --hash=sha256:3b721b8b4d948b218c88cb8c45a01793483821e709afe5f622861fc6182b20a7 \ - --hash=sha256:3c00b6b757b32ce0f62c574b78b939afab9eecaf597c4d624caca4f9e71e7843 \ - --hash=sha256:3dc62975e31617badc19a906481deacdeb80b4bb454394b4098e3f2525a488c5 \ - --hash=sha256:4973da6ca3db4405c54cd0b26d328be54c7747e89e284fcff166132eb7bccc9c \ - --hash=sha256:4e389622b6927d8133f314949a9812972711a111d577a5d1f4bee5e58736b80a \ - --hash=sha256:51e4de3af4ec3899d6d178a8c005226491c27c4ba84101bfb59c901e10ca9f79 \ - --hash=sha256:5f6f90b72d8ccadb9c6e311c775c8305381db88374c65fa1a68250aa8a9cb3a6 \ - --hash=sha256:6210c05941994290f3f7f175a4a57dbbb2afd9273657614c506d5976db061181 \ - --hash=sha256:6f101b1f780f7fc613d040ca4bdf835c6ef3b00e9bd7125a4255ec574c7916e4 \ - --hash=sha256:7bdcd82189759aba3816d1f729ce42ffded1ac304c151d0a8e89b9996ab863d5 \ - --hash=sha256:7ca25849404be2f8e4b3c59483d9d3c51298a22c1c61a0e84415104dacaf5562 \ - --hash=sha256:81276f0ea79a208d961c433a947029e1a15948966658cf6710bbabb60fcc2639 \ - --hash=sha256:8cadc6e3b5a1f144a039ea08a0bdb03a2a92e19c46be3285123d32029f40a922 \ - --hash=sha256:8e0ddd63e6bf1161800592c71ac794d3fb8001f2caebe0966e77c5234fa9efc3 \ - --hash=sha256:909c97ab43a9c0c0b0ada7a1281430e4e5ec0458e6d9244c0e821bbf152f061d \ - --hash=sha256:96e7a5e9d6e71f9f4fca8eebfd603f8e86c5225bb18eb621b2c1e50b290a9471 \ - --hash=sha256:9a1e657c0f4ea2a23304ee3f964db058c9e9e635cc7019c4aa21c330755ef6fd \ - --hash=sha256:9eb9d22b0a5d8fd9925a7764a054dca914000607dff201a24c791ff5c799e1fa \ - --hash=sha256:af4ff3e388f2fa7bff9f7f2b31b87d5651c45731d3e8cfa0944be43dff5cfbdb \ - 
--hash=sha256:b042d2a275c8cee83a4b7ae30c45a15e6a4baa65a179a0ec2d78ebb90e4f6699 \ - --hash=sha256:bc821e161ae88bfe8088d11bb39caf2916562e0a2dc7b6d56714a48b784ef0bb \ - --hash=sha256:c505d61b6176aaf982c5717ce04e87da5abc9a36a5b39ac03905c4aafe8de7aa \ - --hash=sha256:c63454aa261a0cf0c5b4718349629793e9e634993538db841165b3df74f37ec0 \ - --hash=sha256:c7362add18b416b69d58c910caa217f980c5ef39b23a38a0880dfd87bdf8cd23 \ - --hash=sha256:d03806036b4f89e3b13b6218fefea8d5312e450935b1a2d55f0524e2ed7c59d9 \ - --hash=sha256:d1b3031093a366ac767b3feb8bcddb596671b3aaff82d4050f984da0c248b615 \ - --hash=sha256:d1c3572526997b36f245a96a2b1713bf79ce99b271bbcf084beb6b9b075f29ea \ - --hash=sha256:efcfe97d1b3c79e486554efddeb8f6f53a4cdd4cf6086642784fa31fc384e1d7 \ - --hash=sha256:f514ef4cd14bb6fb484b4a60203e912cfcb64f2ab139e88c2274511514bf7308 - # via -r /tmp/requirementsmeh8aapn.in +cryptography==44.0.3 \ + --hash=sha256:02f55fb4f8b79c1221b0961488eaae21015b69b210e18c386b69de182ebb1259 \ + --hash=sha256:157f1f3b8d941c2bd8f3ffee0af9b049c9665c39d3da9db2dc338feca5e98a43 \ + --hash=sha256:192ed30fac1728f7587c6f4613c29c584abdc565d7417c13904708db10206645 \ + --hash=sha256:21a83f6f35b9cc656d71b5de8d519f566df01e660ac2578805ab245ffd8523f8 \ + --hash=sha256:25cd194c39fa5a0aa4169125ee27d1172097857b27109a45fadc59653ec06f44 \ + --hash=sha256:3883076d5c4cc56dbef0b898a74eb6992fdac29a7b9013870b34efe4ddb39a0d \ + --hash=sha256:3bb0847e6363c037df8f6ede57d88eaf3410ca2267fb12275370a76f85786a6f \ + --hash=sha256:3be3f649d91cb182c3a6bd336de8b61a0a71965bd13d1a04a0e15b39c3d5809d \ + --hash=sha256:3f07943aa4d7dad689e3bb1638ddc4944cc5e0921e3c227486daae0e31a05e54 \ + --hash=sha256:479d92908277bed6e1a1c69b277734a7771c2b78633c224445b5c60a9f4bc1d9 \ + --hash=sha256:4ffc61e8f3bf5b60346d89cd3d37231019c17a081208dfbbd6e1605ba03fa137 \ + --hash=sha256:5639c2b16764c6f76eedf722dbad9a0914960d3489c0cc38694ddf9464f1bb2f \ + --hash=sha256:58968d331425a6f9eedcee087f77fd3c927c88f55368f43ff7e0a19891f2642c \ + --hash=sha256:5d186f32e52e66994dce4f766884bcb9c68b8da62d61d9d215bfe5fb56d21334 \ + --hash=sha256:5d20cc348cca3a8aa7312f42ab953a56e15323800ca3ab0706b8cd452a3a056c \ + --hash=sha256:6866df152b581f9429020320e5eb9794c8780e90f7ccb021940d7f50ee00ae0b \ + --hash=sha256:7d5fe7195c27c32a64955740b949070f21cba664604291c298518d2e255931d2 \ + --hash=sha256:896530bc9107b226f265effa7ef3f21270f18a2026bc09fed1ebd7b66ddf6375 \ + --hash=sha256:962bc30480a08d133e631e8dfd4783ab71cc9e33d5d7c1e192f0b7c06397bb88 \ + --hash=sha256:978631ec51a6bbc0b7e58f23b68a8ce9e5f09721940933e9c217068388789fe5 \ + --hash=sha256:9b4d4a5dbee05a2c390bf212e78b99434efec37b17a4bff42f50285c5c8c9647 \ + --hash=sha256:ab0b005721cc0039e885ac3503825661bd9810b15d4f374e473f8c89b7d5460c \ + --hash=sha256:af653022a0c25ef2e3ffb2c673a50e5a0d02fecc41608f4954176f1933b12359 \ + --hash=sha256:b0cc66c74c797e1db750aaa842ad5b8b78e14805a9b5d1348dc603612d3e3ff5 \ + --hash=sha256:b424563394c369a804ecbee9b06dfb34997f19d00b3518e39f83a5642618397d \ + --hash=sha256:c138abae3a12a94c75c10499f1cbae81294a6f983b3af066390adee73f433028 \ + --hash=sha256:c6cd67722619e4d55fdb42ead64ed8843d64638e9c07f4011163e46bc512cf01 \ + --hash=sha256:c91fc8e8fd78af553f98bc7f2a1d8db977334e4eea302a4bfd75b9461c2d8904 \ + --hash=sha256:cad399780053fb383dc067475135e41c9fe7d901a97dd5d9c5dfb5611afc0d7d \ + --hash=sha256:cb90f60e03d563ca2445099edf605c16ed1d5b15182d21831f58460c48bffb93 \ + --hash=sha256:dad80b45c22e05b259e33ddd458e9e2ba099c86ccf4e88db7bbab4b747b18d06 \ + --hash=sha256:dd3db61b8fe5be220eee484a17233287d0be6932d056cf5738225b9c05ef4fff \ + 
--hash=sha256:e28d62e59a4dbd1d22e747f57d4f00c459af22181f0b2f787ea83f5a876d7c76 \ + --hash=sha256:e909df4053064a97f1e6565153ff8bb389af12c5c8d29c343308760890560aff \ + --hash=sha256:f3ffef566ac88f75967d7abd852ed5f182da252d23fac11b4766da3957766759 \ + --hash=sha256:fc3c9babc1e1faefd62704bb46a69f359a9819eb0292e40df3fb6e3574715cd4 \ + --hash=sha256:fe19d8bc5536a91a24a8133328880a41831b6c5df54599a8417b62fe015d3053 + # via -r /tmp/requirementsguod07w5.in marisa-trie==1.2.1 \ --hash=sha256:06b099dd743676dbcd8abd8465ceac8f6d97d8bfaabe2c83b965495523b4cef2 \ --hash=sha256:0ee6cf6a16d9c3d1c94e21c8e63c93d8b34bede170ca4e937e16e1c0700d399f \ @@ -191,15 +193,15 @@ marisa-trie==1.2.1 \ --hash=sha256:f35c2603a6be168088ed1db6ad1704b078aa8f39974c60888fbbced95dcadad4 \ --hash=sha256:f4cd800704a5fc57e53c39c3a6b0c9b1519ebdbcb644ede3ee67a06eb542697d \ --hash=sha256:f713af9b8aa66a34cd3a78c7d150a560a75734713abe818a69021fd269e927fa - # via -r /tmp/requirementsmeh8aapn.in + # via -r /tmp/requirementsguod07w5.in meson==1.8.0 \ --hash=sha256:0a9b23311271519bd03dca12d7d8b0eab582c3a2c5da433d465b6e519dc88e2f \ --hash=sha256:472b7b25da286447333d32872b82d1c6f1a34024fb8ee017d7308056c25fec1f # via meson-python -meson-python==0.17.1 \ - --hash=sha256:30a75c52578ef14aff8392677b09c39346e0a24d2b2c6204b8ed30583c11269c \ - --hash=sha256:efb91f69f2e19eef7bc9a471ed2a4e730088cc6b39eacaf3e49fc4f930eb5f83 - # via -r /tmp/requirementsmeh8aapn.in +meson-python==0.18.0 \ + --hash=sha256:3b0fe051551cc238f5febb873247c0949cd60ded556efa130aa57021804868e2 \ + --hash=sha256:c56a99ec9df669a40662fe46960321af6e4b14106c14db228709c1628e23848d + # via -r /tmp/requirementsguod07w5.in mypy==1.15.0 \ --hash=sha256:1124a18bc11a6a62887e3e137f37f53fbae476dc36c185d549d4f837a2a6a14e \ --hash=sha256:171a9ca9a40cd1843abeca0e405bc1940cd9b305eaeea2dda769ba096932bb22 \ @@ -233,11 +235,15 @@ mypy==1.15.0 \ --hash=sha256:d10d994b41fb3497719bbf866f227b3489048ea4bbbb5015357db306249f7980 \ --hash=sha256:e601a7fa172c2131bff456bb3ee08a88360760d0d2f8cbd7a75a65497e2df078 \ --hash=sha256:f95579473af29ab73a10bada2f9722856792a36ec5af5399b653aa28360290a5 - # via -r /tmp/requirementsmeh8aapn.in + # via -r /tmp/requirementsguod07w5.in mypy-extensions==1.1.0 \ --hash=sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505 \ --hash=sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558 # via mypy +nodeenv==1.9.1 \ + --hash=sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f \ + --hash=sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9 + # via pyright numpy==2.2.5 \ --hash=sha256:0255732338c4fdd00996c0421884ea8a3651eea555c3a56b84892b66f696eb70 \ --hash=sha256:02f226baeefa68f7d579e213d0f3493496397d8f1cff5e2b222af274c86a552a \ @@ -294,7 +300,7 @@ numpy==2.2.5 \ --hash=sha256:ec31367fd6a255dc8de4772bd1658c3e926d8e860a0b6e922b615e532d320ddc \ --hash=sha256:ee461a4eaab4f165b68780a6a1af95fb23a29932be7569b9fab666c407969051 \ --hash=sha256:f5045039100ed58fa817a6227a356240ea1b9a1bc141018864c306c1a16d4175 - # via -r /tmp/requirementsmeh8aapn.in + # via -r /tmp/requirementsguod07w5.in packaging==25.0 \ --hash=sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484 \ --hash=sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f @@ -302,14 +308,14 @@ packaging==25.0 \ # build # meson-python # pyproject-metadata -pip==25.1 \ - --hash=sha256:13b4aa0aaad055020a11bec8a1c2a70a2b2d080e12d89b962266029fff0a16ba \ - --hash=sha256:272bdd1289f80165e9070a4f881e8f9e1001bbb50378561d1af20e49bf5a2200 - # 
via -r /tmp/requirementsmeh8aapn.in +pip==25.1.1 \ + --hash=sha256:2913a38a2abf4ea6b64ab507bd9e967f3b53dc1ede74b01b0931e1ce548751af \ + --hash=sha256:3de45d411d308d5054c2168185d8da7f9a2cd753dbac8acbfa88a8909ecd9077 + # via -r /tmp/requirementsguod07w5.in pybind11==2.13.6 \ --hash=sha256:237c41e29157b962835d356b370ededd57594a26d5894a795960f0047cb5caf5 \ --hash=sha256:ba6af10348c12b24e92fa086b39cfba0eff619b61ac77c406167d813b096d39a - # via -r /tmp/requirementsmeh8aapn.in + # via -r /tmp/requirementsguod07w5.in pycparser==2.22 \ --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \ --hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc @@ -318,7 +324,7 @@ pydantic==2.11.4 \ --hash=sha256:32738d19d63a226a52eed76645a98ee07c1f410ee41d93b4afbfa85ed8111c2d \ --hash=sha256:d9615eaa9ac5a063471da949c8fc16376a84afb5024688b3ff885693506764eb # via - # -r /tmp/requirementsmeh8aapn.in + # -r /tmp/requirementsguod07w5.in # pydantic-settings pydantic-core==2.33.2 \ --hash=sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d \ @@ -424,7 +430,7 @@ pydantic-core==2.33.2 \ pydantic-settings==2.9.1 \ --hash=sha256:59b4f431b1defb26fe620c71a7d3968a710d719f5f4cdbbdb7926edeb770f6ef \ --hash=sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268 - # via -r /tmp/requirementsmeh8aapn.in + # via -r /tmp/requirementsguod07w5.in pyproject-hooks==1.2.0 \ --hash=sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8 \ --hash=sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913 @@ -433,15 +439,39 @@ pyproject-metadata==0.9.1 \ --hash=sha256:b8b2253dd1b7062b78cf949a115f02ba7fa4114aabe63fa10528e9e1a954a816 \ --hash=sha256:ee5efde548c3ed9b75a354fc319d5afd25e9585fa918a34f62f904cc731973ad # via meson-python +pyright==1.1.400 \ + --hash=sha256:b8a3ba40481aa47ba08ffb3228e821d22f7d391f83609211335858bf05686bdb \ + --hash=sha256:c80d04f98b5a4358ad3a35e241dbf2a408eee33a40779df365644f8054d2517e + # via -r /tmp/requirementsguod07w5.in python-dotenv==1.1.0 \ --hash=sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5 \ --hash=sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d # via pydantic-settings -setuptools==80.0.1 \ - --hash=sha256:20fe373a22ef9f3925512650d1db90b1b8de01cdb6df91ab1788263139cbf9a2 \ - --hash=sha256:f4b49d457765b3aae7cbbeb1c71f6633a61b729408c2d1a837dae064cca82ef2 +ruff==0.11.10 \ + --hash=sha256:1067245bad978e7aa7b22f67113ecc6eb241dca0d9b696144256c3a879663bca \ + --hash=sha256:2f071b0deed7e9245d5820dac235cbdd4ef99d7b12ff04c330a241ad3534319f \ + --hash=sha256:3afead355f1d16d95630df28d4ba17fb2cb9c8dfac8d21ced14984121f639bad \ + --hash=sha256:4a60e3a0a617eafba1f2e4186d827759d65348fa53708ca547e384db28406a0b \ + --hash=sha256:5a94acf798a82db188f6f36575d80609072b032105d114b0f98661e1679c9125 \ + --hash=sha256:5b6a9cc5b62c03cc1fea0044ed8576379dbaf751d5503d718c973d5418483641 \ + --hash=sha256:5cc725fbb4d25b0f185cb42df07ab6b76c4489b4bfb740a175f3a59c70e8a224 \ + --hash=sha256:607ecbb6f03e44c9e0a93aedacb17b4eb4f3563d00e8b474298a201622677947 \ + --hash=sha256:7b3a522fa389402cd2137df9ddefe848f727250535c70dafa840badffb56b7a4 \ + --hash=sha256:859a7bfa7bc8888abbea31ef8a2b411714e6a80f0d173c2a82f9041ed6b50f58 \ + --hash=sha256:8b4564e9f99168c0f9195a0fd5fa5928004b33b377137f978055e40008a082c5 \ + --hash=sha256:968220a57e09ea5e4fd48ed1c646419961a0570727c7e069842edd018ee8afed \ + --hash=sha256:d522fb204b4959909ecac47da02830daec102eeb100fb50ea9554818d47a5fa6 \ + 
--hash=sha256:da8ec977eaa4b7bf75470fb575bea2cb41a0e07c7ea9d5a0a97d13dbca697bf2 \ + --hash=sha256:dc061a98d32a97211af7e7f3fa1d4ca2fcf919fb96c28f39551f35fc55bdbc19 \ + --hash=sha256:ddf8967e08227d1bd95cc0851ef80d2ad9c7c0c5aab1eba31db49cf0a7b99523 \ + --hash=sha256:ef69637b35fb8b210743926778d0e45e1bffa850a7c61e428c6b971549b5f5d1 \ + --hash=sha256:f4854fd09c7aed5b1590e996a81aeff0c9ff51378b084eb5a0b9cd9518e6cff2 + # via -r /tmp/requirementsguod07w5.in +setuptools==80.7.1 \ + --hash=sha256:ca5cc1069b85dc23070a6628e6bcecb3292acac802399c7f8edc0100619f9009 \ + --hash=sha256:f6ffc5f0142b1bd8d0ca94ee91b30c0ca862ffd50826da1ea85258a06fd94552 # via - # -r /tmp/requirementsmeh8aapn.in + # -r /tmp/requirementsguod07w5.in # marisa-trie tomli==2.2.1 \ --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ @@ -476,7 +506,11 @@ tomli==2.2.1 \ --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 - # via -r /tmp/requirementsmeh8aapn.in + # via -r /tmp/requirementsguod07w5.in +tomlkit==0.13.2 \ + --hash=sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde \ + --hash=sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79 + # via -r /tmp/requirementsguod07w5.in typing-extensions==4.13.2 \ --hash=sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c \ --hash=sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef @@ -484,6 +518,7 @@ typing-extensions==4.13.2 \ # mypy # pydantic # pydantic-core + # pyright # typing-inspection typing-inspection==0.4.0 \ --hash=sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f \ @@ -491,23 +526,23 @@ typing-inspection==0.4.0 \ # via # pydantic # pydantic-settings -uv==0.7.1 \ - --hash=sha256:1d6f914601b769ad0f9a090573e2dc4365e0eaeb377d09cd74c5d47c97002c20 \ - --hash=sha256:2220b942b2eb8a0c5cc91af5d57c2eef7a25053037f9f311e85a2d5dd9154f88 \ - --hash=sha256:40a15f1fc73df852d7655530e5768e29dc7227ab25d9baeb711a8dde9e7f8234 \ - --hash=sha256:425064544f1e20b014447cf523e04e891bf6962e60dd25f495724b271f8911e0 \ - --hash=sha256:53eabd3aabc774d01da7836c58675c3e5cafd4285540e846debddfd056345d2c \ - --hash=sha256:5526f68ce9a5ba35ef13a14d144dc834b4940bd460fedc55f8313f9b7534b63c \ - --hash=sha256:57690b6e3b946dcf8b7b5836806d632f1a0d7667eae7af1302da812dbb7be7e5 \ - --hash=sha256:6bbf096970de17be0c2a1e28f24ebddaad9ad4d0f8d8f75364149cdde75d7462 \ - --hash=sha256:7025c9ba6f6f3d842a2b2915a579ff87eda901736105ee0379653bb4ff6b50d2 \ - --hash=sha256:7239a0ffd4695300a3b6d2004ab664e80be7ef2c46b677b0f47d6409affe2212 \ - --hash=sha256:877145523c348344c6fa2651559e9555dc4210730ad246afb4dd3414424afb3d \ - --hash=sha256:9b503d808310a978453bb91a448ffaf61542b192127c30be136443debac9cdaa \ - --hash=sha256:bf54fab715d6eb2332ff3276f80fddc6ee9e7faf29669d4bfb1918dd53ffc408 \ - --hash=sha256:c5572a2b1d6dbf1cbff315e55931f891d8706ef5ed76e94a7d5e6e6dae075b3a \ - --hash=sha256:c94cb14377c0efa65eb0267cfebfb5212729dc73fd61e4897e38839e3e72d763 \ - --hash=sha256:d9c0c70bd3734cdae20cf22889a0394307a86451bb7c9126f0542eb998dd1472 \ - --hash=sha256:ea2024e6a9daeea3ff6cab8ad4afe3b2aa0be9e07bad57646a749896e58648ad \ - --hash=sha256:ef8765771785a56b2e5485f3c6f9ec04cbd2c077be2fe1f2786ded5710e33c0d - # via -r /tmp/requirementsmeh8aapn.in +uv==0.7.3 \ + --hash=sha256:0646e463365e7277f22200ce2d43b7a44e5a3192320500b4983b4fe34d69a5fb \ + 
--hash=sha256:0a446d4e5b10ce8a793156a276727bb7affa96a85e80dc5ad34e0c2de7e71cc8 \ + --hash=sha256:3e6e1fd5755d4ef4c6e1ce55bd2c6d9dec278a8bef5752703d702ce03704fe29 \ + --hash=sha256:44e2f3fcbd1ab519bdb68986449b2e3103d2261be95f985cadcf7ec7c510b595 \ + --hash=sha256:4809e5f7f5b2d6423d6573fda5655389c955ca649499fe9750b61af95daf9b7d \ + --hash=sha256:5eb4872888a9fb10b62cc00be8e84822d63d3e622a5f340248e53ecf321dba96 \ + --hash=sha256:863ceb63aefc7c2db9918313a1cb3c8bf3fc3d59b656b617db9e4abad90373f3 \ + --hash=sha256:90990e4c289feee24164c8e463fc0ebc9a336960119cd256acca7c1439f0f536 \ + --hash=sha256:acef117a0c52299e60c6f7a3e60849050cd233704c561f688fac1100d113da2e \ + --hash=sha256:acff7fba5ff40dcb5a42de496db92a3965edac7a3d687d9b013ba6e0336995df \ + --hash=sha256:b1414a026c153ae0731daed0812b17bf77d34eafedaeb3a5c72e08181aea116b \ + --hash=sha256:c976fce3d1068a1d007f50127cc7873d67643c1a60439564970f092d9be41877 \ + --hash=sha256:cb2547fd1466698e9b4f11de5eef7055b8cbcc3c693d79f6d747e3f8e6be2ab7 \ + --hash=sha256:cc27207c35c959d2e0e873e86a80a2470a77b7a34a4512a831e8d4f7c87f4404 \ + --hash=sha256:d246243f348796730e8ea9736ddd48702d4448d98af5e61693063ed616e30378 \ + --hash=sha256:db8a5d5995b160158405379deadf0ffccf849a5e7ce048900b73517daf109e2c \ + --hash=sha256:f37c8a6b172776fb5305afe0699907aff44a778669de7a8fbe5a9c09c1a88a97 \ + --hash=sha256:fbb2d322d453e498e1431c51421cee597962ecd3f93fcef853b258e9c7e7636c + # via -r /tmp/requirementsguod07w5.in diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27-py3-none-any.whl new file mode 100644 index 0000000..9230a36 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b31297a8722395286f647d948ed5a729f03d0989fc7a68dd3324f8faa3ce52 +size 70698 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.1-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.1-py3-none-any.whl new file mode 100644 index 0000000..fb75307 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.1-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bae2399f7d1d3f27dabb8a66713f4c440d2ef98df52acb8d9c151932924f324 +size 70869 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.10-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.10-py3-none-any.whl new file mode 100644 index 0000000..9d36afb --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.10-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be90dd9d5f80df201c8de47b63a6d7250545f5421a1e14f2803b597132c96aff +size 71901 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.11-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.11-py3-none-any.whl new file mode 100644 index 0000000..cc77152 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.11-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ecbb7f7ef8a50172df7db15a9768d2d4129e44a194f47f6df314a701646c6c +size 71939 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.12-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.12-py3-none-any.whl new file mode 100644 index 0000000..25cac92 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.12-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0182a9b408673bfc6dac8706058eaf255d43c791863cff8699ce0d74880ea61c 
+size 72153 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.13-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.13-py3-none-any.whl new file mode 100644 index 0000000..90d248a --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.13-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d84b500c9ca908db0043a9c346b5165380a370afc287f8a989c1693beb214945 +size 72193 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.14-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.14-py3-none-any.whl new file mode 100644 index 0000000..55b3c2a --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.14-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:028a358026924f4e4002ca4720570b2fbfe60fe66763720451a25172f3fdfa1a +size 72486 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.15-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.15-py3-none-any.whl new file mode 100644 index 0000000..13881c1 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.15-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc5ecb131bd3e15da75902f5428306eaa0df5ad5a0588fd2d880dbea557ae75 +size 72507 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.16-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.16-py3-none-any.whl new file mode 100644 index 0000000..e946f41 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.16-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6d2e5319a2870456f132bff8b0952b55608cc218e12bf8e61c03f3d1a55e32 +size 72524 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.17-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.17-py3-none-any.whl new file mode 100644 index 0000000..8e60c29 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.17-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81a87abcabe1b11f58a609cd0923d736c77008e7eeb4f10676cc29a3c7d06836 +size 72530 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.18-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.18-py3-none-any.whl new file mode 100644 index 0000000..2f61f91 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.18-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0343401686b765a8e83c4d3f3b4d6100b497c2160cbeb14fdc67a8ccd998f499 +size 72556 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.19-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.19-py3-none-any.whl new file mode 100644 index 0000000..09abdc1 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.19-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e6f5f4e32aaace54ac1d1602500cc9ea7f9a369de0441bcef133e2cd1f11290 +size 72567 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.2-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.2-py3-none-any.whl new file mode 100644 index 0000000..fb8b54e --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.2-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b831baa4fe596a3442a774748625ffa9a8d5b21d897d424d41933dc390b220 +size 70869 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.20-py3-none-any.whl 
b/releases/whl/online_fxreader_pr34-0.1.5.16+27.20-py3-none-any.whl new file mode 100644 index 0000000..d966e72 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.20-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cb01468f91874ee860999ce072dd4c7a552f56ca325650882eae353b153f07d +size 72581 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.21-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.21-py3-none-any.whl new file mode 100644 index 0000000..3e62256 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.21-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da8412a74439f76efc90cb147f91ad1ef7f6af49bd472eee351500541ac9476 +size 72636 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.3-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.3-py3-none-any.whl new file mode 100644 index 0000000..00f1562 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.3-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a5995097afe73aea5a2ece3c325bc4c5b8b9e26427dda9ce1f23f50d9e080f +size 71034 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.4-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.4-py3-none-any.whl new file mode 100644 index 0000000..dc2889a --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.4-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4202d8f2b16a9ea030cab0035dad9f35c0c3e471e50d053a4d999b3e74581dcc +size 71034 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.5-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.5-py3-none-any.whl new file mode 100644 index 0000000..088307a --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.5-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e111101646bfc13ba7cd79b54912b9b414ed7c0297fa1b616df6cfd728ca7867 +size 71030 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.6-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.6-py3-none-any.whl new file mode 100644 index 0000000..d82e714 --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.6-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e58c34ec58432632cdf4dbc67e468a69b2947814d3fecbd6ba64c0df07e3af80 +size 71037 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.7-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.7-py3-none-any.whl new file mode 100644 index 0000000..85091be --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.7-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57974bc5af7c567f214cd720c2e231ab8435f677fba3d997163989f6d254f712 +size 71536 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.8-cp313-cp313-linux_x86_64.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.8-cp313-cp313-linux_x86_64.whl new file mode 100644 index 0000000..5b177db --- /dev/null +++ b/releases/whl/online_fxreader_pr34-0.1.5.16+27.8-cp313-cp313-linux_x86_64.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00151fd4927f99c427715743cc87611a1c37dda17aa291db9efd3a38f55f766b +size 71488 diff --git a/releases/whl/online_fxreader_pr34-0.1.5.16+27.9-py3-none-any.whl b/releases/whl/online_fxreader_pr34-0.1.5.16+27.9-py3-none-any.whl new file mode 100644 index 0000000..a2d5772 --- /dev/null +++ 
b/releases/whl/online_fxreader_pr34-0.1.5.16+27.9-py3-none-any.whl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15d28a44ddb66da99516a1f45a3662200cbcb68118a66f7e19fe55c73f6d5bde +size 71876
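
Aside (not part of the diff): the lockfile above is pinned with `--generate-hashes`, and each release wheel is committed only as a git-lfs pointer carrying an `oid sha256:<hex>` and a `size <bytes>` line. A minimal Python sketch of checking a locally obtained artifact against one of these pointers is shown below; the file paths are hypothetical examples, not paths taken from this repository's tooling.

```python
# Minimal sketch (assumption: illustrative only, not repository tooling):
# verify a downloaded wheel against the sha256 oid and byte size recorded
# in its git-lfs pointer file.
import hashlib
import os


def read_lfs_pointer(pointer_path: str) -> tuple[str, int]:
    """Parse the 'oid sha256:<hex>' and 'size <bytes>' lines of an LFS pointer."""
    oid, size = "", 0
    with open(pointer_path, encoding="utf-8") as fh:
        for line in fh:
            if line.startswith("oid sha256:"):
                oid = line.split("oid sha256:", 1)[1].strip()
            elif line.startswith("size "):
                size = int(line.split()[1])
    return oid, size


def verify_artifact(artifact_path: str, pointer_path: str) -> bool:
    """Return True if the artifact's sha256 digest and size match the pointer."""
    expected_oid, expected_size = read_lfs_pointer(pointer_path)
    digest = hashlib.sha256()
    with open(artifact_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return (
        digest.hexdigest() == expected_oid
        and os.path.getsize(artifact_path) == expected_size
    )


if __name__ == "__main__":
    # Hypothetical usage; substitute the real wheel and pointer locations.
    ok = verify_artifact(
        "online_fxreader_pr34-0.1.5.16+27-py3-none-any.whl",
        "releases/whl/online_fxreader_pr34-0.1.5.16+27-py3-none-any.whl",
    )
    print("hash and size match" if ok else "mismatch")
```

The same `hashlib.sha256` check applies to the `--hash=sha256:...` entries in `python/requirements.txt`, which is how a hash-pinned install rejects any artifact whose digest drifted from the lockfile.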