# Helpers added to python/tasks/tiktok/__init__.py.
# NOTE: tiktok_videos_fetch needs tqdm at runtime; the accompanying change to
# docker/tiktok/Dockerfile installs it with `RUN pip3 install tqdm`.
import logging
import os
from typing import (
    Any,
    Iterable,
)


def tiktok_videos_meta(links: Iterable[str]) -> list[dict[str, Any]]:
    """Build one metadata record per video link.

    Each record holds the original ``url`` and a local ``fname`` made of the
    last three ``/``-separated path components joined with ``_`` plus
    ``.mp4`` (e.g. ``.../@user/video/123`` -> ``@user_video_123.mp4``).

    Args:
        links: Video page URLs.

    Returns:
        A list of ``{'url': ..., 'fname': ...}`` dicts, in input order.
    """
    res = []
    for url in links:
        # Drop any query string / fragment first so that it does not end up
        # inside the file name; the stored ``url`` is left untouched.
        # NOTE(review): fname is presumably meant to equal the file name that
        # pyktok.save_tiktok writes -- confirm against the pyktok version used.
        path = url.split('?', 1)[0].split('#', 1)[0]
        parts = path.split('/')

        res.append(dict(
            url=url,
            fname='_'.join(parts[-3:]) + '.mp4',
        ))

    return res


def tiktok_videos_fetch(
    meta: Iterable[dict[str, Any]]
) -> dict[str, int]:
    """Download every video in *meta* that is not already on disk.

    A record is skipped when ``o['fname']`` already exists (the path is used
    as given, so a relative name is resolved against the current working
    directory); otherwise ``pyktok.save_tiktok(o['url'])`` is called. A
    failing download is logged and counted; it does not abort the loop.

    Args:
        meta: Records with ``url`` and ``fname`` keys, such as the ones
            produced by ``tiktok_videos_meta``.

    Returns:
        Counters with the keys ``saved``, ``total``, ``skipped`` and
        ``error``.
    """
    # Imported lazily so that importing this module does not require the
    # download dependencies to be installed.
    import pyktok
    import tqdm

    logger = logging.getLogger(__name__)

    stats = dict(
        saved=0,
        total=0,
        skipped=0,
        error=0,
    )

    for o in tqdm.tqdm(meta):
        stats['total'] += 1

        if os.path.exists(o['fname']):
            stats['skipped'] += 1
            continue

        try:
            pyktok.save_tiktok(o['url'])
        except Exception:
            # Best effort: keep going with the remaining videos, but leave a
            # trace. KeyboardInterrupt / SystemExit are deliberately not
            # caught so the loop can still be interrupted.
            logger.exception('failed to save %s', o['url'])
            stats['error'] += 1
        else:
            stats['saved'] += 1

    return stats