[~] Refactor

This commit is contained in:
Siarhei Siniak 2024-07-06 15:50:09 +03:00
parent 7ade65b678
commit 4679b3b861

@ -1,5 +1,7 @@
import logging import logging
import enum
import dataclasses import dataclasses
import multiprocessing
import traceback import traceback
import subprocess import subprocess
import os import os
@ -27,7 +29,7 @@ def tiktok_config() -> tiktok_config_t.res_t:
res = tiktok_config_t.res_t( res = tiktok_config_t.res_t(
project_root=os.path.abspath( project_root=os.path.abspath(
os.path.join( os.path.join(
__file__, os.path.dirname(__file__),
'..', '..', '..', '..', '..', '..',
), ),
), ),
@ -125,19 +127,93 @@ def tiktok_videos_meta(links: Iterable[str]) -> Iterable[dict[str, Any]]:
url=o, url=o,
id=int(parts[-1]), id=int(parts[-1]),
fname='_'.join(parts[-3:]) +'.mp4', fname='_'.join(parts[-3:]) +'.mp4',
result_dir=tiktok_config().videos,
)) ))
return res return res
class tiktok_video_fetch_t:
    """Namespace for the types used by ``tiktok_video_fetch``."""

    class method_t(enum.Enum):
        """Download back-ends selectable for fetching a single video.

        The member values are the strings accepted through the
        ``method_str`` argument of ``tiktok_video_fetch``.
        """

        # Download through the ``pyktok`` package.
        pyktok = 'pyktok'
        # Download from tikcdn.io using the ``curl`` command line tool.
        tikcdn_io_curl = 'tikcdn.io-curl'
        # Download from tikcdn.io using the ``wget`` command line tool.
        tikcdn_io_wget = 'tikcdn.io-wget'
def tiktok_video_fetch(
    id: int,
    url: str,
    fname: str,
    result_dir: str,
    method: Optional[tiktok_video_fetch_t.method_t]=None,
    method_str: Optional[str]=None,
) -> None:
    """Download a single TikTok video into ``result_dir``.

    Args:
        id: Numeric video id, used to build the tikcdn.io download URL.
        url: Video page URL, used by the ``pyktok`` method.
        fname: File name (relative to ``result_dir``) the video is
            expected to be stored under.
        result_dir: Directory the download is performed in.
        method: Download back-end; defaults to ``tikcdn_io_curl``.
        method_str: String value of a ``method_t`` member. When given it
            takes precedence over ``method``.

    Raises:
        ValueError: ``method_str`` is not a valid ``method_t`` value.
        NotImplementedError: ``method`` is not a supported back-end.
        subprocess.CalledProcessError: ``curl`` / ``wget`` / ``file``
            exited with a non-zero status.
        RuntimeError: the resulting file is reported as ``empty``.
    """
    if method_str is not None:
        method = tiktok_video_fetch_t.method_t(method_str)

    if method is None:
        method = tiktok_video_fetch_t.method_t.tikcdn_io_curl

    # The download tools write relative to the working directory.
    # Remember the previous directory and restore it afterwards so the
    # change does not leak into the rest of the process (a relative
    # ``result_dir`` would otherwise break on the next call made from the
    # same worker process).
    previous_dir = os.getcwd()
    os.chdir(result_dir)

    try:
        if method == tiktok_video_fetch_t.method_t.pyktok:
            # Imported here because the only import visible in this file
            # is local to another function, which does not put the name
            # in scope for this one.
            import pyktok

            pyktok.save_tiktok(url)
        elif method == tiktok_video_fetch_t.method_t.tikcdn_io_curl:
            subprocess.check_call([
                'curl',
                '-v',
                'https://tikcdn.io/ssstik/%d' % id,
                '-o', fname,
            ])
        elif method == tiktok_video_fetch_t.method_t.tikcdn_io_wget:
            subprocess.check_call([
                'wget',
                'https://tikcdn.io/ssstik/%d' % id,
                '-O',
                fname,
            ])
        else:
            raise NotImplementedError

        # A zero-length result means nothing was actually downloaded.
        mime_type = file_mime_type(fname)

        if mime_type in ['empty']:
            raise RuntimeError('notdownloaded')
    finally:
        os.chdir(previous_dir)
def file_mime_type(path: str) -> Optional[str]:
    """Describe the content type of ``path`` using the ``file`` utility.

    Args:
        path: Path of the file to inspect.

    Returns:
        The stripped, UTF-8 decoded output of ``file -b``, or ``None``
        when ``path`` does not exist. Callers in this file compare the
        result against ``'empty'`` to detect zero-length downloads.

    Raises:
        subprocess.CalledProcessError: ``file`` exited with a non-zero
            status.
    """
    if not os.path.exists(path):
        return None

    output = subprocess.check_output([
        'file',
        '-b',
        # End of options: a path starting with '-' must be treated as a
        # file name, not as a command line switch.
        '--',
        path,
    ])

    return output.strip().decode('utf-8')
async def playwright_save(url: str) -> None:
    """Open ``url`` in a TikTokApi browser session and save the download.

    Creates a TikTokApi session, navigates its page to ``url`` while
    waiting for a download to start, then saves the download and prints
    the path it was stored at.

    Args:
        url: Address to navigate to; expected to trigger a download.
    """
    import TikTokApi

    async with TikTokApi.TikTokApi() as client:
        await client.create_sessions()

        session = client.sessions[0]
        # NOTE(review): presumably a Playwright async ``Page`` - confirm
        # against the installed TikTokApi version.
        page = session.page

        async with page.expect_download() as download_info:
            await page.goto(url)

        # With the async Playwright API the download handle, its path and
        # the save operation are all awaitables; without ``await`` the
        # calls below would operate on un-resolved objects.
        download = await download_info.value
        path = await download.path()
        # NOTE(review): this saves the download onto its own temporary
        # path, as the original did - confirm the intended destination.
        await download.save_as(path)

        print(path)
def tiktok_videos_fetch( def tiktok_videos_fetch(
meta: Iterable[dict[str, Any]], meta: Iterable[dict[str, Any]],
method: Optional[Literal['pyktok', 'tikcdn.io']]=None, method: Optional[tiktok_video_fetch_t.method_t]=None,
method_str: Optional[str]=None,
force: Optional[bool]=None,
) -> Iterable[dict[str, Any]]: ) -> Iterable[dict[str, Any]]:
import pyktok import pyktok
import tqdm import tqdm
if method is None: if force is None:
method = 'pyktok' force = False
stats = dict( stats = dict(
saved=0, saved=0,
@ -146,25 +222,40 @@ def tiktok_videos_fetch(
error=0, error=0,
) )
for o in tqdm.tqdm(meta): with multiprocessing.Pool(processes=1) as pool:
stats['total'] += 1 for o in tqdm.tqdm(meta):
if not os.path.exists(o['fname']): stats['total'] += 1
try: path = os.path.join(
if method == 'pyktok': o['result_dir'],
pyktok.save_tiktok(o['url']) o['fname'],
elif method == 'tikcdn.io': )
subprocess.check_call([
'curl', if (
'https://tikcdn.io/ssstik/%d' % o['id'], not os.path.exists(path) or
'-o', o['fname'], file_mime_type(path) in ['empty'] or
]) force
stats['saved'] += 1 ):
except: try:
logger.error(json.dumps(dict( pool.apply(
msg=traceback.format_exc(), tiktok_video_fetch,
))) kwds=dict(
stats['error'] += 1 id=o['id'],
else: url=o['url'],
stats['skipped'] += 1 fname=o['fname'],
method=method,
method_str=method_str,
result_dir=o['result_dir'],
),
)
stats['saved'] += 1
except KeyboardInterrupt:
break
except:
logger.error(json.dumps(dict(
msg=traceback.format_exc(),
)))
stats['error'] += 1
else:
stats['skipped'] += 1
return stats return stats