[+] partially add worker based on huggingface

1. test huggingface manually from IPython;
  1.1. seems to work;
  1.2 put some long text from documentation,
    it has provided some summary;
  1.3. runs in about 40-60 seconds on 3-4 cores CPU;
  1.4. almost made it reuse the local cache;
    for some reason huggingface can still
    download LFS weights from the public repo;
  2. partially wrapped into worker
    which is to be run as a separate service in docker-compose;
This commit is contained in:
Siarhei Siniak 2025-07-23 11:10:13 +03:00
parent 0b5971c4af
commit c4eb8b5568

@ -0,0 +1,47 @@
import transformers
import transformers.pipelines
from typing import (Any, cast, Callable, Protocol, Literal,)
class SummarizerPipeline(Protocol):
    """Structural type for the object returned by ``transformers.pipeline``.

    Fix: the original ``predict`` signature lacked ``self``, so no real
    pipeline instance (whose bound ``predict`` takes one positional argument)
    would structurally match this protocol.
    """

    def predict(self, data: str) -> str:
        """Summarize *data* and return the summary text.

        NOTE(review): transformers pipelines usually return a list of dicts;
        this protocol asserts a plain ``str`` — confirm against actual usage.
        """
        ...
class Pipeline(Protocol):
    """Structural type for the ``transformers.pipelines.pipeline`` factory.

    Fixes over the original:
    - the return annotation referenced ``Summarizer``, a class defined later
      in the file, which raises ``NameError`` when this class body is
      evaluated at import time; the factory also returns a pipeline object,
      not the ``Summarizer`` wrapper — it is now a quoted forward reference
      to ``SummarizerPipeline``;
    - the ``task`` literal was ``'summarizer'`` while the call site actually
      passes ``'summarization'``; the literal now matches the call site.
    """

    def __call__(
        self,
        task: Literal['summarization'],
        model: Any,
        tokenizer: Any,
    ) -> 'SummarizerPipeline':
        """Build and return a summarization pipeline."""
        ...
class Summarizer:
    """Thin wrapper around a HuggingFace ``distilbart-cnn-12-6`` summarizer.

    Fixes over the original:
    - ``self.model`` was assigned the *tokenizer* and ``self.tokenizer`` the
      *model* (``AutoTokenizer`` / ``AutoModelForSeq2SeqLM`` were swapped);
    - the ``pipeline(...)`` call passed bare ``model`` / ``tokenizer`` names
      that were never bound in ``__init__`` (``NameError``) — they must be
      the instance attributes;
    - the pipeline result was cast to ``Pipeline`` (the factory protocol)
      although it is subsequently used via ``.predict`` — it is now cast to
      ``SummarizerPipeline``.
    """

    def __init__(self) -> None:
        # Model weights: seq2seq model fine-tuned for CNN-style summarization.
        self.model = cast(
            Callable[[str], Any],
            getattr(transformers.AutoModelForSeq2SeqLM, 'from_pretrained')(
                'sshleifer/distilbart-cnn-12-6',
            )
        )
        # Matching tokenizer for the same checkpoint.
        self.tokenizer = cast(
            Callable[[str], Any],
            getattr(transformers.AutoTokenizer, 'from_pretrained')(
                'sshleifer/distilbart-cnn-12-6',
            )
        )
        # Assemble the ready-to-use summarization pipeline from the two
        # components loaded above.
        self.summarizer = cast(
            SummarizerPipeline,
            getattr(transformers.pipelines, 'pipeline')(
                'summarization',
                model=self.model,
                tokenizer=self.tokenizer,
            )
        )

    def summarize(
        self,
        data: list[str]
    ) -> list[str]:
        """Summarize the concatenation of *data* and return it word-split.

        NOTE(review): joining the chunks with spaces and re-splitting the
        summary loses the original chunk boundaries — confirm callers expect
        a flat list of words rather than one summary per input chunk.
        """
        return self.summarizer.predict(
            ' '.join(data)
        ).split()