freelance-project-34-market.../python/tasks/jigsaw_toxic.py
Siarhei Siniak 492e0fafc6 [~] Refactor
2021-07-15 08:15:15 +03:00

67 lines
1.3 KiB
Python

import pprint
import urllib.parse

import pyquery
import requests
def kernel_1_sample_scrap(
    max_articles=None,
):
    """Scrape comments from articles linked on the dev.to front page.

    Downloads ``https://dev.to``, collects the ``href`` of every element
    matching ``.crayons-story__title > a``, then downloads the first
    ``max_articles`` of those articles and, for every ``.comment__content``
    element, records the author link and the comment body text.

    The link list, each article's comments and the final result are
    pretty-printed to stdout while the scrape runs.

    Args:
        max_articles: Maximum number of linked articles to download.
            ``None`` (the default) is treated as 1.

    Returns:
        A dict with the keys:

        * ``t12`` - raw ``href`` values found on the front page.
        * ``t8`` - one ``dict(article=<url>, comments=[...])`` per downloaded
          article; each comment is ``dict(author=<href>, text=<body text>)``.
        * ``t1``, ``t2``, ``t3``, ``t6`` - decoded page source, parsed
          document, matched comment elements and comment list of the LAST
          downloaded article; all ``None`` when no article was downloaded.
    """
    if max_articles is None:
        max_articles = 1

    base_url = 'https://dev.to'

    with requests.get(
        base_url,
    ) as front_response:
        front_html = front_response.content.decode('utf-8')

    front_doc = pyquery.PyQuery(front_html)
    title_links = front_doc('.crayons-story__title > a')
    t12 = [
        pyquery.PyQuery(link).attr('href')
        for link in title_links
    ]
    pprint.pprint(t12)

    # Plain '%s' formatting would turn an href that starts with '/' into
    # "https://dev.to//..." and a missing href into "https://dev.to/None".
    # urljoin resolves relative and absolute hrefs alike; anchors without
    # an href are skipped.
    article_urls = [
        urllib.parse.urljoin(base_url + '/', href)
        for href in t12
        if href
    ]

    # Pre-bind the "last article" values so the return statement cannot hit
    # an unbound local when the loop body never runs (max_articles == 0 or
    # no links found on the front page).
    t1 = t2 = t3 = t6 = None
    t8 = []
    for article_url in article_urls[:max_articles]:
        with requests.get(
            article_url,
        ) as article_response:
            t1 = article_response.content.decode('utf-8')

        t2 = pyquery.PyQuery(t1)
        t3 = t2('.comment__content')
        t6 = []
        for node in t3:
            comment = pyquery.PyQuery(node)
            t6.append(
                dict(
                    author=comment('.comment__header > a').attr('href'),
                    text=comment('.comment__body').text(),
                )
            )

        pprint.pprint(t6)
        t8.append(
            dict(
                article=article_url,
                comments=t6,
            )
        )

    pprint.pprint(t8)

    return dict(
        t1=t1,
        t2=t2,
        t3=t3,
        t6=t6,
        t8=t8,
        t12=t12,
    )