-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdata_parse.py
More file actions
148 lines (100 loc) · 4.31 KB
/
data_parse.py
File metadata and controls
148 lines (100 loc) · 4.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
from bs4 import BeautifulSoup, Tag
import asyncio
import httpx
from vfxmed.vfxmed_new_version import save_load_first_product, read_links_from_file, extract_links, get_download_links, request_on_1_or_other_page
from blendermarket.blend_search_v3 import search_info_about_blend_product
from mysql.connector.errors import IntegrityError
import json
from requests import Session as RequestsSession
from database.models import ENGINE, Vfx, Version, Blend
from sqlalchemy.orm import Session
from sqlalchemy import insert, select, update , values
from sqlalchemy.exc import IntegrityError
def fill_vfx_and_version_tables(query_set: list[list[dict]], session) -> None:
    """Store scraped products as ``Vfx`` rows with one ``Version`` row each.

    Args:
        query_set: A list of batches; every batch is a list of dicts that are
            read by the keys ``'link'``, ``'title'`` and ``'version'``.
        session: Object used to execute the statements and to commit them
            (the caller in this file passes a SQLAlchemy ``Session``).

    The statements are executed product by product and committed once, after
    the whole ``query_set`` has been processed.
    """
    products = (product for batch in query_set for product in batch)
    for product in products:
        product_link = product['link']

        # NOTE(review): 'download_link' receives the same value as 'link' --
        # confirm that no separate download-link key was intended.
        vfx_values = {'link': product_link, 'download_link': product_link}
        session.execute(insert(Vfx), vfx_values)

        # Read back the id of the row carrying this link so the Version row
        # can reference it through 'vfx_id'.
        id_query = select(Vfx.id).where(Vfx.link == product_link)
        vfx_row = next(session.execute(id_query))

        version_values = {
            'product_name': product['title'],
            'addon_version': product['version'],
            'vfx_id': vfx_row.id,
        }
        session.execute(insert(Version), version_values)

    session.commit()
def fill_blend_and_update_tables(blend_product_info: dict, session, version_id):
    """Insert a ``Blend`` row and point a ``Version`` row at it.

    Args:
        blend_product_info: Values for the ``Blend`` insert; it must contain
            the key ``'off_link'``, which is used to look the row up again.
        session: Object used to execute the statements and to commit them.
        version_id: Id of the ``Version`` row whose ``blend_id`` is set.

    An ``IntegrityError`` raised by the insert or by the update is ignored;
    the commit at the end is always attempted.
    """
    add_blend = insert(Blend)
    try:
        session.execute(add_blend, blend_product_info)
    except IntegrityError:
        # Presumably the product is already stored (TODO confirm which
        # constraint fires); the lookup below then finds the existing row.
        pass

    find_blend_id = select(Blend.id).where(
        Blend.off_link == blend_product_info['off_link']
    )
    blend_row = next(session.execute(find_blend_id))
    blend_product_id = blend_row.id

    attach_blend = (
        update(Version)
        .where(Version.id == version_id)
        .values(blend_id=blend_product_id)
    )
    try:
        session.execute(attach_blend)
    except IntegrityError:
        # Best effort: a rejected update leaves the Version row unchanged.
        pass

    session.commit()
async def parse_pages_vfx(mode_all=True) -> None:
    """Walk the listing pages and store the products found in the database.

    Pages are requested one after another, starting with page 1. For every
    page a ``get_download_links`` task is scheduled; the gathered results are
    written with ``fill_vfx_and_version_tables`` each time five tasks have
    accumulated and once more when the walk ends.

    Args:
        mode_all: When true, pages are processed until ``extract_links``
            raises ``ValueError``. When false, the walk additionally stops
            after the first page that contains the link read from
            ``vfxmed/json/vfx_last_product.json``.
    """
    # One forward-slash path for both the read and the helper call. The
    # original passed a backslash-separated literal to the helper, which only
    # addresses this file on Windows; forward slashes work on every platform.
    last_product_path = 'vfxmed/json/vfx_last_product.json'

    link = None
    if not mode_all:
        with open(last_product_path, 'r') as f:
            link = read_links_from_file(f)[0]
    # NOTE(review): presumably refreshes the stored "last product" marker;
    # it is called for both modes -- confirm against the helper.
    save_load_first_product(last_product_path)

    with Session(ENGINE) as session:
        # ``async with`` closes the client (and its connections) on every
        # exit path; the original never closed it.
        async with httpx.AsyncClient(timeout=10) as client:
            page_number = 1
            tasks = []
            while True:
                response = await request_on_1_or_other_page(page_number, client)
                if not response:
                    # A falsy response means the same page is requested again.
                    # NOTE(review): the retry is unbounded and has no delay
                    # here -- confirm the helper throttles itself.
                    continue

                soup = BeautifulSoup(response.text, 'html.parser')
                try:
                    # Only this call is guarded: its ValueError ends the walk.
                    page_links = extract_links(soup)
                except ValueError:
                    print('ValueError', tasks)
                    break

                tasks.append(asyncio.create_task(
                    get_download_links(page_links, client)))

                # Incremental mode: this page holds the previously stored
                # product, so it is the last page to process.
                if not mode_all and link in (item[0] for item in page_links):
                    break

                page_number += 1
                if len(tasks) >= 5:
                    fill_vfx_and_version_tables(
                        await asyncio.gather(*tasks), session)
                    tasks.clear()

            # Flush whatever is still pending. This runs inside the client
            # context because the tasks use the client until they finish.
            fill_vfx_and_version_tables(await asyncio.gather(*tasks), session)
def parse_blend():
    """Search blend-product info for recent ``Version`` rows and store it.

    The first line of ``vfxmed/json/vfx_last_product.json`` is parsed as JSON.
    Every ``Version`` whose ``data_created`` is not older than the stored
    ``'begin_update'`` value is selected, and
    ``search_info_about_blend_product`` is called with its product name. Each
    truthy result is passed to ``fill_blend_and_update_tables`` together with
    the id of the version.
    """
    with open('vfxmed/json/vfx_last_product.json', 'r') as marker_file:
        first_line = next(marker_file)
        marker = json.loads(first_line)

    since = marker['begin_update']
    # Not used below; the lookup is kept so that a marker without 'link'
    # still fails here with KeyError.
    link = marker['link']

    with Session(ENGINE) as session:
        recent_versions = (
            select(Version.product_name, Version.id)
            .where(Version.data_created >= since)
        )
        rows = session.execute(recent_versions).all()

        with RequestsSession() as http:
            for product_name, version_id in rows:
                info = search_info_about_blend_product(product_name, http)
                if not info:
                    # Nothing was found for this product; leave it untouched.
                    continue
                fill_blend_and_update_tables(info, session, version_id)
if __name__ == '__main__':
    # Script entry point: walk all listing pages first, then run the blend
    # lookup on the stored versions.
    full_walk = parse_pages_vfx(mode_all=True)
    asyncio.run(full_walk)
    parse_blend()