Skip to content

Commit 4c478fc

Browse files
committed
Merge branch 'dev'
2 parents 0f7ef74 + 0dab988 commit 4c478fc

4 files changed

Lines changed: 31 additions & 36 deletions

File tree

lncrawl/core/novel_search.py

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,29 @@
33
"""
44
import atexit
55
import logging
6-
import time
7-
from threading import Event
86
from concurrent.futures import Future
97
from difflib import SequenceMatcher
10-
from multiprocessing import Manager, Process
8+
from multiprocessing import Manager, Process, Queue
9+
from threading import Event
1110
from typing import Dict, List
1211
from urllib.parse import urlparse
1312

1413
from slugify import slugify
1514

1615
CONCURRENCY = 25
1716
MAX_RESULTS = 10
18-
SEARCH_TIMEOUT = 30
17+
SEARCH_TIMEOUT = 60
1918

2019
logger = logging.getLogger(__name__)
2120

2221

2322
# This function runs in a separate process
24-
def _search_process(results: list, link: str, file_path: str, query: str):
23+
def _search_process(
24+
results: Queue,
25+
link: str,
26+
query: str,
27+
file_path: str,
28+
):
2529
try:
2630
from ..models import SearchResult
2731
from .sources import prepare_crawler
@@ -35,7 +39,7 @@ def _search_process(results: list, link: str, file_path: str, query: str):
3539
if not (item.url and item.title):
3640
continue
3741
item.title = item.title.lower().title()
38-
results.append(item)
42+
results.put(item)
3943
except KeyboardInterrupt:
4044
pass
4145
except Exception:
@@ -46,7 +50,6 @@ def _search_process(results: list, link: str, file_path: str, query: str):
4650
# This runs in a thread to execute the processes
4751
def _run(p: Process, hostname: str, signal: Event):
4852
from .exeptions import LNException
49-
start_time = time.time()
5053
try:
5154
atexit.register(p.kill)
5255
p.start()
@@ -59,10 +62,11 @@ def _run(p: Process, hostname: str, signal: Event):
5962
else:
6063
if p.is_alive():
6164
raise LNException(f"[{hostname}] Timeout")
65+
except KeyboardInterrupt:
66+
pass
6267
finally:
6368
atexit.unregister(p.kill)
6469
p.kill()
65-
return (start_time, hostname)
6670

6771

6872
def search_novels(app):
@@ -77,13 +81,13 @@ def search_novels(app):
7781
return
7882

7983
manager = Manager()
80-
results = manager.list()
84+
results = manager.Queue()
8185
taskman = TaskManager(CONCURRENCY)
8286

8387
# Create tasks for the queue
8488
checked = set()
8589
signal = Event()
86-
futures: List[Future] = []
90+
futures: list[Future] = []
8791
for link in app.crawler_links:
8892
if link in rejected_sources:
8993
continue
@@ -100,8 +104,8 @@ def search_novels(app):
100104
args=(
101105
results,
102106
link,
107+
app.user_input,
103108
getattr(CrawlerType, 'file_path'),
104-
app.user_input
105109
),
106110
)
107111

@@ -113,15 +117,8 @@ def search_novels(app):
113117
# Wait for all tasks to finish
114118
try:
115119
app.progress = 0
116-
for start_time, hostname in taskman.resolve_as_generator(
117-
futures,
118-
unit='source',
119-
desc='Search',
120-
timeout=SEARCH_TIMEOUT,
121-
):
120+
for _ in taskman.resolve_as_generator(futures, unit='source', desc='Search'):
122121
app.progress += 1
123-
run_time = round(time.time() - start_time)
124-
logging.debug(f"[{hostname}] {run_time} seconds")
125122
except KeyboardInterrupt:
126123
pass
127124
except Exception:
@@ -134,8 +131,8 @@ def search_novels(app):
134131

135132
# Combine the search results
136133
combined: Dict[str, List[SearchResult]] = {}
137-
for item in results:
138-
assert isinstance(item, SearchResult)
134+
while not results.empty():
135+
item: SearchResult = results.get()
139136
if not (item and item.title):
140137
continue
141138
key = slugify(str(item.title))

lncrawl/core/taskman.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import logging
33
import os
44
from abc import ABC
5-
from concurrent.futures import Future, ThreadPoolExecutor
5+
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
66
from threading import Semaphore, Thread
77
from typing import Any, Dict, Generator, Iterable, List, Optional
88

@@ -110,7 +110,6 @@ def progress_bar(
110110
unit: Optional[str] = None,
111111
desc: Optional[str] = None,
112112
total: Optional[float] = None,
113-
timeout: Optional[float] = None,
114113
disable: bool = False,
115114
) -> tqdm:
116115
if os.getenv("debug_mode"):
@@ -119,7 +118,7 @@ def progress_bar(
119118
if not disable:
120119
# Since we are showing progress bar, it is not good to
121120
# resolve multiple list of futures at once
122-
if not _resolver.acquire(True, timeout):
121+
if not _resolver.acquire(True, 30):
123122
pass
124123

125124
bar = tqdm(
@@ -176,7 +175,6 @@ def cancel_futures(self, futures: Iterable[Future]) -> None:
176175
def resolve_as_generator(
177176
self,
178177
futures: Iterable[Future],
179-
timeout: Optional[float] = None,
180178
disable_bar: bool = False,
181179
desc: Optional[str] = None,
182180
unit: Optional[str] = None,
@@ -193,22 +191,25 @@ def resolve_as_generator(
193191
unit: The progress unit name
194192
fail_fast: Fail on first error
195193
"""
194+
futures = list(futures)
195+
if not futures:
196+
yield from ()
197+
return
198+
196199
bar = self.progress_bar(
197-
futures,
200+
total=len(futures),
198201
desc=desc,
199202
unit=unit,
200-
timeout=timeout,
201203
disable=disable_bar,
202204
)
203205
try:
204-
for step in bar:
205-
future: Future = step
206+
for future in as_completed(futures):
206207
if fail_fast:
207-
yield future.result(timeout)
208+
yield future.result()
208209
bar.update()
209210
continue
210211
try:
211-
yield future.result(timeout)
212+
yield future.result()
212213
except KeyboardInterrupt:
213214
raise
214215
except LNException as e:
@@ -227,13 +228,11 @@ def resolve_as_generator(
227228
raise
228229
finally:
229230
Thread(target=lambda: self.cancel_futures(futures)).start()
230-
yield from ()
231231
bar.close()
232232

233233
def resolve_futures(
234234
self,
235235
futures: Iterable[Future],
236-
timeout: Optional[float] = None,
237236
disable_bar: bool = False,
238237
desc: Optional[str] = None,
239238
unit: Optional[str] = None,
@@ -254,7 +253,6 @@ def resolve_futures(
254253
return list(
255254
self.resolve_as_generator(
256255
futures=futures,
257-
timeout=timeout,
258256
disable_bar=disable_bar,
259257
desc=desc,
260258
unit=unit,

scripts/build.bat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ RD /S /Q "dist" ".venv" "build" "lightnovel_crawler.egg-info" &
1010
%PY% -m venv .venv
1111
CALL .venv\Scripts\activate.bat
1212

13-
%PIP% install -U pip install wheel setuptools
13+
%PIP% install -U pip wheel setuptools
1414
%PIP% install -r requirements-dev.txt
1515
%PIP% install -r requirements-app.txt
1616

setup_pyi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from PyInstaller import __main__ as pyi
77

88
ROOT = Path(__file__).parent
9-
site_packages = list(ROOT.glob("venv/**/site-packages"))[0]
9+
site_packages = list(ROOT.glob(".venv/**/site-packages"))[0]
1010

1111

1212
def build_command():

0 commit comments

Comments
 (0)