33"""
44import atexit
55import logging
6- import time
7- from threading import Event
86from concurrent .futures import Future
97from difflib import SequenceMatcher
10- from multiprocessing import Manager , Process
8+ from multiprocessing import Manager , Process , Queue
9+ from threading import Event
1110from typing import Dict , List
1211from urllib .parse import urlparse
1312
1413from slugify import slugify
1514
1615CONCURRENCY = 25
1716MAX_RESULTS = 10
18- SEARCH_TIMEOUT = 30
17+ SEARCH_TIMEOUT = 60
1918
2019logger = logging .getLogger (__name__ )
2120
2221
2322# This function runs in a separate process
24- def _search_process (results : list , link : str , file_path : str , query : str ):
23+ def _search_process (
24+ results : Queue ,
25+ link : str ,
26+ query : str ,
27+ file_path : str ,
28+ ):
2529 try :
2630 from ..models import SearchResult
2731 from .sources import prepare_crawler
@@ -35,7 +39,7 @@ def _search_process(results: list, link: str, file_path: str, query: str):
3539 if not (item .url and item .title ):
3640 continue
3741 item .title = item .title .lower ().title ()
38- results .append (item )
42+ results .put (item )
3943 except KeyboardInterrupt :
4044 pass
4145 except Exception :
@@ -46,7 +50,6 @@ def _search_process(results: list, link: str, file_path: str, query: str):
4650# This runs in a thread to execute the processes
4751def _run (p : Process , hostname : str , signal : Event ):
4852 from .exeptions import LNException
49- start_time = time .time ()
5053 try :
5154 atexit .register (p .kill )
5255 p .start ()
@@ -59,10 +62,11 @@ def _run(p: Process, hostname: str, signal: Event):
5962 else :
6063 if p .is_alive ():
6164 raise LNException (f"[{ hostname } ] Timeout" )
65+ except KeyboardInterrupt :
66+ pass
6267 finally :
6368 atexit .unregister (p .kill )
6469 p .kill ()
65- return (start_time , hostname )
6670
6771
6872def search_novels (app ):
@@ -77,13 +81,13 @@ def search_novels(app):
7781 return
7882
7983 manager = Manager ()
80- results = manager .list ()
84+ results = manager .Queue ()
8185 taskman = TaskManager (CONCURRENCY )
8286
8387 # Create tasks for the queue
8488 checked = set ()
8589 signal = Event ()
86- futures : List [Future ] = []
90+ futures : list [Future ] = []
8791 for link in app .crawler_links :
8892 if link in rejected_sources :
8993 continue
@@ -100,8 +104,8 @@ def search_novels(app):
100104 args = (
101105 results ,
102106 link ,
107+ app .user_input ,
103108 getattr (CrawlerType , 'file_path' ),
104- app .user_input
105109 ),
106110 )
107111
@@ -113,15 +117,8 @@ def search_novels(app):
113117 # Wait for all tasks to finish
114118 try :
115119 app .progress = 0
116- for start_time , hostname in taskman .resolve_as_generator (
117- futures ,
118- unit = 'source' ,
119- desc = 'Search' ,
120- timeout = SEARCH_TIMEOUT ,
121- ):
120+ for _ in taskman .resolve_as_generator (futures , unit = 'source' , desc = 'Search' ):
122121 app .progress += 1
123- run_time = round (time .time () - start_time )
124- logging .debug (f"[{ hostname } ] { run_time } seconds" )
125122 except KeyboardInterrupt :
126123 pass
127124 except Exception :
@@ -134,8 +131,8 @@ def search_novels(app):
134131
135132 # Combine the search results
136133 combined : Dict [str , List [SearchResult ]] = {}
137- for item in results :
138- assert isinstance ( item , SearchResult )
134+ while not results . empty () :
135+ item : SearchResult = results . get ( )
139136 if not (item and item .title ):
140137 continue
141138 key = slugify (str (item .title ))
0 commit comments