11import json
22import logging
3+ import re
34
45from lncrawl .core import Chapter , LegacyCrawler , Volume
56
@@ -18,31 +19,57 @@ def read_novel_info(self) -> None:
1819 self .novel_title = possible_title
1920 logger .info ("Novel title: %s" , self .novel_title )
2021
21- possible_image = soup .select_one ("img[alt='{self.novel_title}']" )
22+ possible_image = soup .select_one (f "img[alt='{ self .novel_title } ']" )
2223 if possible_image :
2324 self .novel_cover = possible_image ["src" ]
2425 logger .info ("Novel cover: %s" , self .novel_cover )
2526
26- section = soup .find_all ("section" , attrs = {"x-data" : True })
27-
28- if len (section ) == 1 :
29- chapter_section = section [0 ]
30- else :
31- chapter_section = section [1 ]
32-
33- section_data = "" .join (chapter_section .get ("x-data" ).split ("})(" )[1 ].split ())[
34- :- 2
35- ] # remove all whitespace & remove last 2 chars
36-
37- chapters_obj = json .loads (section_data )
38-
39- if isinstance (chapters_obj , list ) is False :
40- chapters = []
41- for _ , value in chapters_obj .items ():
42- chapters .append (value )
43-
44- else :
45- chapters = chapters_obj
27+ # Extract CSRF token for Livewire request
28+ csrf_meta = soup .select_one ('meta[name="csrf-token"]' )
29+ assert csrf_meta , "No CSRF token found"
30+ csrf_token = csrf_meta ["content" ]
31+
32+ # Extract Livewire update URL from script tag
33+ lw_script = soup .select_one ('script[src*="livewire"]' )
34+ assert lw_script , "No Livewire script tag found"
35+ lw_match = re .search (r"(livewire-[a-f0-9]+)" , lw_script ["src" ])
36+ assert lw_match , "Could not extract Livewire path"
37+ livewire_url = self .absolute_url (f"/{ lw_match .group (1 )} /update" )
38+
39+ # Extract snapshot from the lazy-loaded chapter-list component
40+ lw_div = soup .find ("div" , attrs = {"wire:name" : "project.chapter-list" })
41+ assert lw_div , "No Livewire chapter-list component found"
42+ snapshot = lw_div ["wire:snapshot" ]
43+
44+ # Fetch chapter list via Livewire lazy-load
45+ lw_response = self .post_json (
46+ livewire_url ,
47+ data = {"components" : [{"snapshot" : snapshot , "updates" : {}, "calls" : []}]},
48+ headers = {"X-CSRF-TOKEN" : csrf_token , "X-Livewire" : "" },
49+ )
50+
51+ chapter_html = lw_response ["components" ][0 ]["effects" ]["html" ]
52+ chapter_soup = self .make_soup (chapter_html )
53+
54+ # Parse chapter data from Alpine.js x-data in the response
55+ xdata_div = chapter_soup .select_one ("div[x-data]" )
56+ assert xdata_div , "No x-data div in chapter list response"
57+ x_data = xdata_div ["x-data" ]
58+
59+ free_match = re .search (r"freeChapters:\s*JSON\.parse\('(.+?)'\)" , x_data )
60+ assert free_match , "Could not extract freeChapters from x-data"
61+ # Decode JS string escapes before parsing JSON
62+ raw_json = re .sub (
63+ r"\\u([0-9a-fA-F]{4})" ,
64+ lambda m : chr (int (m .group (1 ), 16 )),
65+ free_match .group (1 ),
66+ )
67+ raw_json = raw_json .replace ("\\ '" , "'" )
68+ chapters = json .loads (raw_json )
69+
70+ slug_match = re .search (r"projectSlug:\s*'([^']+)'" , x_data )
71+ assert slug_match , "Could not extract projectSlug from x-data"
72+ project_slug = slug_match .group (1 )
4673
4774 chapters .reverse ()
4875
@@ -57,8 +84,8 @@ def read_novel_info(self) -> None:
5784 Chapter (
5885 id = chap_id ,
5986 volume = vol_id ,
60- url = self ._make_url (item ["slug" ], item [ "project" ][ "slug" ] ),
61- title = item ["full_title " ],
87+ url = self ._make_url (item ["slug" ], project_slug ),
88+ title = item ["title " ],
6289 )
6390 )
6491
0 commit comments