1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
import os
import json
import logging
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def _load_urls_from_file(path: str) -> list:
    """Read URLs from a text file, one entry per line.

    Parsing rules:
      * Leading/trailing whitespace is stripped from every line.
      * Blank lines and lines starting with ``#`` are skipped.
      * A trailing comment introduced by a space followed by ``#`` is cut
        off.  A ``#`` with no space before it (e.g. ``http://a/b#frag``)
        is kept as part of the entry.

    Args:
        path: Path of the file to read.  A falsy value (``None``/``""``)
            yields an empty list without touching the filesystem.

    Returns:
        The entries in file order.  Reading is best-effort: a missing file
        logs a warning, any other failure is logged with its traceback, and
        whatever was collected up to that point is returned.
    """
    urls = []
    if not path:
        logger.debug("No path provided to load_urls_from_file")
        return urls
    try:
        # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading
        # byte-order mark; with plain "utf-8" the BOM would stay glued to
        # the first line and corrupt the first URL or hide a "#" comment.
        with open(path, "r", encoding="utf-8-sig") as f:
            for raw in f:
                line = raw.strip()
                if not line or line.startswith("#"):
                    continue
                # partition() returns the whole line when " #" is absent,
                # so no separate membership test is needed.
                urls.append(line.partition(" #")[0].strip())
        logger.info("Loaded %d URLs from %s", len(urls), path)
    except FileNotFoundError:
        logger.warning("URL file not found: %s", path)
    except Exception:
        logger.exception("Failed to read URL file: %s", path)
    return urls
def _create_or_get_cache(cache_file_path: str) -> dict:
    """Load the JSON cache stored at ``cache_file_path``.

    Args:
        cache_file_path: Path of the JSON cache file.

    Returns:
        The decoded cache dictionary.  An empty dict is returned when the
        file does not exist, cannot be read or parsed (logged with
        traceback), or contains valid JSON that is not an object (logged as
        a warning).  This function never raises for I/O or parse errors.
    """
    try:
        # Open directly instead of checking os.path.exists() first: the
        # file could disappear between the check and the open.
        # "utf-8-sig" also accepts a cache file saved with a BOM, which
        # json.load() would otherwise reject.
        with open(cache_file_path, "r", encoding="utf-8-sig") as f:
            cache = json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # No cache yet: start empty without logging an error.
        return {}
    except Exception:
        logger.exception("Failed to load cache file, starting with empty cache")
        return {}
    if not isinstance(cache, dict):
        # Valid JSON but not an object (list, null, number, ...): callers
        # get a dict in every other path, so fall back to an empty one.
        logger.warning(
            "Cache file %s does not contain a JSON object, starting with empty cache",
            cache_file_path,
        )
        return {}
    return cache
def _save_cache(cache_data: dict, cache_file_path: str) -> None:
    """Write ``cache_data`` to ``cache_file_path`` as indented UTF-8 JSON.

    Args:
        cache_data: JSON-serializable cache contents.
        cache_file_path: Destination file; overwritten on success.

    Saving is best-effort: any failure is logged with its traceback and
    swallowed, so this function never raises for I/O or serialization
    errors.
    """
    try:
        # Serialize before opening the file.  Opening with "w" truncates
        # immediately, so a serialization error raised afterwards would
        # wipe out the previously saved cache.
        payload = json.dumps(cache_data, ensure_ascii=False, indent=2)
        with open(cache_file_path, "w", encoding="utf-8") as f:
            f.write(payload)
    except Exception:
        logger.exception("Failed to save cache to %s", cache_file_path)
|