diff options
| author | Pinapelz <yukais@pinapelz.com> | 2026-05-27 13:53:55 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2026-05-27 13:53:55 -0700 |
| commit | 478b257fa4b5f09730f87e6bf35555a1062e26ac (patch) | |
| tree | 3365ac2fb5f859c72f42368864c70bc991e576ef /captcha_scraper/cropper.py | |
| parent | e037112ab1f6f4f6dd987d37bd0a343f74d89f8e (diff) | |
add kpop captcha scraper
Diffstat (limited to 'captcha_scraper/cropper.py')
| -rw-r--r-- | captcha_scraper/cropper.py | 63 |
1 files changed, 63 insertions, 0 deletions
"""Crop detected faces out of scraped images into fixed-size CAPTCHA tiles.

File: captcha_scraper/cropper.py

Walks INPUT_DIR recursively, runs face detection on every file whose
extension is listed in EXTS, and writes a padded, square crop of each
detected face as a PNG under OUTPUT_DIR, mirroring the input directory layout.
"""

import cv2
import os
from pathlib import Path
from insightface.app import FaceAnalysis

# Directory tree that is scanned for source images.
INPUT_DIR = "../captcha-original"
# Directory tree that receives the cropped face tiles.
OUTPUT_DIR = "../captcha"

# Lower-cased file suffixes that are treated as images.
EXTS = {".jpg", ".jpeg", ".png", ".webp"}

# Shared detector instance, created once because model loading is expensive.
# NOTE(review): ctx_id=0 presumably selects the first GPU (insightface uses a
# negative ctx_id for CPU) -- confirm for the target machine.
app = FaceAnalysis(name="buffalo_l")
app.prepare(ctx_id=0, det_size=(640, 640))

os.makedirs(OUTPUT_DIR, exist_ok=True)


def resize_and_crop(img, size=100):
    """Scale *img* to cover a ``size`` x ``size`` square, then center-crop it.

    The aspect ratio is preserved: the image is resized so that its shorter
    side equals ``size`` and the overflow on the longer side is trimmed
    equally from both ends.

    Args:
        img: Image array whose first two dimensions are (height, width).
        size: Edge length, in pixels, of the square result.

    Returns:
        The ``size`` x ``size`` center region of the resized image.
    """
    h, w = img.shape[:2]
    scale = max(size / w, size / h)
    # Floating-point rounding can leave ``w * scale`` a hair below ``size``;
    # truncating that would make the crop offset negative and the slice below
    # would silently return a wrong-sized tile. Clamp to ``size`` instead.
    nw = max(size, int(w * scale))
    nh = max(size, int(h * scale))
    # INTER_AREA is the right choice for shrinking but degrades to roughly
    # nearest-neighbour when enlarging, so switch to bicubic for upscaling.
    interpolation = cv2.INTER_AREA if scale < 1 else cv2.INTER_CUBIC
    img = cv2.resize(img, (nw, nh), interpolation=interpolation)
    x = (nw - size) // 2
    y = (nh - size) // 2
    return img[y:y + size, x:x + size]


def process_image(path: Path):
    """Detect faces in the image at *path* and save one tile per face.

    Each face's bounding box is widened by 25% horizontally and 35%
    vertically on each side, clamped to the image, then reduced to a
    100 x 100 tile. The first tile is written as ``<stem>.png``; any further
    faces in the same image are written as ``<stem>_<n>.png`` so they no
    longer overwrite each other. Progress and failures are reported on stdout.

    Args:
        path: Image file located somewhere under INPUT_DIR.
    """
    img = cv2.imread(str(path))
    if img is None:
        print(f"failed: {path}")
        return
    faces = app.get(img)
    if not faces:
        print(f"no face: {path}")
        return

    # The destination depends only on the input path, not on the face.
    rel = path.relative_to(INPUT_DIR)
    out_dir = Path(OUTPUT_DIR) / rel.parent
    img_h, img_w = img.shape[:2]

    saved = 0
    for face in faces:
        x1, y1, x2, y2 = face.bbox.astype(int)
        pad_x = int((x2 - x1) * 0.25)
        pad_y = int((y2 - y1) * 0.35)
        x1 = max(0, x1 - pad_x)
        y1 = max(0, y1 - pad_y)
        # Clamp the far edges to 0 as well: a negative slice bound would be
        # interpreted as "from the end" and yield an unrelated region.
        x2 = max(0, min(img_w, x2 + pad_x))
        y2 = max(0, min(img_h, y2 + pad_y))
        crop = img[y1:y2, x1:x2]
        if crop.size == 0:
            continue
        crop = resize_and_crop(crop, 100)

        out_dir.mkdir(parents=True, exist_ok=True)
        # Keep the plain name for the first tile; suffix the rest so that
        # multiple faces from one image do not clobber each other.
        out_name = f"{path.stem}.png" if saved == 0 else f"{path.stem}_{saved}.png"
        out_path = out_dir / out_name

        # cv2.imwrite reports failure through its return value, not an
        # exception, so check it before claiming success.
        if not cv2.imwrite(str(out_path), crop):
            print(f"write failed: {out_path}")
            continue
        saved += 1

        print(f"saved: {out_path}")


# Script entry: process every supported image found under INPUT_DIR.
for root, _, files in os.walk(INPUT_DIR):
    for file in files:
        path = Path(root) / file
        if path.suffix.lower() in EXTS:
            process_image(path)
