# cropper.py
# Detects faces in the images under INPUT_DIR and writes square face crops under OUTPUT_DIR.

import cv2
import os
from pathlib import Path
from insightface.app import FaceAnalysis

# Root directory that is scanned for source images. The path is relative, so
# it is resolved against the current working directory at run time.
INPUT_DIR = "../captcha-original"
# Root directory that receives the cropped images; created below if missing.
OUTPUT_DIR = "../captcha"

# File suffixes treated as images. Entries are lower-case because the suffix
# of each candidate file is lower-cased before the membership test.
EXTS = {".jpg", ".jpeg", ".png", ".webp"}

# Shared face analyzer, constructed once at module load and used by
# process_image for detection.
# NOTE(review): "buffalo_l" looks like an insightface model-pack name and
# ctx_id=0 presumably selects device 0 — confirm against the insightface docs.
app = FaceAnalysis(name="buffalo_l")
app.prepare(ctx_id=0, det_size=(640, 640))

# Make sure the output root exists before any image is processed.
os.makedirs(OUTPUT_DIR, exist_ok=True)

def resize_and_crop(img, size=100):
    """Scale *img* to cover a ``size`` x ``size`` square, then center-crop it.

    The aspect ratio is preserved: the image is scaled so that its shorter
    side becomes ``size`` pixels, and the excess on the longer side is
    trimmed equally from both ends.

    Args:
        img: Image array whose first two dimensions are (height, width) and
            which is accepted by ``cv2.resize``. Both dimensions must be
            non-zero.
        size: Side length, in pixels, of the square result.

    Returns:
        The ``size`` x ``size`` center region of the resized image.
    """
    h, w = img.shape[:2]
    scale = max(size / w, size / h)
    # Floating-point error in w * (size / w) can land just below ``size``;
    # plain int() would then truncate to size - 1 and the final slice would
    # be one pixel short. Clamp so both sides always cover the square.
    nw = max(size, int(w * scale))
    nh = max(size, int(h * scale))
    # INTER_AREA is intended for shrinking; when enlarging it degrades to
    # nearest-neighbour-like output, so use bilinear interpolation instead.
    interpolation = cv2.INTER_AREA if scale < 1 else cv2.INTER_LINEAR
    img = cv2.resize(img, (nw, nh), interpolation=interpolation)
    x = (nw - size) // 2
    y = (nh - size) // 2
    return img[y:y + size, x:x + size]


def process_image(path: Path) -> None:
    """Detect faces in the image at *path* and save a padded crop of each.

    Every detected bounding box is expanded by 25% of its width on the left
    and right and 35% of its height on the top and bottom, clamped to the
    image bounds, passed through ``resize_and_crop`` with size 100, and
    written as a PNG under ``OUTPUT_DIR`` in the same sub-directory that
    *path* has relative to ``INPUT_DIR``.

    The first face saved is named ``<stem>.png``; further faces from the same
    image are named ``<stem>_1.png``, ``<stem>_2.png``, ... so they no longer
    overwrite one another.

    Args:
        path: Image file located under ``INPUT_DIR``.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    img = cv2.imread(str(path))
    if img is None:
        print(f"failed: {path}")
        return
    faces = app.get(img)
    if not faces:
        print(f"no face: {path}")
        return

    img_h, img_w = img.shape[:2]
    # The destination directory is the same for every face of this image.
    out_dir = Path(OUTPUT_DIR) / path.relative_to(INPUT_DIR).parent

    saved = 0  # number of crops written so far; drives the file-name suffix
    for face in faces:
        x1, y1, x2, y2 = face.bbox.astype(int)
        pad_x = int((x2 - x1) * 0.25)
        pad_y = int((y2 - y1) * 0.35)
        x1 = max(0, x1 - pad_x)
        y1 = max(0, y1 - pad_y)
        # Clamp the upper bounds to >= 0 as well: a negative value would be
        # interpreted by slicing as an offset from the end of the axis.
        x2 = max(0, min(img_w, x2 + pad_x))
        y2 = max(0, min(img_h, y2 + pad_y))
        crop = img[y1:y2, x1:x2]
        if crop.size == 0:
            continue
        crop = resize_and_crop(crop, 100)

        out_dir.mkdir(parents=True, exist_ok=True)
        suffix = "" if saved == 0 else f"_{saved}"
        out_path = out_dir / f"{path.stem}{suffix}.png"

        # cv2.imwrite signals failure through its return value, so check it
        # instead of reporting success unconditionally.
        if not cv2.imwrite(str(out_path), crop):
            print(f"failed: {out_path}")
            continue
        saved += 1

        print(f"saved: {out_path}")

# Walk INPUT_DIR recursively and hand every file whose (case-insensitive)
# suffix is listed in EXTS to process_image.
for dirpath, _dirnames, filenames in os.walk(INPUT_DIR):
    for filename in filenames:
        candidate = Path(dirpath, filename)
        if candidate.suffix.lower() not in EXTS:
            continue
        process_image(candidate)