builtin-programs/recognition/craft.folk

# On-demand loading: react to any When statement whose pattern is
# `the CRAFT text detector is ...` by wishing for the detector to be loaded,
# so the loader below only runs when something actually wants the detector.
# NOTE(review): `-keep 500ms` presumably keeps the wish alive briefly after
# the last matching When disappears, to avoid reload churn -- confirm against
# the Wish implementation.
When when the CRAFT text detector is /any/ /any/ with environment /any/ {
    Wish -keep 500ms to load the CRAFT text detector
}

# Loader for the CRAFT text detector. Runs when someone wishes to load the
# detector and an image argtype definer for Uvx is available. It starts a
# Uvx Python handle with the required packages, constructs the Craft model
# once, and claims a `CRAFT` fn that returns the text boxes detected in an
# image.
When /someone/ wishes to load the CRAFT text detector &\
     the image uvx argtype definer is /defineImageArgtype/ {
    # Make the matched definer callable as a command in this scope.
    fn defineImageArgtype

    set py [Uvx --with pillow --with "git+https://github.com/osnr/craft-text-detector.git"]
    # NOTE(review): presumably registers the `Image` argument type used by
    # detectTextBoxes below on this Python handle -- confirm against the
    # definer's implementation.
    defineImageArgtype $py

    # One-time setup on the Python side: imports and model construction.
    # Names defined here (np, time, sys, craft) are used by detectTextBoxes.
    $py exec {
        import sys
        import torch
        import numpy as np
        from craft_text_detector import Craft
        import time

        # Prefer CUDA, then Apple MPS, and fall back to the CPU otherwise.
        if torch.cuda.is_available():
            device = "cuda"
        elif torch.backends.mps.is_available():
            device = "mps"
        else:
            device = "cpu"

        craft = Craft(output_dir=None, crop_type="box",
                      link_threshold=0.1, device=device)
    }
    # Runs detection on one image and returns the detected boxes as plain
    # lists when the result supports tolist(), otherwise unchanged.
    $py def detectTextBoxes {Image image} {
        image_np = np.array(image)

        start_craft = time.time()
        result = craft.detect_text(image_np)
        boxes = result["boxes"]
        craft_time = time.time() - start_craft

        # Timing log goes to stderr (requires the `import sys` in the setup
        # block above).
        print(f"craft: Detected {len(boxes)} text boxes ({craft_time:.3f}s)",
              file=sys.stderr, flush=True)
        return boxes.tolist() if hasattr(boxes, 'tolist') else boxes
    }

    # Tcl-side wrapper around the Python function; this is what gets claimed.
    fn CRAFT {im} { return [$py detectTextBoxes $im] }
    Claim the CRAFT text detector is [fn CRAFT]
}