winocr

package module

v1.0.0 (latest) · Published: May 5, 2025 · License: MIT · Imports: 10 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/ifeng0188/go-winocr

Links

Open Source Insights

README ¶

go-winocr

A Go binding for Windows OCR engine that provides text recognition capabilities.

Features

Fast and accurate text recognition
Support for multiple output formats (text/json)
Detailed OCR results including:
- Text content
- Bounding boxes
- Word confidence scores
- Image angle detection

Installation

go get github.com/ifeng0188/go-winocr

Before using this package, download the following Windows OCR engine files and place them together in a single directory (the directory you later pass to SetOcrDllPath):

oneocr.dll
oneocr.onemodel
onnxruntime.dll

Quick Start

package main

import (
    "fmt"
    "image"
    _ "image/png" // registers the PNG decoder used by image.Decode
    "os"

    "github.com/ifeng0188/go-winocr"
)

// main demonstrates a minimal OCR run: point the package at the OCR DLL
// directory, create an engine, decode an image file and print the recognized
// text. Every failure is reported and ends the program early.
func main() {
    // Set OCR DLL path. This must happen before any other OCR call and
    // panics if a required file is missing.
    winocr.SetOcrDllPath("path/to/ocr/dlls")

    // Create OCR engine
    engine := winocr.NewOcrEngine()
    defer engine.Close()

    // Enable model delay load (optional). A failure here is not fatal, so
    // it is reported and the example carries on.
    if err := engine.EnableModelDelayLoad(); err != nil {
        fmt.Printf("Enable model delay load failed: %v\n", err)
    }

    // Load and decode image
    file, err := os.Open("image.png")
    if err != nil {
        fmt.Printf("Open image failed: %v\n", err)
        return
    }
    defer file.Close()

    // image.Decode only understands formats whose decoder package has been
    // imported for its side effects (for example: import _ "image/png").
    img, _, err := image.Decode(file)
    if err != nil {
        fmt.Printf("Decode image failed: %v\n", err)
        return
    }

    // Perform OCR
    result, err := engine.Recognize(img, "text")
    if err != nil {
        fmt.Printf("OCR failed: %v\n", err)
        return
    }

    fmt.Printf("OCR Result:\n%s\n", result)
}

Credits

win11-oneocr - Inspiration for this project

License

This project is licensed under the MIT License.

Documentation ¶

Index ¶

func CreateOcrInitOptions() uintptr
func CreateOcrPipeline(initOpts uintptr, modelPath, modelKey string) uintptr
func CreateOcrProcessOptions() uintptr
func GetImageAngle(result uintptr) float32
func GetOcrDllPath() string
func GetOcrLine(result uintptr, index int) uintptr
func GetOcrLineContent(line uintptr) string
func GetOcrLineCount(result uintptr) int
func GetOcrLineWordCount(line uintptr) int
func GetOcrWord(line uintptr, index int) uintptr
func GetOcrWordConfidence(word uintptr) float32
func GetOcrWordContent(word uintptr) string
func OcrInitOptionsSetUseModelDelayLoad(initOpts uintptr, enable bool)
func OcrProcessOptionsGetMaxRecognitionLineCount(processOpts uintptr) int
func OcrProcessOptionsGetResizeResolution(processOpts uintptr) (int, int)
func OcrProcessOptionsSetMaxRecognitionLineCount(processOpts uintptr, count int)
func OcrProcessOptionsSetResizeResolution(processOpts uintptr, width int, height int)
func ReleaseOcrInitOptions(initOpts uintptr)
func ReleaseOcrPipeline(pipeline uintptr)
func ReleaseOcrProcessOptions(processOpts uintptr)
func ReleaseOcrResult(result uintptr)
func RunOcrPipeline(pipeline, processOpts uintptr, img *Image) (uintptr, error)
func SetOcrDllPath(path string)
type BoundingBox
- func GetOcrLineBoundingBox(line uintptr) BoundingBox
- func GetOcrWordBoundingBox(word uintptr) BoundingBox
type Image
type OcrEngine
- func NewOcrEngine() *OcrEngine
- func (e *OcrEngine) Close()
- func (e *OcrEngine) EnableModelDelayLoad() error
- func (e *OcrEngine) GetMaxRecognitionLineCount() int
- func (e *OcrEngine) GetResizeResolution() (int, int)
- func (e *OcrEngine) Recognize(img image.Image, format string) (string, error)
- func (e *OcrEngine) SetMaxRecognitionLineCount(count int) error
- func (e *OcrEngine) SetResizeResolution(width, height int) error
type OcrLine
type OcrResult
type OcrWord
type Point

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func CreateOcrInitOptions ¶

func CreateOcrInitOptions() uintptr

CreateOcrInitOptions creates and returns a handle to OCR initialization options.

func CreateOcrPipeline ¶

func CreateOcrPipeline(initOpts uintptr, modelPath, modelKey string) uintptr

CreateOcrPipeline creates and returns a handle to OCR pipeline.

func CreateOcrProcessOptions ¶

func CreateOcrProcessOptions() uintptr

CreateOcrProcessOptions creates and returns a handle to OCR processing options.

func GetImageAngle ¶

func GetImageAngle(result uintptr) float32

GetImageAngle returns the detected rotation angle of the image in degrees.

func GetOcrDllPath ¶

func GetOcrDllPath() string

GetOcrDllPath returns the current directory path containing OCR DLL and model files.

func GetOcrLine ¶

func GetOcrLine(result uintptr, index int) uintptr

GetOcrLine returns a handle to the specified text line by index.

func GetOcrLineContent ¶

func GetOcrLineContent(line uintptr) string

GetOcrLineContent returns the text content of the specified line.

func GetOcrLineCount ¶

func GetOcrLineCount(result uintptr) int

GetOcrLineCount returns the number of text lines detected in the image.

func GetOcrLineWordCount ¶

func GetOcrLineWordCount(line uintptr) int

GetOcrLineWordCount returns the number of words in the specified text line.

func GetOcrWord ¶

func GetOcrWord(line uintptr, index int) uintptr

GetOcrWord returns a handle to the specified word by index within a text line.

func GetOcrWordConfidence ¶

func GetOcrWordConfidence(word uintptr) float32

GetOcrWordConfidence returns the recognition confidence score (0-1) for the specified word.

func GetOcrWordContent ¶

func GetOcrWordContent(word uintptr) string

GetOcrWordContent returns the text content of the specified word.

func OcrInitOptionsSetUseModelDelayLoad ¶

func OcrInitOptionsSetUseModelDelayLoad(initOpts uintptr, enable bool)

OcrInitOptionsSetUseModelDelayLoad enables or disables lazy loading of the OCR model.

func OcrProcessOptionsGetMaxRecognitionLineCount ¶

func OcrProcessOptionsGetMaxRecognitionLineCount(processOpts uintptr) int

OcrProcessOptionsGetMaxRecognitionLineCount returns the maximum number of text lines to recognize, as configured in the given processing options.

func OcrProcessOptionsGetResizeResolution ¶

func OcrProcessOptionsGetResizeResolution(processOpts uintptr) (int, int)

OcrProcessOptionsGetResizeResolution returns the current width and height settings that images will be resized to.

func OcrProcessOptionsSetMaxRecognitionLineCount ¶

func OcrProcessOptionsSetMaxRecognitionLineCount(processOpts uintptr, count int)

OcrProcessOptionsSetMaxRecognitionLineCount sets the maximum number of text lines to recognize.

func OcrProcessOptionsSetResizeResolution ¶

func OcrProcessOptionsSetResizeResolution(processOpts uintptr, width int, height int)

OcrProcessOptionsSetResizeResolution sets the resolution (width and height) that images will be resized to.

func ReleaseOcrInitOptions ¶

func ReleaseOcrInitOptions(initOpts uintptr)

ReleaseOcrInitOptions releases the resources associated with OCR initialization options. This should be called when the options are no longer needed.

func ReleaseOcrPipeline ¶

func ReleaseOcrPipeline(pipeline uintptr)

ReleaseOcrPipeline releases the resources associated with an OCR pipeline. This should be called when the pipeline is no longer needed.

func ReleaseOcrProcessOptions ¶

func ReleaseOcrProcessOptions(processOpts uintptr)

ReleaseOcrProcessOptions releases the resources associated with OCR processing options. This should be called when the options are no longer needed.

func ReleaseOcrResult ¶

func ReleaseOcrResult(result uintptr)

ReleaseOcrResult releases the resources associated with OCR recognition results. This should be called when the results are no longer needed.

func RunOcrPipeline ¶

func RunOcrPipeline(pipeline, processOpts uintptr, img *Image) (uintptr, error)

RunOcrPipeline executes the OCR pipeline on the provided image with the specified processing options. Returns a handle to the recognition results.

func SetOcrDllPath ¶

func SetOcrDllPath(path string)

SetOcrDllPath sets the directory path containing the required OCR DLL and model files. This must be called before any other OCR operations. It will panic if any required files are missing.

Types ¶

type BoundingBox ¶

// BoundingBox describes a region of the image by its four corner points.
// Each corner is a Point; the struct tags give the JSON key used for it.
type BoundingBox struct {
	TopLeft     Point `json:"top_left"`
	TopRight    Point `json:"top_right"`
	BottomRight Point `json:"bottom_right"`
	BottomLeft  Point `json:"bottom_left"`
}

BoundingBox represents a rectangular region in the image defined by four corner points.

func GetOcrLineBoundingBox ¶

func GetOcrLineBoundingBox(line uintptr) BoundingBox

GetOcrLineBoundingBox returns the bounding box coordinates for the specified text line.

func GetOcrWordBoundingBox ¶

func GetOcrWordBoundingBox(word uintptr) BoundingBox

GetOcrWordBoundingBox returns the bounding box coordinates for the specified word.

type Image ¶

// Image is the input image descriptor passed by pointer to RunOcrPipeline.
//
// NOTE(review): the meaning of the individual fields is not documented on
// this page. The names suggest a pixel-format code (Type), dimensions in
// pixels (Width, Height), a row stride (Step) and a pointer to the first
// byte of pixel data (DataPtr) — confirm against the native OCR API before
// constructing one by hand.
type Image struct {
	Type     int32
	Width    int32
	Height   int32
	Reserved int32
	Step     int64
	DataPtr  *byte
}

Image represents an input image for OCR processing.

type OcrEngine ¶

// OcrEngine exposes no exported fields; obtain an instance from NewOcrEngine
// and release it with Close.
type OcrEngine struct {
	// contains filtered or unexported fields
}

OcrEngine represents the OCR engine instance that performs text recognition.

func NewOcrEngine ¶

func NewOcrEngine() *OcrEngine

NewOcrEngine creates a new instance of the OCR engine with default settings.

func (*OcrEngine) Close ¶

func (e *OcrEngine) Close()

Close releases all resources associated with the OCR engine. This should be called when the engine is no longer needed.

func (*OcrEngine) EnableModelDelayLoad ¶

func (e *OcrEngine) EnableModelDelayLoad() error

EnableModelDelayLoad enables lazy loading of the OCR model.

func (*OcrEngine) GetMaxRecognitionLineCount ¶

func (e *OcrEngine) GetMaxRecognitionLineCount() int

GetMaxRecognitionLineCount returns the maximum number of text lines the engine will recognize.

func (*OcrEngine) GetResizeResolution ¶

func (e *OcrEngine) GetResizeResolution() (int, int)

GetResizeResolution returns the current resolution settings (width, height) used for image resizing.

func (*OcrEngine) Recognize ¶

func (e *OcrEngine) Recognize(img image.Image, format string) (string, error)

Recognize performs OCR on the provided image and returns the results. The format parameter can be either "text" for plain text output or "json" for detailed recognition results including bounding boxes and confidence scores.

func (*OcrEngine) SetMaxRecognitionLineCount ¶

func (e *OcrEngine) SetMaxRecognitionLineCount(count int) error

SetMaxRecognitionLineCount sets the maximum number of text lines the engine will recognize.

func (*OcrEngine) SetResizeResolution ¶

func (e *OcrEngine) SetResizeResolution(width, height int) error

SetResizeResolution sets the resolution (width and height) that images will be resized to.

type OcrLine ¶

// OcrLine is one recognized line of text, serialized to JSON with the keys
// given in the struct tags.
type OcrLine struct {
	// Text is the text content of the line.
	Text         string      `json:"text"`
	// BoundingRect is the bounding box of the line.
	BoundingRect BoundingBox `json:"bounding_rect"`
	// Words holds the words that belong to this line.
	Words        []OcrWord   `json:"words"`
}

OcrLine represents a single line of recognized text in the image.

type OcrResult ¶

// OcrResult is the complete recognition result for one image, serialized to
// JSON with the keys given in the struct tags.
type OcrResult struct {
	// ImageAngle is the detected rotation angle of the image
	// (GetImageAngle documents it as degrees).
	ImageAngle float32   `json:"image_angle"`
	// Lines holds the recognized text lines.
	Lines      []OcrLine `json:"lines"`
}

OcrResult represents the complete OCR recognition result for an image.

type OcrWord ¶

// OcrWord is one recognized word within a line, serialized to JSON with the
// keys given in the struct tags.
type OcrWord struct {
	// Text is the text content of the word.
	Text         string      `json:"text"`
	// BoundingRect is the bounding box of the word.
	BoundingRect BoundingBox `json:"bounding_rect"`
	// Confidence is the recognition confidence score
	// (GetOcrWordConfidence documents the range as 0-1).
	Confidence   float32     `json:"confidence"`
}

OcrWord represents a single word within a line of recognized text.

type Point ¶

// Point is a 2D coordinate in the image, serialized to JSON as {"x", "y"}.
// NOTE(review): the unit and origin of the coordinates are not stated on this
// page — presumably pixels; confirm before relying on it.
type Point struct {
	X float32 `json:"x"`
	Y float32 `json:"y"`
}

Point represents a 2D coordinate point in the image.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL