nn

package

Version: v0.0.0-...-b7e086b (latest) | Published: Apr 26, 2024 | License: MIT | Imports: 13 | Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/cyclopcam/cyclops

Documentation ¶

Index ¶

Constants
Variables
func LoadClassFile(filename string) ([]string, error)
type DetectionParams
- func NewDetectionParams() *DetectionParams
type DetectionResult
type ImageCrop
- func WholeImage(nchan int, pixels []byte, width, height int) ImageCrop
- func (c ImageCrop) Crop(x1, y1, x2, y2 int) ImageCrop
- func (c ImageCrop) Pointer() unsafe.Pointer
- func (c ImageCrop) Stride() int
type ImageLabels
type InferenceOptions
type ModelConfig
- func LoadModelConfig(filename string) (*ModelConfig, error)
type ObjectDetection
- func TiledInference(model ObjectDetector, img ImageCrop, _params *DetectionParams, nThreads int) ([]ObjectDetection, error)
type ObjectDetector
type Point
- func (p Point) Distance(b Point) float32
type Rect
- func (r Rect) Area() int
- func (r Rect) Center() Point
- func (r Rect) IOU(b Rect) float32
- func (r Rect) Intersection(b Rect) Rect
- func (r *Rect) MaxDelta(b Rect) int
- func (r *Rect) Offset(dx, dy int)
- func (r Rect) Union(b Rect) Rect
- func (r Rect) X2() int
- func (r Rect) Y2() int
type ThreadingMode
type VideoLabels
- func RunInferenceOnVideoFile(model ObjectDetector, inputFile string, options InferenceOptions) (*VideoLabels, error)

Constants ¶

View Source

// COCO class IDs for the first 17 COCO classes. Each constant is the index of
// the matching name in COCOClasses (COCOPerson is 0 ... COCODog is 16), so the
// order of this list must not change.
const (
	COCOPerson       = iota // 0
	COCOBicycle             // 1
	COCOCar                 // 2
	COCOMotorcycle          // 3
	COCOAirplane            // 4
	COCOBus                 // 5
	COCOTrain               // 6
	COCOTruck               // 7
	COCOBoat                // 8
	COCOTrafficLight        // 9
	COCOFireHydrant         // 10
	COCOStopSign            // 11
	COCOParkingMeter        // 12
	COCOBench               // 13
	COCOBird                // 14
	COCOCat                 // 15
	COCODog                 // 16
)

Variables ¶

View Source

// COCOClasses lists the COCO class names. The position of each name in the
// slice is its class ID, matching the COCO* constants (for example,
// COCOClasses[COCOPerson] is "person" and COCOClasses[COCODog] is "dog").
var COCOClasses = []string{
	"person",
	"bicycle",
	"car",
	"motorcycle",
	"airplane",
	"bus",
	"train",
	"truck",
	"boat",
	"traffic light",
	"fire hydrant",
	"stop sign",
	"parking meter",
	"bench",
	"bird",
	"cat",
	"dog",
	"horse",
	"sheep",
	"cow",
	"elephant",
	"bear",
	"zebra",
	"giraffe",
	"backpack",
	"umbrella",
	"handbag",
	"tie",
	"suitcase",
	"frisbee",
	"skis",
	"snowboard",
	"sports ball",
	"kite",
	"baseball bat",
	"baseball glove",
	"skateboard",
	"surfboard",
	"tennis racket",
	"bottle",
	"wine glass",
	"cup",
	"fork",
	"knife",
	"spoon",
	"bowl",
	"banana",
	"apple",
	"sandwich",
	"orange",
	"broccoli",
	"carrot",
	"hot dog",
	"pizza",
	"donut",
	"cake",
	"chair",
	"couch",
	"potted plant",
	"bed",
	"dining table",
	"toilet",
	"tv",
	"laptop",
	"mouse",
	"remote",
	"keyboard",
	"cell phone",
	"microwave",
	"oven",
	"toaster",
	"sink",
	"refrigerator",
	"book",
	"clock",
	"vase",
	"scissors",
	"teddy bear",
	"hair drier",
	"toothbrush",
}

COCO classes

Functions ¶

func LoadClassFile ¶

func LoadClassFile(filename string) ([]string, error)

Load a text file with class names on each line

Types ¶

type ImageCrop ¶

// ImageCrop describes a rectangular crop of an image. Instead of a raw pointer
// and stride, it carries the whole image buffer together with the origin and
// size of the crop.
// NOTE(review): CropX/CropY are presumably measured in pixels from the
// top-left of the whole image - confirm against Crop() and Pointer().
type ImageCrop struct {
	NChan       int    // Number of channels (eg 3 for RGB)
	Pixels      []byte // The whole image
	ImageWidth  int    // The width of the original image, held in Pixels
	ImageHeight int    // The height of the original image, held in Pixels
	CropX       int    // Origin of crop X
	CropY       int    // Origin of crop Y
	CropWidth   int    // The width of this crop
	CropHeight  int    // The height of this crop
}

ImageCrop is a crop of an image. In C we would represent this as a pointer and a stride, but since that's not memory safe in Go, we instead store the whole image buffer together with the crop's origin and size. Once we get into the C world for NN inference, we can use pointers and strides again.

func WholeImage ¶

func WholeImage(nchan int, pixels []byte, width, height int) ImageCrop

Return a 'crop' of the entire image

func (ImageCrop) Crop ¶

func (c ImageCrop) Crop(x1, y1, x2, y2 int) ImageCrop

Return a crop of this crop. The coordinates of the new crop are relative to the existing crop. Panics if any parameter is out of bounds.

func (ImageCrop) Pointer ¶

func (c ImageCrop) Pointer() unsafe.Pointer

Return a pointer to the start of the crop

func (ImageCrop) Stride ¶

func (c ImageCrop) Stride() int

type ImageLabels ¶

// ImageLabels holds the object detections for a single image or, for video,
// a single frame.
type ImageLabels struct {
	Frame   int               `json:"frame,omitempty"` // For video, this is the frame number
	Objects []ObjectDetection `json:"objects"`
}

type InferenceOptions ¶

// InferenceOptions holds the NN analysis options for RunInferenceOnVideoFile.
type InferenceOptions struct {
	MinSize        int      // Minimum size of object, in pixels. If max(width, height) >= MinSize, then use the object
	MaxVideoHeight int      // If video height is larger than this, then scale it down to this size (0 = no scaling)
	StartFrame     int      // Start processing at frame (0 = start at beginning)
	EndFrame       int      // Stop processing at frame (0 = process to end)
	Classes        []string // List of class names to detect (eg ["person", "car", "bear"]). Classes not included in the list are ignored.
	StdOutProgress bool     // Emit progress to stdout
}

NN analysis options for RunInferenceOnVideoFile

type ModelConfig ¶

// ModelConfig describes an NN model. It is saved in a JSON file along with the
// weights of the model.
type ModelConfig struct {
	Architecture string   `json:"architecture"` // eg "yolov8"
	Width        int      `json:"width"`        // eg 320
	Height       int      `json:"height"`       // eg 256
	Classes      []string `json:"classes"`      // eg ["person", "bicycle", "car", ...]
}

ModelConfig is saved in a JSON file along with the weights of the NN model

func LoadModelConfig ¶

func LoadModelConfig(filename string) (*ModelConfig, error)

Load model config from a JSON file

type ObjectDetection ¶

// ObjectDetection is an object that a neural network has found in an image.
type ObjectDetection struct {
	// NOTE(review): Class is presumably an index into the model's class list
	// (ModelConfig.Classes) - confirm.
	Class int `json:"class"`
	// NOTE(review): the range of Confidence is not visible here (likely 0..1) - confirm.
	Confidence float32 `json:"confidence"`
	// Box is the rectangle of the detected object.
	Box Rect `json:"box"`
}

ObjectDetection is an object that a neural network has found in an image

func TiledInference ¶

func TiledInference(model ObjectDetector, img ImageCrop, _params *DetectionParams, nThreads int) ([]ObjectDetection, error)

Run tiled inference on the image. We look at the width and height of the model, and if the image is larger, we split the image up into tiles and run each of those tiles through the model. Then we merge the detections from all tiles back into a single list. If the model is larger than the image, we just run the model directly, so it is safe to call TiledInference on any image without incurring any performance loss.

type ObjectDetector ¶

// ObjectDetector is given an image, and returns zero or more detected objects.
type ObjectDetector interface {
	// Close closes the detector (you MUST call this when finished, because it's a C++ object underneath)
	Close()

	// DetectObjects returns a list of objects detected in the image
	// nchan is expected to be 3, and image is a 24-bit RGB image.
	// You can create a default DetectionParams with NewDetectionParams()
	DetectObjects(img ImageCrop, params *DetectionParams) ([]ObjectDetection, error)

	// Config returns the model config.
	// Callers assume that ModelConfig will remain constant, so don't change it
	// once the detector has been created.
	Config() *ModelConfig
}

ObjectDetector is given an image, and returns zero or more detected objects

type Point ¶

// Point is a 2D point with integer coordinates.
type Point struct {
	X int `json:"x"`
	Y int `json:"y"`
}

func (Point) Distance ¶

func (p Point) Distance(b Point) float32

type Rect ¶

// Rect is a rectangle with integer coordinates, stored as an origin (X, Y)
// plus a size (Width, Height).
// NOTE(review): (X, Y) is presumably the top-left corner, with X2()/Y2()
// giving the opposite corner - confirm against the method implementations.
type Rect struct {
	X      int `json:"x"`
	Y      int `json:"y"`
	Width  int `json:"width"`
	Height int `json:"height"`
}

func (Rect) Area ¶

func (r Rect) Area() int

func (Rect) Center ¶

func (r Rect) Center() Point

func (Rect) IOU ¶

func (r Rect) IOU(b Rect) float32

Intersection over Union

func (Rect) Intersection ¶

func (r Rect) Intersection(b Rect) Rect

func (*Rect) MaxDelta ¶

func (r *Rect) MaxDelta(b Rect) int

func (*Rect) Offset ¶

func (r *Rect) Offset(dx, dy int)

func (Rect) Union ¶

func (r Rect) Union(b Rect) Rect

func (Rect) X2 ¶

func (r Rect) X2() int

func (Rect) Y2 ¶

func (r Rect) Y2() int

type ThreadingMode ¶

// ThreadingMode selects whether the NN library may use more than one thread
// while running inference.
type ThreadingMode int

// Available threading modes.
const (
	ThreadingModeSingle   ThreadingMode = iota // Force the NN library to run inference on a single thread
	ThreadingModeParallel                      // Allow the NN library to run multiple threads while executing a model
)

type VideoLabels ¶

// VideoLabels contains labels for each video frame.
type VideoLabels struct {
	Classes []string       `json:"classes"`
	Frames  []*ImageLabels `json:"frames"`
	Width   int            `json:"width"`  // Image width. Useful when inference is run at different resolution to original image
	Height  int            `json:"height"` // Image height. Useful when inference is run at different resolution to original image
}

VideoLabels contains labels for each video frame

func RunInferenceOnVideoFile ¶

func RunInferenceOnVideoFile(model ObjectDetector, inputFile string, options InferenceOptions) (*VideoLabels, error)

Run NN inference on every frame of a video

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

nn