decoder

package
v1.2.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 9, 2026 License: MIT Imports: 10 Imported by: 0

Documentation

Overview

Package decoder provides high-level GEDCOM file decoding functionality.

The decoder package converts GEDCOM files into structured Go data types, building on the lower-level parser package. It handles character encoding, validates the GEDCOM structure, and constructs a complete Document with cross-reference resolution.

Example usage:

f, err := os.Open("family.ged")
if err != nil {
    log.Fatal(err)
}
defer f.Close()

doc, err := decoder.Decode(f)
if err != nil {
    log.Fatal(err)
}

fmt.Printf("Found %d individuals\n", len(doc.Individuals()))
Example

Example demonstrates basic GEDCOM file decoding.

package main

import (
	"fmt"
	"strings"

	"github.com/cacack/gedcom-go/decoder"
)

func main() {
	// Any io.Reader can supply the GEDCOM data: a file, a network stream,
	// or, as here, an in-memory string.
	const input = `0 HEAD
1 GEDC
2 VERS 5.5
1 CHAR UTF-8
0 @I1@ INDI
1 NAME John /Smith/
1 SEX M
1 BIRT
2 DATE 15 MAR 1920
0 @I2@ INDI
1 NAME Jane /Doe/
1 SEX F
0 @F1@ FAM
1 HUSB @I1@
1 WIFE @I2@
0 TRLR`

	document, decodeErr := decoder.Decode(strings.NewReader(input))
	if decodeErr != nil {
		fmt.Printf("Error: %v\n", decodeErr)
		return
	}

	// Report how many individual and family records the document holds.
	fmt.Printf("Found %d individuals\n", len(document.Individuals()))
	fmt.Printf("Found %d families\n", len(document.Families()))
}
Output:

Found 2 individuals
Found 1 families

Index

Examples

Constants

View Source
// Parse-phase error codes for diagnostic reporting.
// Each value is the machine-readable string carried in Diagnostic.Code.
const (
	// CodeSyntaxError reports a general syntax error in a GEDCOM line.
	CodeSyntaxError = "SYNTAX_ERROR"

	// CodeInvalidLevel reports a level number that could not be parsed or is
	// invalid. The examples on this page show it for a negative level and
	// for a line that has no level number at all.
	CodeInvalidLevel = "INVALID_LEVEL"

	// CodeInvalidXRef reports a malformed cross-reference identifier.
	CodeInvalidXRef = "INVALID_XREF"

	// CodeBadLevelJump reports an invalid level increment, such as a line at
	// level 2 directly following a line at level 0.
	CodeBadLevelJump = "BAD_LEVEL_JUMP"

	// CodeEmptyLine reports an unexpected empty line in the GEDCOM data.
	CodeEmptyLine = "EMPTY_LINE"
)

Parse-phase error codes for diagnostic reporting.

View Source
// Entity-level diagnostic codes for semantic issues found while populating
// entities. Each value is the machine-readable string carried in
// Diagnostic.Code.
const (
	// CodeUnknownTag reports that an unrecognized tag was encountered.
	// Such tags are preserved in raw form but are not parsed into typed fields.
	CodeUnknownTag = "UNKNOWN_TAG"

	// CodeInvalidValue reports a value that does not match the expected format.
	// The raw value is preserved, but typed parsing may have failed.
	CodeInvalidValue = "INVALID_VALUE"

	// CodeMissingRequired reports that a required subordinate tag is missing.
	CodeMissingRequired = "MISSING_REQUIRED"

	// CodeSkippedRecord reports that an entire record was skipped due to errors.
	CodeSkippedRecord = "SKIPPED_RECORD"
)

Entity-level diagnostic codes for semantic issues during entity population.

Variables

This section is empty.

Functions

func Decode

func Decode(r io.Reader) (*gedcom.Document, error)

Decode parses a GEDCOM file from an io.Reader and returns a Document. This is a convenience function that uses default options.

Example

ExampleDecode shows how to decode GEDCOM data from an io.Reader.

package main

import (
	"fmt"
	"strings"

	"github.com/cacack/gedcom-go/decoder"
)

func main() {
	const input = `0 HEAD
1 GEDC
2 VERS 5.5
0 @I1@ INDI
1 NAME Alice /Johnson/
0 TRLR`

	document, decodeErr := decoder.Decode(strings.NewReader(input))
	if decodeErr != nil {
		fmt.Printf("Error: %v\n", decodeErr)
		return
	}

	// Fetch the individual through its cross-reference identifier and stop
	// quietly when it is absent or carries no name.
	person := document.GetIndividual("@I1@")
	if person == nil || len(person.Names) == 0 {
		return
	}
	fmt.Printf("Found: %s\n", person.Names[0].Full)
}
Output:

Found: Alice /Johnson/

func DecodeWithOptions

func DecodeWithOptions(r io.Reader, opts *DecodeOptions) (*gedcom.Document, error)

DecodeWithOptions parses a GEDCOM file with custom options.

Example

ExampleDecodeWithOptions shows how to decode with custom options.

package main

import (
	"fmt"
	"strings"

	"github.com/cacack/gedcom-go/decoder"
)

func main() {
	const input = `0 HEAD
1 GEDC
2 VERS 5.5
0 @I1@ INDI
1 NAME Bob /Williams/
0 TRLR`

	// Start from the zero value and set only the options this example
	// customizes.
	var opts decoder.DecodeOptions
	opts.MaxNestingDepth = 50
	opts.StrictMode = false

	document, decodeErr := decoder.DecodeWithOptions(strings.NewReader(input), &opts)
	if decodeErr != nil {
		fmt.Printf("Error: %v\n", decodeErr)
		return
	}

	fmt.Printf("Decoded %d records\n", len(document.Records))
}
Output:

Decoded 1 records
Example (Progress)

ExampleDecodeWithOptions_progress shows how to track decoding progress.

package main

import (
	"fmt"
	"strings"

	"github.com/cacack/gedcom-go/decoder"
)

func main() {
	const input = `0 HEAD
1 GEDC
2 VERS 5.5
0 @I1@ INDI
1 NAME Carol /Davis/
0 TRLR`

	// reportProgress is the callback the decoder invokes periodically while
	// parsing. It is a no-op here; a real application would update a
	// progress bar or other UI element.
	reportProgress := func(bytesRead, totalBytes int64) {}

	// Supplying TotalSize lets the callback receive the expected total,
	// which is useful when decoding large files.
	opts := decoder.DecodeOptions{
		OnProgress: reportProgress,
		TotalSize:  int64(len(input)),
	}

	document, decodeErr := decoder.DecodeWithOptions(strings.NewReader(input), &opts)
	if decodeErr != nil {
		fmt.Printf("Error: %v\n", decodeErr)
		return
	}

	fmt.Printf("Decoded successfully: %d individuals\n", len(document.Individuals()))
}
Output:

Decoded successfully: 1 individuals

Types

type DecodeOptions

// DecodeOptions provides configuration options for decoding GEDCOM files.
// It is passed to DecodeWithOptions and DecodeWithDiagnostics; DefaultOptions
// returns the default settings.
type DecodeOptions struct {
	// Context allows cancellation and timeout control of the decode.
	Context context.Context

	// MaxNestingDepth sets the maximum allowed nesting depth (default: 100).
	// The limit prevents stack overflow on malformed files.
	MaxNestingDepth int

	// StrictMode controls how parsing errors are handled.
	//
	// When StrictMode is true:
	//   - Parsing fails immediately on the first syntax error
	//   - The error is returned from Decode/DecodeWithOptions
	//   - Use for files that must be fully valid or rejected
	//
	// When StrictMode is false (default):
	//   - Parsing continues after encountering errors
	//   - Malformed lines are skipped; valid lines are preserved
	//   - Diagnostics are collected for all issues encountered
	//   - Use [DecodeWithDiagnostics] to access diagnostics
	//   - A partial document is returned if any valid records exist
	//
	// Lenient mode (StrictMode=false) is recommended for importing vendor
	// GEDCOMs, which often contain non-standard extensions or formatting quirks.
	StrictMode bool

	// OnProgress is called periodically during parsing to report progress.
	// If nil, no progress reporting occurs (zero overhead).
	OnProgress ProgressCallback

	// TotalSize is the expected total size of the input in bytes.
	// Leave it at 0 (the default) when the size is unknown; the callback
	// then receives -1 as its total.
	TotalSize int64
}

DecodeOptions provides configuration options for decoding GEDCOM files.

func DefaultOptions

func DefaultOptions() *DecodeOptions

DefaultOptions returns the default decoding options.

type DecodeResult added in v1.1.0

// DecodeResult contains the result of decoding a GEDCOM file with
// diagnostics, as returned by DecodeWithDiagnostics. In lenient mode,
// Document may contain partial data even when diagnostics are present.
type DecodeResult struct {
	// Document is the parsed GEDCOM document.
	// In lenient mode, this may be a partial document if some lines failed to parse.
	Document *gedcom.Document

	// Diagnostics contains all issues encountered during parsing.
	// It is empty if parsing was successful or StrictMode was enabled.
	Diagnostics Diagnostics
}

DecodeResult contains the result of decoding a GEDCOM file with diagnostics. In lenient mode, Document may contain partial data even when diagnostics are present.

func DecodeWithDiagnostics added in v1.1.0

func DecodeWithDiagnostics(r io.Reader, opts *DecodeOptions) (*DecodeResult, error)

DecodeWithDiagnostics parses a GEDCOM file and returns both the document and any diagnostics. This function enables lenient parsing mode when StrictMode is false (the default).

In lenient mode:

  • Parse errors are collected as diagnostics rather than stopping parsing
  • A partial document is returned if some valid data was parsed
  • An error is returned only if no valid records could be parsed

In strict mode (StrictMode=true):

  • Parsing fails immediately on the first error, and that error is returned to the caller
  • Diagnostics will be empty on success
Example

ExampleDecodeWithDiagnostics demonstrates lenient parsing mode with diagnostic collection. This is useful when processing GEDCOM files that may contain errors but you still want to extract as much valid data as possible.

package main

import (
	"fmt"
	"strings"

	"github.com/cacack/gedcom-go/decoder"
)

func main() {
	// Line 8 of this input is malformed: its level number is negative.
	const input = `0 HEAD
1 GEDC
2 VERS 5.5
0 @I1@ INDI
1 NAME John /Smith/
1 SEX M
0 @I2@ INDI
-1 NAME Jane /Doe/
1 NAME Jane /Doe/
0 TRLR`

	// Lenient parsing is selected by leaving StrictMode false. That is
	// already the default; it is assigned explicitly only for clarity.
	var opts decoder.DecodeOptions
	opts.StrictMode = false

	result, decodeErr := decoder.DecodeWithDiagnostics(strings.NewReader(input), &opts)
	if decodeErr != nil {
		fmt.Printf("Fatal error: %v\n", decodeErr)
		return
	}

	// Summarize whatever issues were collected while parsing.
	if diags := result.Diagnostics; len(diags) > 0 {
		fmt.Printf("Found %d diagnostic(s)\n", len(diags))

		// Errors are counted separately from warnings.
		if diags.HasErrors() {
			fmt.Printf("  Errors: %d\n", len(diags.Errors()))
		}
	}

	// Data that parsed successfully is still available on the document.
	fmt.Printf("Parsed %d individuals despite errors\n", len(result.Document.Individuals()))
}
Output:

Found 1 diagnostic(s)
  Errors: 1
Parsed 2 individuals despite errors
Example (FilterBySeverity)

ExampleDecodeWithDiagnostics_filterBySeverity shows how to filter diagnostics by severity.

package main

import (
	"fmt"
	"strings"

	"github.com/cacack/gedcom-go/decoder"
)

func main() {
	// Line 6 of this input is invalid: it does not start with a level number.
	const input = `0 HEAD
1 GEDC
2 VERS 5.5
0 @I1@ INDI
1 NAME Alice /Johnson/
invalid line without level
0 TRLR`

	result, decodeErr := decoder.DecodeWithDiagnostics(strings.NewReader(input), nil)
	if decodeErr != nil {
		fmt.Printf("Fatal error: %v\n", decodeErr)
		return
	}

	// Errors() keeps only error-severity diagnostics, dropping warnings and
	// informational entries.
	for _, d := range result.Diagnostics.Errors() {
		fmt.Printf("Line %d: %s\n", d.Line, d.Code)
	}
}
Output:

Line 6: INVALID_LEVEL
Example (InspectDetails)

ExampleDecodeWithDiagnostics_inspectDetails shows how to inspect diagnostic details.

package main

import (
	"fmt"
	"strings"

	"github.com/cacack/gedcom-go/decoder"
)

func main() {
	// GEDCOM with a negative level error on line 5.
	gedcomData := `0 HEAD
1 GEDC
2 VERS 5.5
0 @I1@ INDI
-1 NAME Bob /Wilson/
0 TRLR`

	// Check the error instead of discarding it: DecodeWithDiagnostics
	// returns an error when no valid records could be parsed, and
	// dereferencing result in that case would not be safe.
	result, err := decoder.DecodeWithDiagnostics(strings.NewReader(gedcomData), nil)
	if err != nil {
		fmt.Printf("Fatal error: %v\n", err)
		return
	}

	// Inspect the first diagnostic's details.
	if len(result.Diagnostics) > 0 {
		d := result.Diagnostics[0]
		fmt.Printf("Severity: %s\n", d.Severity)
		fmt.Printf("Code: %s\n", d.Code)
		fmt.Printf("Line: %d\n", d.Line)
		fmt.Printf("Message: %s\n", d.Message)
	}
}
Output:

Severity: ERROR
Code: INVALID_LEVEL
Line: 5
Message: level cannot be negative

type Diagnostic added in v1.1.0

// Diagnostic represents a single issue encountered during parsing or
// decoding. It can represent both parser-level errors and entity-level
// warnings, and it implements the error interface through its Error method.
type Diagnostic struct {
	// Line is the 1-based line number where the issue occurred.
	Line int

	// Severity indicates the importance level of this diagnostic.
	Severity Severity

	// Code is a machine-readable identifier for the diagnostic type,
	// such as one of the Code* constants.
	Code string

	// Message is a human-readable description of the issue.
	Message string

	// Context provides the actual content that caused the issue.
	Context string
}

Diagnostic represents a single issue encountered during parsing or decoding. It can represent both parser-level errors and entity-level warnings.

func NewDiagnostic added in v1.1.0

func NewDiagnostic(line int, severity Severity, code, message, context string) Diagnostic

NewDiagnostic creates a new Diagnostic with the given parameters.

func NewParseError added in v1.1.0

func NewParseError(line int, code, message, context string) Diagnostic

NewParseError creates a new Diagnostic with SeverityError for parse-phase errors. This is a convenience function for the common case of parser errors.

func (Diagnostic) Error added in v1.1.0

func (d Diagnostic) Error() string

Error implements the error interface, returning a formatted error string.

func (Diagnostic) String added in v1.1.0

func (d Diagnostic) String() string

String returns a human-friendly representation of the diagnostic.

type Diagnostics added in v1.1.0

// Diagnostics is a collection of Diagnostic values with helper methods
// (Errors, Warnings, HasErrors and String).
type Diagnostics []Diagnostic

Diagnostics is a collection of Diagnostic instances with helper methods.

func (Diagnostics) Errors added in v1.1.0

func (ds Diagnostics) Errors() Diagnostics

Errors returns a new Diagnostics containing only error-severity diagnostics.

func (Diagnostics) HasErrors added in v1.1.0

func (ds Diagnostics) HasErrors() bool

HasErrors returns true if any diagnostic has SeverityError.

func (Diagnostics) String added in v1.1.0

func (ds Diagnostics) String() string

String returns a formatted multi-line summary of all diagnostics.

func (Diagnostics) Warnings added in v1.1.0

func (ds Diagnostics) Warnings() Diagnostics

Warnings returns a new Diagnostics containing only warning-severity diagnostics.

type ProgressCallback added in v0.8.0

// ProgressCallback reports parsing progress during GEDCOM decoding.
// bytesRead is the cumulative number of bytes read so far; totalBytes is the
// expected total size, or -1 if unknown.
type ProgressCallback func(bytesRead, totalBytes int64)

ProgressCallback reports parsing progress during GEDCOM decoding. bytesRead is the cumulative number of bytes read so far. totalBytes is the expected total size in bytes, or -1 if it is unknown (that is, when DecodeOptions.TotalSize is left at 0).

type Severity added in v1.1.0

// Severity represents the severity level of a diagnostic.
type Severity int

Severity represents the severity level of a diagnostic. This type mirrors validator.Severity to avoid import cycles between decoder and validator packages.

// Severity levels, numbered from 0 in declaration order via iota.
const (
	// SeverityError indicates a data integrity issue that must be fixed.
	// It is the zero value of Severity and is printed as "ERROR" in the
	// example output on this page.
	SeverityError Severity = iota

	// SeverityWarning indicates a potential problem that should be reviewed.
	SeverityWarning

	// SeverityInfo indicates an informational data quality suggestion.
	SeverityInfo
)

func (Severity) String added in v1.1.0

func (s Severity) String() string

String returns the human-readable name of the severity level.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL