core

package

v0.1.0 (Latest) | Go to latest | Published: Mar 20, 2025 | License: MIT | Imports: 3 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/TFMV/parity

Links

Open Source Insights

Documentation ¶

Overview ¶

Package core provides the core types and interfaces for the Parity dataset comparison tool.

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type DatasetReader ¶

type DatasetReader interface {
	// Read returns the next record batch from the dataset, or an error.
	// It returns io.EOF once there are no more batches to read, so callers
	// should treat io.EOF as normal end-of-data rather than a failure.
	// NOTE(review): ownership of the returned arrow.Record (i.e. who must
	// call Release) is not stated here — confirm against implementations.
	Read(ctx context.Context) (arrow.Record, error)

	// Schema returns the Arrow schema of the dataset.
	Schema() *arrow.Schema

	// Close closes the reader and releases the resources it holds.
	// NOTE(review): whether Read may be called after Close is not
	// specified here — presumably not; confirm.
	Close() error
}

DatasetReader defines an interface for reading data from various sources.

type DatasetWriter ¶

type DatasetWriter interface {
	// Write writes a single record batch to the destination and returns
	// an error if the write fails.
	// NOTE(review): whether the writer retains the record after Write
	// returns is not stated here — confirm against implementations.
	Write(ctx context.Context, record arrow.Record) error

	// Close closes the writer and flushes any pending data. Because data
	// may still be pending until Close, callers should check the returned
	// error.
	Close() error
}

DatasetWriter defines an interface for writing data to various destinations.

type DiffOptions ¶

type DiffOptions struct {
	// KeyColumns lists the names of the columns used as keys for matching
	// records between the two datasets.
	KeyColumns []string

	// IgnoreColumns lists the names of columns to skip when comparing
	// records.
	IgnoreColumns []string

	// BatchSize is the size of batches to process at once.
	// NOTE(review): presumably a row count; behavior for zero or negative
	// values is not specified here — confirm.
	BatchSize int64

	// Tolerance is the tolerance used for floating point comparisons.
	// NOTE(review): absolute vs. relative tolerance is not stated here —
	// confirm against the Differ implementation.
	Tolerance float64

	// Parallel indicates whether to use parallel processing.
	Parallel bool

	// NumWorkers is the number of workers to use for parallel processing.
	// If 0, defaults to the number of CPUs.
	// NOTE(review): presumably only consulted when Parallel is true —
	// confirm.
	NumWorkers int
}

DiffOptions provides options for the diff operation.

type DiffResult ¶

type DiffResult struct {
	// Added holds the records that exist in the target dataset but not in
	// the source dataset.
	Added arrow.Record

	// Deleted holds the records that exist in the source dataset but not
	// in the target dataset.
	Deleted arrow.Record

	// Modified holds the records that exist in both datasets but whose
	// values differ. It includes columns indicating which fields were
	// modified.
	// NOTE(review): the naming/layout of those indicator columns is not
	// described here — confirm against the Differ implementation.
	Modified arrow.Record

	// Summary holds aggregate counts describing the differences.
	Summary DiffSummary
}

DiffResult represents the difference between two datasets.

type DiffSummary ¶

type DiffSummary struct {
	// TotalSource is the total number of records in the source dataset.
	TotalSource int64

	// TotalTarget is the total number of records in the target dataset.
	TotalTarget int64

	// Added is the number of records added (see DiffResult.Added: present
	// in the target but not in the source).
	Added int64

	// Deleted is the number of records deleted (see DiffResult.Deleted:
	// present in the source but not in the target).
	Deleted int64

	// Modified is the number of records modified (present in both
	// datasets with differing values).
	Modified int64

	// Columns maps each column name to the number of modifications
	// observed in that column.
	// NOTE(review): whether this map is nil or empty when nothing was
	// modified is not specified here — confirm before writing to it.
	Columns map[string]int64
}

DiffSummary provides a summary of the differences between two datasets.

type Differ ¶

type Differ interface {
	// Diff computes the difference between the source and target datasets,
	// configured by options, and returns the result or an error.
	// NOTE(review): whether Diff closes the supplied readers is not stated
	// here — presumably the caller remains responsible; confirm.
	Diff(ctx context.Context, source, target DatasetReader, options DiffOptions) (*DiffResult, error)
}

Differ defines an interface for computing differences between datasets.

type ReaderConfig ¶

type ReaderConfig struct {
	// Type is the type of the reader to create.
	// NOTE(review): the set of accepted values is not listed here —
	// confirm against the reader factory.
	Type string

	// Path is the path to the file or directory to read from.
	Path string

	// ConnectionString is the connection string for a database source.
	ConnectionString string

	// Table is the table name for a database source.
	Table string

	// Query is the query to execute for a database source.
	// NOTE(review): precedence between Table and Query when both are set
	// is not specified here — confirm.
	Query string

	// BatchSize is the size of batches to read.
	// NOTE(review): presumably a row count — confirm.
	BatchSize int64
}

ReaderConfig provides configuration for creating a reader.

type Reporter ¶

type Reporter interface {
	// Report generates a report from a diff result and returns it as an
	// io.Reader for the caller to consume, or an error.
	// NOTE(review): the report format is implementation-defined and not
	// described here.
	Report(ctx context.Context, result *DiffResult) (io.Reader, error)
}

Reporter defines an interface for generating reports from diff results.

type WriterConfig ¶

type WriterConfig struct {
	// Type is the type of the writer to create.
	// NOTE(review): the set of accepted values is not listed here —
	// confirm against the writer factory.
	Type string

	// Path is the path to the file or directory to write to.
	Path string

	// ConnectionString is the connection string for a database
	// destination.
	ConnectionString string

	// Table is the table name for a database destination.
	Table string

	// BatchSize is the size of batches to write.
	// NOTE(review): presumably a row count — confirm.
	BatchSize int64
}

WriterConfig provides configuration for creating a writer.

Source Files ¶

View all Source files

types.go

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL