schema

package
v0.0.0-...-3baa4d8 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 7, 2022 License: Apache-2.0 Imports: 8 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ConvertArrowToParquetSchema

func ConvertArrowToParquetSchema(schema *arrow.Schema) ([]string, error)

ConvertArrowToParquetSchema converts an Arrow schema to a representation understood by the parquet-go library. We need this conversion and can't use the Arrow format directly because the Go parquet type contains metadata which the base writer uses to determine the size of the objects.

Types

type Item

type Item struct {
	GoType reflect.Type
	Info   *common.Tag
}

func NewItem

func NewItem() *Item

type JSONSchemaItemType

type JSONSchemaItemType struct {
	Tag    string                `json:"Tag"`
	Fields []*JSONSchemaItemType `json:"Fields,omitempty"`
}

func NewJSONSchemaItem

func NewJSONSchemaItem() *JSONSchemaItemType

type PathMapType

type PathMapType struct {
	Path     string
	Children map[string]*PathMapType
}

PathMapType records a path and its children; it is used in Marshal to improve performance.

func NewPathMap

func NewPathMap(path string) *PathMapType

func (*PathMapType) Add

func (pmt *PathMapType) Add(path []string)

type SchemaHandler

type SchemaHandler struct {
	SchemaElements []*parquet.SchemaElement
	MapIndex       map[string]int32
	IndexMap       map[int32]string
	PathMap        *PathMapType
	Infos          []*common.Tag

	InPathToExPath map[string]string
	ExPathToInPath map[string]string

	ValueColumns []string
}

SchemaHandler stores the schema data

func NewSchemaHandlerFromArrow

func NewSchemaHandlerFromArrow(arrowSchema *arrow.Schema) (
	*SchemaHandler, error)

NewSchemaHandlerFromArrow creates a schema handler from arrow format. This handler is needed since the base ParquetWriter does not understand arrow schema and we need to translate it to the native format which the parquet-go library understands.

func NewSchemaHandlerFromJSON

func NewSchemaHandlerFromJSON(str string) (sh *SchemaHandler, err error)

func NewSchemaHandlerFromMetadata

func NewSchemaHandlerFromMetadata(mds []string) (*SchemaHandler, error)

Create a schema handler from CSV metadata

func NewSchemaHandlerFromSchemaList

func NewSchemaHandlerFromSchemaList(schemas []*parquet.SchemaElement) *SchemaHandler

NewSchemaHandlerFromSchemaList creates schema handler from schema list

func NewSchemaHandlerFromStruct

func NewSchemaHandlerFromStruct(obj interface{}) (sh *SchemaHandler, err error)

Create a schema handler from an object

func (*SchemaHandler) ConvertToInPathStr

func (sh *SchemaHandler) ConvertToInPathStr(pathStr string) (string, error)

Convert a path to an internal path

func (*SchemaHandler) CreateInExMap

func (sh *SchemaHandler) CreateInExMap()

func (*SchemaHandler) GetColumnNum

func (sh *SchemaHandler) GetColumnNum() int64

func (*SchemaHandler) GetExName

func (sh *SchemaHandler) GetExName(index int) string

func (*SchemaHandler) GetInName

func (sh *SchemaHandler) GetInName(index int) string

func (*SchemaHandler) GetRepetitionLevelIndex

func (sh *SchemaHandler) GetRepetitionLevelIndex(path []string, rl int32) (int32, error)

GetRepetitionLevelIndex returns the index along a column's schema path that corresponds to the given repetition level rl

func (*SchemaHandler) GetRepetitionType

func (sh *SchemaHandler) GetRepetitionType(path []string) (parquet.FieldRepetitionType, error)

GetRepetitionType returns the repetition type of a column by its schema path

func (*SchemaHandler) GetRootExName

func (sh *SchemaHandler) GetRootExName() string

func (*SchemaHandler) GetRootInName

func (sh *SchemaHandler) GetRootInName() string

Get root name from the schema handler

func (*SchemaHandler) GetType

func (sh *SchemaHandler) GetType(prefixPath string) (reflect.Type, error)

func (*SchemaHandler) GetTypes

func (sh *SchemaHandler) GetTypes() []reflect.Type

Get object type from schema by reflect

func (*SchemaHandler) MaxDefinitionLevel

func (sh *SchemaHandler) MaxDefinitionLevel(path []string) (int32, error)

MaxDefinitionLevel returns the max definition level of a column by its schema path

func (*SchemaHandler) MaxRepetitionLevel

func (sh *SchemaHandler) MaxRepetitionLevel(path []string) (int32, error)

MaxRepetitionLevel returns the max repetition level of a column by its schema path

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL