Documentation ¶
Index ¶
- func ConvertArrowToParquetSchema(schema *arrow.Schema) ([]string, error)
- type Item
- type JSONSchemaItemType
- type PathMapType
- type SchemaHandler
- func NewSchemaHandlerFromArrow(arrowSchema *arrow.Schema) (*SchemaHandler, error)
- func NewSchemaHandlerFromJSON(str string) (sh *SchemaHandler, err error)
- func NewSchemaHandlerFromMetadata(mds []string) (*SchemaHandler, error)
- func NewSchemaHandlerFromSchemaList(schemas []*parquet.SchemaElement) *SchemaHandler
- func NewSchemaHandlerFromStruct(obj interface{}) (sh *SchemaHandler, err error)
- func (sh *SchemaHandler) ConvertToInPathStr(pathStr string) (string, error)
- func (sh *SchemaHandler) CreateInExMap()
- func (sh *SchemaHandler) GetColumnNum() int64
- func (sh *SchemaHandler) GetExName(index int) string
- func (sh *SchemaHandler) GetInName(index int) string
- func (sh *SchemaHandler) GetRepetitionLevelIndex(path []string, rl int32) (int32, error)
- func (sh *SchemaHandler) GetRepetitionType(path []string) (parquet.FieldRepetitionType, error)
- func (sh *SchemaHandler) GetRootExName() string
- func (sh *SchemaHandler) GetRootInName() string
- func (sh *SchemaHandler) GetType(prefixPath string) (reflect.Type, error)
- func (sh *SchemaHandler) GetTypes() []reflect.Type
- func (sh *SchemaHandler) MaxDefinitionLevel(path []string) (int32, error)
- func (sh *SchemaHandler) MaxRepetitionLevel(path []string) (int32, error)
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ConvertArrowToParquetSchema ¶
ConvertArrowToParquetSchema converts an arrow schema to a representation understandable by the parquet-go library. We need this conversion and can't directly use the arrow format because the go parquet type contains metadata which the base writer uses to determine the size of the objects.
Types ¶
type JSONSchemaItemType ¶
type JSONSchemaItemType struct { Tag string `json:"Tag"` Fields []*JSONSchemaItemType `json:"Fields,omitempty"` }
func NewJSONSchemaItem ¶
func NewJSONSchemaItem() *JSONSchemaItemType
type PathMapType ¶
type PathMapType struct { Path string Children map[string]*PathMapType }
PathMapType records a path and its children. It is used in Marshal to improve performance.
func NewPathMap ¶
func NewPathMap(path string) *PathMapType
func (*PathMapType) Add ¶
func (pmt *PathMapType) Add(path []string)
type SchemaHandler ¶
type SchemaHandler struct { SchemaElements []*parquet.SchemaElement MapIndex map[string]int32 IndexMap map[int32]string PathMap *PathMapType Infos []*common.Tag InPathToExPath map[string]string ExPathToInPath map[string]string ValueColumns []string }
SchemaHandler stores the schema data
func NewSchemaHandlerFromArrow ¶
func NewSchemaHandlerFromArrow(arrowSchema *arrow.Schema) ( *SchemaHandler, error)
NewSchemaHandlerFromArrow creates a schema handler from arrow format. This handler is needed since the base ParquetWriter does not understand arrow schema and we need to translate it to the native format which the parquet-go library understands.
func NewSchemaHandlerFromJSON ¶
func NewSchemaHandlerFromJSON(str string) (sh *SchemaHandler, err error)
func NewSchemaHandlerFromMetadata ¶
func NewSchemaHandlerFromMetadata(mds []string) (*SchemaHandler, error)
Create a schema handler from CSV metadata
func NewSchemaHandlerFromSchemaList ¶
func NewSchemaHandlerFromSchemaList(schemas []*parquet.SchemaElement) *SchemaHandler
NewSchemaHandlerFromSchemaList creates schema handler from schema list
func NewSchemaHandlerFromStruct ¶
func NewSchemaHandlerFromStruct(obj interface{}) (sh *SchemaHandler, err error)
Create a schema handler from an object
func (*SchemaHandler) ConvertToInPathStr ¶
func (sh *SchemaHandler) ConvertToInPathStr(pathStr string) (string, error)
Convert a path to an internal path
func (*SchemaHandler) CreateInExMap ¶
func (sh *SchemaHandler) CreateInExMap()
func (*SchemaHandler) GetColumnNum ¶
func (sh *SchemaHandler) GetColumnNum() int64
func (*SchemaHandler) GetExName ¶
func (sh *SchemaHandler) GetExName(index int) string
func (*SchemaHandler) GetInName ¶
func (sh *SchemaHandler) GetInName(index int) string
func (*SchemaHandler) GetRepetitionLevelIndex ¶
func (sh *SchemaHandler) GetRepetitionLevelIndex(path []string, rl int32) (int32, error)
GetRepetitionLevelIndex returns the index associated with the repetition level rl for a column identified by its schema path
func (*SchemaHandler) GetRepetitionType ¶
func (sh *SchemaHandler) GetRepetitionType(path []string) (parquet.FieldRepetitionType, error)
GetRepetitionType returns the repetition type of a column by its schema path
func (*SchemaHandler) GetRootExName ¶
func (sh *SchemaHandler) GetRootExName() string
func (*SchemaHandler) GetRootInName ¶
func (sh *SchemaHandler) GetRootInName() string
Get root name from the schema handler
func (*SchemaHandler) GetType ¶
func (sh *SchemaHandler) GetType(prefixPath string) (reflect.Type, error)
func (*SchemaHandler) GetTypes ¶
func (sh *SchemaHandler) GetTypes() []reflect.Type
Get object type from schema by reflect
func (*SchemaHandler) MaxDefinitionLevel ¶
func (sh *SchemaHandler) MaxDefinitionLevel(path []string) (int32, error)
MaxDefinitionLevel returns the maximum definition level of a column by its schema path
func (*SchemaHandler) MaxRepetitionLevel ¶
func (sh *SchemaHandler) MaxRepetitionLevel(path []string) (int32, error)
MaxRepetitionLevel returns the maximum repetition level of a column by its schema path