Documentation
¶
Index ¶
- type ArrowWriter
- type CSVWriter
- type JSONWriter
- type ParquetWriter
- func NewParquetWriter(pFile source.ParquetFile, obj interface{}, np int64) (*ParquetWriter, error)
- func NewParquetWriterFromProto(pFile source.ParquetFile, obj interface{}, np int64) (*ParquetWriter, error)
- func NewParquetWriterFromWriter(w io.Writer, obj interface{}, np int64) (*ParquetWriter, error)
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ArrowWriter ¶
type ArrowWriter struct {
ParquetWriter
}
ArrowWriter extends the base ParquetWriter.
func NewArrowWriter ¶
func NewArrowWriter(arrowSchema *arrow.Schema, pfile source.ParquetFile, np int64) (*ArrowWriter, error)
NewArrowWriter creates an arrow-schema parquet writer given the native arrow schema, the parquet file writer that holds the parquet file to which the records will be written, and the number of parallel threads that will write to the file.
func (*ArrowWriter) WriteArrow ¶
func (w *ArrowWriter) WriteArrow(record arrow.Record) error
WriteArrow wraps the base Write function provided by writer.ParquetWriter. The function transforms the data from the record, which the Go arrow library provides as an array of columns, into an array of rows, because the parquet-go library does not accept data by columns, only by rows.
type CSVWriter ¶
type CSVWriter struct {
ParquetWriter
}
func NewCSVWriter ¶
Create CSV writer
func NewCSVWriterFromWriter ¶
func (*CSVWriter) WriteString ¶
Write string values to parquet file
type JSONWriter ¶
type JSONWriter struct {
ParquetWriter
}
func NewJSONWriter ¶
func NewJSONWriter(jsonSchema string, pfile source.ParquetFile, np int64) (*JSONWriter, error)
Create JSON writer
func NewJSONWriterFromWriter ¶
type ParquetWriter ¶
type ParquetWriter struct {
SchemaHandler *schema.SchemaHandler
NP int64 //parallel number
PFile source.ParquetFile
PageSize int64
RowGroupSize int64
CompressionType parquet.CompressionCodec
Offset int64
Objs []interface{}
ObjsSize int64
ObjSize int64
CheckSizeCritical int64
PagesMapBuf map[string][]*layout.Page
Size int64
NumRows int64
DictRecs map[string]*layout.DictRecType
ColumnIndexes []*parquet.ColumnIndex
OffsetIndexes []*parquet.OffsetIndex
MarshalFunc func(src []interface{}, sh *schema.SchemaHandler) (*map[string]*layout.Table, error)
// contains filtered or unexported fields
}
ParquetWriter is a writer for parquet files.
func NewParquetWriter ¶
func NewParquetWriter(pFile source.ParquetFile, obj interface{}, np int64) (*ParquetWriter, error)
Create a parquet handler. Obj is an object with tags or a JSON schema string.
func NewParquetWriterFromProto ¶
func NewParquetWriterFromProto(pFile source.ParquetFile, obj interface{}, np int64) (*ParquetWriter, error)
Pass obj as a Go struct object.
func NewParquetWriterFromWriter ¶
func NewParquetWriterFromWriter(w io.Writer, obj interface{}, np int64) (*ParquetWriter, error)
func (*ParquetWriter) Flush ¶
func (pw *ParquetWriter) Flush(flag bool) error
Flush the write buffer to parquet file
func (*ParquetWriter) RenameSchema ¶
func (pw *ParquetWriter) RenameSchema()
Rename schema name to exname in tags
func (*ParquetWriter) SetSchemaHandlerFromJSON ¶
func (pw *ParquetWriter) SetSchemaHandlerFromJSON(jsonSchema string) error
func (*ParquetWriter) Write ¶
func (pw *ParquetWriter) Write(src interface{}) error
Write one object to parquet file
func (*ParquetWriter) WriteStop ¶
func (pw *ParquetWriter) WriteStop() error
Write the footer and stop writing