exports

package
v0.0.0-...-a103044 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 25, 2021 License: MIT Imports: 18 Imported by: 4

Documentation

Overview

Package exports provides functionality to save the data contained in a DataFrame into another format. It provides inverse functionality to the imports package.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ExportToCSV

func ExportToCSV(ctx context.Context, w io.Writer, df *dataframe.DataFrame, options ...CSVExportOptions) error

ExportToCSV exports a DataFrame in CSV format, writing the output to w.

func ExportToExcel

func ExportToExcel(ctx context.Context, w io.Writer, df *dataframe.DataFrame, options ...ExcelExportOptions) error

ExportToExcel exports a DataFrame in Excel format, writing the output to w.

func ExportToJSON

func ExportToJSON(ctx context.Context, w io.Writer, df *dataframe.DataFrame, options ...JSONExportOptions) error

ExportToJSON exports a DataFrame in the JSON Lines (jsonl) format. Each line of output represents one row of the DataFrame.

See: http://jsonlines.org/ for more information.

func ExportToParquet

func ExportToParquet(ctx context.Context, w io.Writer, df *dataframe.DataFrame, options ...ParquetExportOptions) error

ExportToParquet exports a DataFrame as a Parquet file. For maximum cross-compatibility, Series names are escaped: spaces are replaced with underscores, the characters ",;{}()=" (excluding the quotes) are removed, and the result is lower-cased.

func ExportToSQL

func ExportToSQL(ctx context.Context, db execContexter, df *dataframe.DataFrame, tableName string, options ...SQLExportOptions) error

ExportToSQL exports a Dataframe to a SQL Database. It is assumed to be a PostgreSQL database (for placeholder purposes), unless otherwise set to MySQL using the Options.

Example (gist):

import (
	stdSql "database/sql"

	"github.com/rocketlaunchr/dataframe-go/exports"
	sql "github.com/rocketlaunchr/mysql-go"
	"github.com/myesui/uuid"
)

// main demonstrates exporting a DataFrame to a MySQL table inside a
// transaction. ctx (a context.Context) and df (a *dataframe.DataFrame) are
// assumed to be provided by the surrounding program.
func main() {

	p, err := stdSql.Open("mysql", "user:password@/dbname")
	if err != nil {
		return
	}
	pool := &sql.DB{DB: p}

	// Check the error before deferring Close: on failure conn must not be used.
	conn, err := pool.Conn(ctx)
	if err != nil {
		return
	}
	defer conn.Close()

	tx, err := conn.BeginTx(ctx, nil)
	if err != nil {
		return
	}

	opts := exports.SQLExportOptions{
		// Map series names to column names; a nil value excludes the series
		// from the export.
		SeriesToColumn: map[string]*string{
			"Country": &[]string{"country"}[0],
			"Age":     &[]string{"age"}[0],
			"Id":      nil,
			"Date":    nil,
			"Amount":  nil,
		},
		// Generate a fresh UUID for the "uuid" primary key column of every row.
		PrimaryKey: &exports.PrimaryKey{
			PrimaryKey: "uuid",
			Value: func(row int, n int) *string {
				str := uuid.NewV4().String()
				return &str
			},
		},
		BatchSize: &[]uint{50}[0],
		Database:  exports.MySQL,
	}

	// Roll back everything inserted so far if any batch fails.
	err = exports.ExportToSQL(ctx, tx, df, "test", opts)
	if err != nil {
		tx.Rollback()
		return
	}

	tx.Commit()
}

Types

type CSVExportOptions

// CSVExportOptions contains options for the ExportToCSV function.
type CSVExportOptions struct {

	// NullString is used to set what nil values should be encoded to.
	// Common options are NULL, \N, NaN, NA.
	// NOTE(review): the encoding used when NullString is nil is not stated here — confirm.
	NullString *string

	// Range is used to export a subset of rows from the DataFrame.
	Range dataframe.Range

	// Separator is the field delimiter. A common option is ',', which is
	// the default if CSVExportOptions is not provided.
	// NOTE(review): behaviour when options are provided but Separator is left
	// as the zero rune is not stated here — confirm.
	Separator rune

	// UseCRLF determines the line terminator.
	// When true, it is set to \r\n.
	UseCRLF bool
}

CSVExportOptions contains options for ExportToCSV function.

type Database

// Database is used to set the Database. Different databases have different
// syntax for placeholders etc.
type Database int

Database is used to set the Database. Different databases have different syntax for placeholders etc.

// Supported Database values. The numbering starts at 0, so the zero value of
// Database is PostgreSQL.
const (
	// PostgreSQL identifies a PostgreSQL database (value 0).
	PostgreSQL Database = iota
	// MySQL identifies a MySQL database (value 1).
	MySQL
)

type ExcelExportOptions

// ExcelExportOptions contains options for the ExportToExcel function.
type ExcelExportOptions struct {

	// NullString is used to set what nil values should be encoded to.
	// Common options are NULL, \N, NaN, NA.
	// NOTE(review): the encoding used when NullString is nil is not stated here — confirm.
	NullString *string

	// Range is used to export a subset of rows from the DataFrame.
	Range dataframe.Range

	// WriteSheet is used to specify a sheet name.
	// When not set, it defaults to "sheet1".
	WriteSheet *string
}

ExcelExportOptions contains options for ExportToExcel function.

type JSONExportOptions

// JSONExportOptions contains options for the ExportToJSON function.
type JSONExportOptions struct {

	// NullString is used to set what nil values should be encoded to.
	// Common options are the strings NULL, \N, NaN, NA.
	// If not set, then the JSON value null (not a string) is used.
	NullString *string

	// Range is used to export a subset of rows from the DataFrame.
	Range dataframe.Range

	// SetEscapeHTML specifies whether problematic HTML characters should be escaped inside JSON quoted strings.
	// See: https://golang.org/pkg/encoding/json/#Encoder.SetEscapeHTML
	SetEscapeHTML bool
}

JSONExportOptions contains options for ExportToJSON function.

type ParquetExportOptions

// ParquetExportOptions contains options for the ExportToParquet function.
type ParquetExportOptions struct {

	// Range is used to export a subset of rows from the DataFrame.
	Range dataframe.Range

	// PageSize defaults to 8K if not set.
	//
	// See: https://godoc.org/github.com/xitongsys/parquet-go/writer#ParquetWriter
	PageSize *int64

	// CompressionType defaults to CompressionCodec_SNAPPY if not set.
	//
	// See: https://godoc.org/github.com/xitongsys/parquet-go/writer#ParquetWriter
	CompressionType *parquet.CompressionCodec

	// Offset defaults to 4 if not set.
	//
	// See: https://godoc.org/github.com/xitongsys/parquet-go/writer#ParquetWriter
	Offset *int64
}

ParquetExportOptions contains options for ExportToParquet function.

type PrimaryKey

// PrimaryKey is used to generate custom values for the primary key.
type PrimaryKey struct {

	// PrimaryKey is the column name of the primary key.
	PrimaryKey string

	// Value is a function that generates a primary key value given the row number
	// (row) and the number of rows in the DataFrame (n).
	// For auto-incrementing primary keys, nil can be returned.
	Value func(row int, n int) *string
}

PrimaryKey is used to generate custom values for the primary key.

type SQLExportOptions

// SQLExportOptions contains options for the ExportToSQL function.
type SQLExportOptions struct {

	// NullString is used to set what nil values should be encoded to.
	// Common options are NULL, \N, NaN, NA.
	// NOTE(review): the encoding used when NullString is nil is not stated here — confirm.
	NullString *string

	// Range is used to export a subset of rows from the DataFrame.
	Range dataframe.Range

	// PrimaryKey is used if you want to generate custom values for the primary key.
	PrimaryKey *PrimaryKey

	// BatchSize is used to insert data in batches.
	// It is recommended that a transaction is used so that if one batch-insert
	// fails, all successfully inserted data can be rolled back.
	// If set, it must not be 0.
	BatchSize *uint

	// SeriesToColumn is used to map the series name to the table's column name.
	// The key of the map is the series name. Column names are case-sensitive.
	// If the key does not exist, the series name is used by default.
	// If the map value is nil, the series is ignored for the purposes of exporting.
	SeriesToColumn map[string]*string

	// Database is used to set the Database.
	// Its zero value is PostgreSQL.
	Database Database
}

SQLExportOptions contains options for ExportToSQL function.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL