carrow

package module
v0.0.0-...-e0f2aff Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 24, 2021 License: BSD-3-Clause Imports: 5 Imported by: 0

README

carrow - Go bindings to Apache Arrow via C++-API

CircleCI godoc

THIS PROJECT IS NO LONGER MAINTAINED, HAVE A LOOK HERE

Access to Arrow C++ from Go.

FAQ

Why Not Apache Arrow for Go?

We'd like to share memory between Go & Python, and the current Arrow bindings for Go don't have that option. Since pyarrow uses the C++ Arrow library under the hood, we can just pass a pointer.

Also, the C++ Arrow library is better maintained than the Go one and has more features.

Development

  • The C++ glue layer is in carrow.cc, we try to keep it simple and unaware of Go.
  • See Dockerfile & build-docker target in the Makefile on how to set up an environment
  • See Dockerfile.test for running tests (used in CircleCI)
Debugging

We have Go, C++ & Python code working together. See the Dockerfile on how we get dependencies and set environment for development.

Example using gdb
$ PKG_CONFIG_PATH=/opt/miniconda/lib/pkgconfig LD_LIBRARY_PATH=/opt/miniconda/lib  go build ./_misc/wtr.go
$ LD_LIBRARY_PATH=/opt/miniconda/lib gdb wtr
(gdb) break carrow.cc:write_table
(gdb) run -db /tmp/plasma.db -id 800

Documentation

Overview

Deprecated: Try https://github.com/apache/arrow/tree/master/go instead

Example
package main

import (
	"fmt"

	"github.com/353solutions/carrow"
)

// main builds a two-column table (one int64 column, one float64 column) with
// 100 rows each and prints the resulting column and row counts. Every failure
// is reported on stdout and aborts the example.
func main() {
	// Number of values appended to each column.
	const rows = 100

	ints := carrow.NewInteger64ArrayBuilder()
	floats := carrow.NewFloat64ArrayBuilder()

	// Fill both builders in lock-step with the values 0..rows-1.
	for n := 0; n < rows; n++ {
		intErr := ints.Append(int64(n))
		if intErr != nil {
			fmt.Printf("intBld.Append error: %s", intErr)
			return
		}

		floatErr := floats.Append(float64(n))
		if floatErr != nil {
			fmt.Printf("floatBld.Append error: %s", floatErr)
			return
		}
	}

	// Turn the builders into arrays.
	intColumn, err := ints.Finish()
	if err != nil {
		fmt.Printf("intBld.Finish error: %s", err)
		return
	}

	floatColumn, err := floats.Finish()
	if err != nil {
		fmt.Printf("floatBld.Finish error: %s", err)
		return
	}

	// Describe each column with a named, typed field.
	intDesc, err := carrow.NewField("incCol", carrow.Integer64Type)
	if err != nil {
		fmt.Printf("intField error: %s", err)
		return
	}

	floatDesc, err := carrow.NewField("floatCol", carrow.Float64Type)
	if err != nil {
		fmt.Printf("floatField error: %s", err)
		return
	}

	// The schema lists the fields in the same order as the arrays below.
	layout, err := carrow.NewSchema([]*carrow.Field{intDesc, floatDesc})
	if err != nil {
		fmt.Printf("can't create schema: %s", err)
		return
	}

	tbl, err := carrow.NewTableFromArrays(layout, []*carrow.Array{intColumn, floatColumn})
	if err != nil {
		fmt.Printf("table creation error: %s", err)
		return
	}

	fmt.Printf("num cols: %d\n", tbl.NumCols())
	fmt.Printf("num rows: %d\n", tbl.NumRows())
}
Output:

num cols: 2
num rows: 100

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Array

type Array struct {
	// contains filtered or unexported fields
}

Array is arrow array

func (*Array) BoolAt

func (a *Array) BoolAt(i int) (bool, error)

BoolAt returns bool at location

func (*Array) DType

func (a *Array) DType() DType

DType returns the array data type

func (*Array) Float64At

func (a *Array) Float64At(i int) (float64, error)

Float64At returns float at location

func (*Array) Int64At

func (a *Array) Int64At(i int) (int64, error)

Int64At returns integer at location

func (*Array) Length

func (a *Array) Length() int

Length returns the length of the array

func (*Array) StringAt

func (a *Array) StringAt(i int) (string, error)

StringAt returns string at location

func (*Array) TimeAt

func (a *Array) TimeAt(i int) (time.Time, error)

TimeAt returns time at location

type Field

type Field struct {
	// contains filtered or unexported fields
}

Field is a field description

func NewField

func NewField(name string, dtype DType) (*Field, error)

NewField returns a new Field

func (*Field) DType

func (f *Field) DType() DType

DType returns the field data type

func (*Field) Name

func (f *Field) Name() string

Name returns the field name

type Metadata

type Metadata struct {
	// contains filtered or unexported fields
}

Metadata in schema

func NewMetadata

func NewMetadata() *Metadata

NewMetadata creates new Metadata

func (*Metadata) Key

func (m *Metadata) Key(i int) (string, error)

Key returns key at index i

func (*Metadata) Len

func (m *Metadata) Len() (int, error)

Len returns number of elements

func (*Metadata) Set

func (m *Metadata) Set(key, value string) error

Set sets a key/value

func (*Metadata) Value

func (m *Metadata) Value(i int) (string, error)

Value returns value at index i

type Schema

type Schema struct {
	// contains filtered or unexported fields
}

Schema is table schema

func NewSchema

func NewSchema(fields []*Field) (*Schema, error)

NewSchema creates a new schema

func (*Schema) Metadata

func (s *Schema) Metadata() (*Metadata, error)

Metadata returns the schema metadata

func (*Schema) SetMetadata

func (s *Schema) SetMetadata(m *Metadata) error

SetMetadata sets the metadata

type Table

type Table struct {
	// contains filtered or unexported fields
}

Table is arrow table

func NewTableFromArrays

func NewTableFromArrays(schema *Schema, arrays []*Array) (*Table, error)

NewTableFromArrays creates new Table from slice of arrays

func NewTableFromPtr

func NewTableFromPtr(ptr unsafe.Pointer) *Table

NewTableFromPtr creates a new table from the underlying C pointer. You probably shouldn't use this function.

func (*Table) Column

func (t *Table) Column(i int) (*Array, error)

Column returns the nth column (Array)

func (*Table) ColumnByName

func (t *Table) ColumnByName(name string) (*Array, error)

ColumnByName returns column by name

func (*Table) ColumnNames

func (t *Table) ColumnNames() ([]string, error)

ColumnNames returns the names of the columns

func (*Table) Field

func (t *Table) Field(i int) (*Field, error)

Field returns the nth field

func (*Table) NumCols

func (t *Table) NumCols() int

NumCols returns the number of columns

func (*Table) NumRows

func (t *Table) NumRows() int

NumRows returns the number of rows

func (*Table) Ptr

func (t *Table) Ptr() unsafe.Pointer

Ptr returns the underlying C++ pointer

func (*Table) Schema

func (t *Table) Schema() *Schema

Schema returns the table Schema

func (*Table) Slice

func (t *Table) Slice(offset int, length int) *Table

Slice returns a zero-copy slice of t. If length is -1, the slice extends to the end of the table.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL