Documentation ¶
Overview ¶
Package arrow provides an implementation of Apache Arrow.
Apache Arrow is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. It also provides computational libraries and zero-copy streaming messaging and inter-process communication.
Basics ¶
The fundamental data structure in Arrow is an Array, which holds a sequence of values of the same type. An array consists of memory holding the data and an additional validity bitmap that indicates if the corresponding entry in the array is valid (not null). If the array has no null entries, it is possible to omit this bitmap.
Example (DenseUnionArray) ¶
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// main builds a dense union array with int8, string and float64 children,
// then prints every slot together with the name of the child that holds it.
func main() {
	mem := memory.NewGoAllocator()

	ub := array.NewEmptyDenseUnionBuilder(mem)
	defer ub.Release()

	// Each child builder is registered once; the code AppendChild returns is
	// what Append takes to say which child the next slot belongs to.
	int8s := array.NewInt8Builder(mem)
	defer int8s.Release()
	int8Code := ub.AppendChild(int8s, "i8")

	strs := array.NewStringBuilder(mem)
	defer strs.Release()
	strCode := ub.AppendChild(strs, "str")

	floats := array.NewFloat64Builder(mem)
	defer floats.Release()
	floatCode := ub.AppendChild(floats, "f64")

	inputs := []interface{}{int8(33), "abc", float64(1.0), float64(-1.0), nil, "", int8(10), "def", int8(-10), float64(0.5)}
	for _, in := range inputs {
		if in == nil {
			ub.AppendNull()
			continue
		}
		// In a dense union only the selected child receives a value.
		switch x := in.(type) {
		case int8:
			ub.Append(int8Code)
			int8s.Append(x)
		case string:
			ub.Append(strCode)
			strs.Append(x)
		case float64:
			ub.Append(floatCode)
			floats.Append(x)
		}
	}

	union := ub.NewDenseUnionArray()
	defer union.Release()

	fmt.Printf("Len() = %d\n", union.Len())

	childFields := union.UnionType().Fields()
	childOffsets := union.RawValueOffsets()
	for slot, n := 0, union.Len(); slot < n; slot++ {
		id := union.ChildID(slot)
		column := union.Field(id)
		// The offset, not the slot number, locates the value inside the child.
		at := int(childOffsets[slot])
		if column.IsNull(at) {
			fmt.Printf("[%d] = (null)\n", slot)
			continue
		}

		var got interface{}
		switch c := column.(type) {
		case *array.Int8:
			got = c.Value(at)
		case *array.String:
			got = c.Value(at)
		case *array.Float64:
			got = c.Value(at)
		}
		fmt.Printf("[%d] = %#5v {%s}\n", slot, got, childFields[id].Name)
	}

	fmt.Printf("i8: %s\n", union.Field(0))
	fmt.Printf("str: %s\n", union.Field(1))
	fmt.Printf("f64: %s\n", union.Field(2))
}
Output: Len() = 10 [0] = 33 {i8} [1] = "abc" {str} [2] = 1 {f64} [3] = -1 {f64} [4] = (null) [5] = "" {str} [6] = 10 {i8} [7] = "def" {str} [8] = -10 {i8} [9] = 0.5 {f64} i8: [33 (null) 10 -10] str: ["abc" "" "def"] f64: [1 -1 0.5]
Example (FixedSizeListArray) ¶
This example shows how to create a FixedSizeList array. The resulting array should be:
[[0, 1, 2], (null), [3, 4, 5], [6, 7, 8], (null)]
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// main builds a fixed-size list array of five int64 triples, two of which
// are null, and prints its null count, length, type and contents.
func main() {
	pool := memory.NewGoAllocator()

	// Every list slot holds exactly listSize child values.
	const listSize = 3

	lb := array.NewFixedSizeListBuilder(pool, listSize, arrow.PrimitiveTypes.Int64)
	defer lb.Release()

	vb := lb.ValueBuilder().(*array.Int64Builder)
	// Five slots of three values each, padding for null slots included.
	vb.Reserve(5 * listSize)

	// In this example AppendNull on the list builder is followed by manual
	// child padding, so that every slot, null or not, is backed by listSize
	// child values and later slots stay aligned.
	padding := []int64{-1, -1, -1}

	lb.Append(true)
	vb.Append(0)
	vb.Append(1)
	vb.Append(2)

	lb.AppendNull()
	vb.AppendValues(padding, nil)

	lb.Append(true)
	vb.Append(3)
	vb.Append(4)
	vb.Append(5)

	lb.Append(true)
	vb.Append(6)
	vb.Append(7)
	vb.Append(8)

	// The trailing null is padded like the first one so the child array
	// ends up with 5 * listSize values rather than stopping short.
	lb.AppendNull()
	vb.AppendValues(padding, nil)

	arr := lb.NewArray().(*array.FixedSizeList)
	defer arr.Release()

	fmt.Printf("NullN()   = %d\n", arr.NullN())
	fmt.Printf("Len()     = %d\n", arr.Len())
	fmt.Printf("Type()    = %v\n", arr.DataType())
	fmt.Printf("List      = %v\n", arr)
}
Output: NullN() = 2 Len() = 5 Type() = fixed_size_list<item: int64, nullable>[3] List = [[0 1 2] (null) [3 4 5] [6 7 8] (null)]
Example (Float64Slice) ¶
This example shows how one can slice an array. The initial (float64) array is:
[1, 2, 3, (null), 4, 5]
and the sub-slice is:
[3, (null), 4]
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// main builds a six-element float64 array containing one null and prints
// both the full array and a three-element slice taken from its middle.
func main() {
	mem := memory.NewGoAllocator()

	fb := array.NewFloat64Builder(mem)
	defer fb.Release()

	// The -1 is a placeholder: its validity flag is false, so it is null.
	vals := []float64{1, 2, 3, -1, 4, 5}
	valid := []bool{true, true, true, false, true, true}
	fb.AppendValues(vals, valid)

	full := fb.NewFloat64Array()
	defer full.Release()
	fmt.Printf("array = %v\n", full)

	// Bounds 2 and 5 select elements 2, 3 and 4 of the parent array.
	window := array.NewSlice(full, 2, 5).(*array.Float64)
	defer window.Release()
	fmt.Printf("slice = %v\n", window)
}
Output: array = [1 2 3 (null) 4 5] slice = [3 (null) 4]
Example (Float64Tensor2x5) ¶
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
	"github.com/apache/arrow/go/v10/arrow/tensor"
)

// main views ten float64 values as a 2x5 tensor using the default strides
// and prints every element, first index varying slowest.
func main() {
	mem := memory.NewGoAllocator()

	fb := array.NewFloat64Builder(mem)
	defer fb.Release()

	fb.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil)

	flat := fb.NewFloat64Array()
	defer flat.Release()

	// Passing nil strides lets the tensor choose its default layout.
	shape := []int64{2, 5}
	tsr := tensor.NewFloat64(flat.Data(), shape, nil, []string{"x", "y"})
	defer tsr.Release()

	for x := int64(0); x < 2; x++ {
		for y := int64(0); y < 5; y++ {
			at := []int64{x, y}
			fmt.Printf("arr%v = %v\n", at, tsr.Value(at))
		}
	}
}
Output: arr[0 0] = 1 arr[0 1] = 2 arr[0 2] = 3 arr[0 3] = 4 arr[0 4] = 5 arr[1 0] = 6 arr[1 1] = 7 arr[1 2] = 8 arr[1 3] = 9 arr[1 4] = 10
Example (Float64Tensor2x5ColMajor) ¶
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
	"github.com/apache/arrow/go/v10/arrow/tensor"
)

// main views ten float64 values as a 2x5 tensor with explicit strides of
// 8 and 16 bytes, so consecutive values run down the first dimension, and
// prints every element.
func main() {
	mem := memory.NewGoAllocator()

	fb := array.NewFloat64Builder(mem)
	defer fb.Release()

	fb.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil)

	flat := fb.NewFloat64Array()
	defer flat.Release()

	// One float64 (8 bytes) per step along x, two (16 bytes) per step along y.
	shape := []int64{2, 5}
	strides := []int64{8, 16}
	tsr := tensor.NewFloat64(flat.Data(), shape, strides, []string{"x", "y"})
	defer tsr.Release()

	for x := int64(0); x < 2; x++ {
		for y := int64(0); y < 5; y++ {
			at := []int64{x, y}
			fmt.Printf("arr%v = %v\n", at, tsr.Value(at))
		}
	}
}
Output: arr[0 0] = 1 arr[0 1] = 3 arr[0 2] = 5 arr[0 3] = 7 arr[0 4] = 9 arr[1 0] = 2 arr[1 1] = 4 arr[1 2] = 6 arr[1 3] = 8 arr[1 4] = 10
Example (FromMemory) ¶
This example demonstrates creating an array, sourcing the values and null bitmaps directly from byte slices. The null count is set to UnknownNullCount, instructing the array to calculate the null count from the bitmap when NullN is called.
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// main wraps two raw byte slices as the value and validity buffers of a
// 16-element boolean array and prints the null count and every element.
func main() {
	// Sixteen values, packed least-significant bit first:
	//   01010011 11000101
	values := memory.NewBufferBytes([]byte{0xca, 0xa3})

	// Validity bitmap in the same bit order; every fourth element is null:
	//   11101110 11101110
	validity := memory.NewBufferBytes([]byte{0x77, 0x77})

	// UnknownNullCount defers counting nulls until NullN is called.
	flags := array.NewBoolean(16, values, validity, array.UnknownNullCount)
	defer flags.Release()

	fmt.Printf("NullN()  = %d\n", flags.NullN())

	for i, n := 0, flags.Len(); i < n; i++ {
		fmt.Printf("bools[%d] = ", i)
		if !flags.IsNull(i) {
			fmt.Printf("%t\n", flags.Value(i))
			continue
		}
		fmt.Println("(null)")
	}
}
Output: NullN() = 4 bools[0] = false bools[1] = true bools[2] = false bools[3] = (null) bools[4] = false bools[5] = false bools[6] = true bools[7] = (null) bools[8] = true bools[9] = true bools[10] = false bools[11] = (null) bools[12] = false bools[13] = true bools[14] = false bools[15] = (null)
Example (ListArray) ¶
This example shows how to create a List array. The resulting array should be:
[[0, 1, 2], (null), [3], [4, 5], [6, 7, 8], (null), [9]]
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// main builds a list-of-int64 array with seven slots, two of them null,
// then prints its metadata, each list individually, and the whole array.
func main() {
	mem := memory.NewGoAllocator()

	lists := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int64)
	defer lists.Release()

	elems := lists.ValueBuilder().(*array.Int64Builder)
	elems.Reserve(10)

	// A nil row stands for a null list slot.
	rows := [][]int64{{0, 1, 2}, nil, {3}, {4, 5}, {6, 7, 8}, nil, {9}}
	for _, row := range rows {
		if row == nil {
			lists.AppendNull()
			continue
		}
		lists.Append(true)
		for _, x := range row {
			elems.Append(x)
		}
	}

	arr := lists.NewArray().(*array.List)
	defer arr.Release()

	// Mark the element field non-nullable before the type is printed.
	arr.DataType().(*arrow.ListType).SetElemNullable(false)

	fmt.Printf("NullN()   = %d\n", arr.NullN())
	fmt.Printf("Len()     = %d\n", arr.Len())
	fmt.Printf("Offsets() = %v\n", arr.Offsets())
	fmt.Printf("Type()    = %v\n", arr.DataType())

	// bounds[i] and bounds[i+1] delimit list i inside the flat value array.
	bounds := arr.Offsets()
	flat := arr.ListValues().(*array.Int64)

	for i, n := 0, arr.Len(); i < n; i++ {
		if !arr.IsValid(i) {
			fmt.Printf("List[%d]   = (null)\n", i)
			continue
		}
		fmt.Printf("List[%d]   = [", i)
		lo, hi := int(bounds[i]), int(bounds[i+1])
		for j := lo; j < hi; j++ {
			if j > lo {
				fmt.Printf(", ")
			}
			fmt.Printf("%v", flat.Value(j))
		}
		fmt.Printf("]\n")
	}
	fmt.Printf("List      = %v\n", arr)
}
Output: NullN() = 2 Len() = 7 Offsets() = [0 3 3 4 6 9 9 10] Type() = list<item: int64> List[0] = [0, 1, 2] List[1] = (null) List[2] = [3] List[3] = [4, 5] List[4] = [6, 7, 8] List[5] = (null) List[6] = [9] List = [[0 1 2] (null) [3] [4 5] [6 7 8] (null) [9]]
Example (MapArray) ¶
This example demonstrates how to create a Map Array. The resulting array should be:
[{["ab" "cd" "ef" "gh"] [1 2 3 4]} (null) {["ab" "cd" "ef" "gh"] [(null) 2 5 1]}]
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// main builds a map array (string keys, int16 items) with three slots, the
// middle one null, and prints each map entry by entry and then as a whole.
func main() {
	mem := memory.NewGoAllocator()

	maps := array.NewMapBuilder(mem, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int16, false)
	defer maps.Release()

	keyB := maps.KeyBuilder().(*array.StringBuilder)
	itemB := maps.ItemBuilder().(*array.Int16Builder)

	names := []string{"ab", "cd", "ef", "gh"}

	// First map: every item is valid.
	maps.Append(true)
	keyB.AppendValues(names, nil)
	itemB.AppendValues([]int16{1, 2, 3, 4}, nil)

	// Second slot is a null map.
	maps.AppendNull()

	// Third map: the first item is null, so its -1 is only a placeholder.
	maps.Append(true)
	keyB.AppendValues(names, nil)
	itemB.AppendValues([]int16{-1, 2, 5, 1}, []bool{false, true, true, true})

	arr := maps.NewMapArray()
	defer arr.Release()

	fmt.Printf("NullN() = %d\n", arr.NullN())
	fmt.Printf("Len()   = %d\n", arr.Len())

	bounds := arr.Offsets()
	keys := arr.Keys().(*array.String)
	items := arr.Items().(*array.Int16)

	for i, n := 0, arr.Len(); i < n; i++ {
		if arr.IsNull(i) {
			fmt.Printf("Map[%d] = (null)\n", i)
			continue
		}

		fmt.Printf("Map[%d] = {", i)
		first, end := int(bounds[i]), int(bounds[i+1])
		for j := first; j < end; j++ {
			if j > first {
				fmt.Printf(", ")
			}
			fmt.Printf("%v => ", keys.Value(j))
			if !items.IsValid(j) {
				fmt.Printf("(null)")
				continue
			}
			fmt.Printf("%v", items.Value(j))
		}
		fmt.Printf("}\n")
	}
	fmt.Printf("Map    = %v\n", arr)
}
Output: NullN() = 1 Len() = 3 Map[0] = {ab => 1, cd => 2, ef => 3, gh => 4} Map[1] = (null) Map[2] = {ab => (null), cd => 2, ef => 5, gh => 1} Map = [{["ab" "cd" "ef" "gh"] [1 2 3 4]} (null) {["ab" "cd" "ef" "gh"] [(null) 2 5 1]}]
Example (Minimal) ¶
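This example demonstrates how to build an array of int64 values using a builder and Append. Whilst convenient for small arrays, appending one value at a time becomes verbose for larger inputs; the other examples on this page use AppendValues to add a whole slice in a single call.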
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// main appends seven int64 values and one null to a builder one element at
// a time, then prints each element and the finished array.
func main() {
	// Allocator that backs the builder's buffers.
	mem := memory.NewGoAllocator()

	ib := array.NewInt64Builder(mem)
	defer ib.Release()

	for _, v := range []int64{1, 2, 3} {
		ib.Append(v)
	}
	ib.AppendNull() // slot 3 carries no value
	for _, v := range []int64{5, 6, 7, 8} {
		ib.Append(v)
	}

	// NewInt64Array hands back the finished array and resets the builder.
	arr := ib.NewInt64Array()
	defer arr.Release()

	// The raw value slice also has an entry for the null slot, so validity
	// is checked per index before printing.
	for i, v := range arr.Int64Values() {
		fmt.Printf("ints[%d] = ", i)
		switch {
		case arr.IsNull(i):
			fmt.Println("(null)")
		default:
			fmt.Println(v)
		}
	}
	fmt.Printf("ints = %v\n", arr)
}
Output: ints[0] = 1 ints[1] = 2 ints[2] = 3 ints[3] = (null) ints[4] = 5 ints[5] = 6 ints[6] = 7 ints[7] = 8 ints = [1 2 3 (null) 5 6 7 8]
Example (Record) ¶
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// main builds a two-column record (int32 and float64, ten rows each) and
// prints every column with its name.
func main() {
	mem := memory.NewGoAllocator()

	columns := []arrow.Field{
		{Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32},
		{Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64},
	}
	schema := arrow.NewSchema(columns, nil)

	rb := array.NewRecordBuilder(mem, schema)
	defer rb.Release()

	// The int32 column is filled in two batches; the second one marks its
	// third value (9) as null.
	rb.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil)
	rb.Field(0).(*array.Int32Builder).AppendValues(
		[]int32{7, 8, 9, 10},
		[]bool{true, true, false, true},
	)
	rb.Field(1).(*array.Float64Builder).AppendValues(
		[]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		nil,
	)

	record := rb.NewRecord()
	defer record.Release()

	for idx, column := range record.Columns() {
		fmt.Printf("column[%d] %q: %v\n", idx, record.ColumnName(idx), column)
	}
}
Output: column[0] "f1-i32": [1 2 3 4 5 6 7 8 (null) 10] column[1] "f2-f64": [1 2 3 4 5 6 7 8 9 10]
Example (RecordReader) ¶
package main

import (
	"fmt"
	"log"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// main builds two records that share one schema, wraps them in a record
// reader, and prints every column of every record the reader yields.
func main() {
	mem := memory.NewGoAllocator()

	columns := []arrow.Field{
		{Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32},
		{Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64},
	}
	schema := arrow.NewSchema(columns, nil)

	rb := array.NewRecordBuilder(mem, schema)
	defer rb.Release()

	// First record: rows 1..10, with the int32 value 9 marked null.
	rb.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil)
	rb.Field(0).(*array.Int32Builder).AppendValues(
		[]int32{7, 8, 9, 10},
		[]bool{true, true, false, true},
	)
	rb.Field(1).(*array.Float64Builder).AppendValues(
		[]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		nil,
	)
	first := rb.NewRecord()
	defer first.Release()

	// Second record: rows 11..20, no nulls.
	rb.Field(0).(*array.Int32Builder).AppendValues(
		[]int32{11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
		nil,
	)
	rb.Field(1).(*array.Float64Builder).AppendValues(
		[]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
		nil,
	)
	second := rb.NewRecord()
	defer second.Release()

	reader, err := array.NewRecordReader(schema, []arrow.Record{first, second})
	if err != nil {
		log.Fatal(err)
	}
	defer reader.Release()

	for batch := 0; reader.Next(); batch++ {
		current := reader.Record()
		for idx, column := range current.Columns() {
			fmt.Printf("rec[%d][%q]: %v\n", batch, current.ColumnName(idx), column)
		}
	}
}
Output: rec[0]["f1-i32"]: [1 2 3 4 5 6 7 8 (null) 10] rec[0]["f2-f64"]: [1 2 3 4 5 6 7 8 9 10] rec[1]["f1-i32"]: [11 12 13 14 15 16 17 18 19 20] rec[1]["f2-f64"]: [11 12 13 14 15 16 17 18 19 20]
Example (SparseUnionArray) ¶
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// main builds a sparse union array with int8, string and float64 children,
// then prints every slot together with the name of the child that holds it.
func main() {
	mem := memory.NewGoAllocator()

	ub := array.NewEmptySparseUnionBuilder(mem)
	defer ub.Release()

	int8s := array.NewInt8Builder(mem)
	defer int8s.Release()
	int8Code := ub.AppendChild(int8s, "i8")

	strs := array.NewStringBuilder(mem)
	defer strs.Release()
	strCode := ub.AppendChild(strs, "str")

	floats := array.NewFloat64Builder(mem)
	defer floats.Release()
	floatCode := ub.AppendChild(floats, "f64")

	inputs := []interface{}{int8(33), "abc", float64(1.0), float64(-1.0), nil, "", int8(10), "def", int8(-10), float64(0.5)}
	for _, in := range inputs {
		if in == nil {
			ub.AppendNull()
			continue
		}
		// In a sparse union every child grows by one entry per slot: the
		// selected child gets the value, the others an empty placeholder.
		switch x := in.(type) {
		case int8:
			ub.Append(int8Code)
			int8s.Append(x)
			strs.AppendEmptyValue()
			floats.AppendEmptyValue()
		case string:
			ub.Append(strCode)
			int8s.AppendEmptyValue()
			strs.Append(x)
			floats.AppendEmptyValue()
		case float64:
			ub.Append(floatCode)
			int8s.AppendEmptyValue()
			strs.AppendEmptyValue()
			floats.Append(x)
		}
	}

	union := ub.NewSparseUnionArray()
	defer union.Release()

	fmt.Printf("Len() = %d\n", union.Len())

	childFields := union.UnionType().Fields()
	for slot, n := 0, union.Len(); slot < n; slot++ {
		id := union.ChildID(slot)
		column := union.Field(id)
		// Children are as long as the union, so the slot number indexes them directly.
		if column.IsNull(slot) {
			fmt.Printf("[%d] = (null)\n", slot)
			continue
		}

		var got interface{}
		switch c := column.(type) {
		case *array.Int8:
			got = c.Value(slot)
		case *array.String:
			got = c.Value(slot)
		case *array.Float64:
			got = c.Value(slot)
		}
		fmt.Printf("[%d] = %#5v {%s}\n", slot, got, childFields[id].Name)
	}

	fmt.Printf("i8: %s\n", union.Field(0))
	fmt.Printf("str: %s\n", union.Field(1))
	fmt.Printf("f64: %s\n", union.Field(2))
}
Output: Len() = 10 [0] = 33 {i8} [1] = "abc" {str} [2] = 1 {f64} [3] = -1 {f64} [4] = (null) [5] = "" {str} [6] = 10 {i8} [7] = "def" {str} [8] = -10 {i8} [9] = 0.5 {f64} i8: [33 0 0 0 (null) 0 10 0 -10 0] str: ["" "abc" "" "" "" "" "" "def" "" ""] f64: [0 0 1 -1 0 0 0 0 0 0.5]
Example (StructArray) ¶
This example shows how to create a Struct array. The resulting array should be:
[{'joe', 1}, {null, 2}, null, {'mark', 4}]
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// main builds a struct array with a list<uint8> field ("f1") and an int32
// field ("f2"), containing one null struct and one struct whose list field
// is null, then prints every row.
func main() {
	pool := memory.NewGoAllocator()

	dtype := arrow.StructOf([]arrow.Field{
		{Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)},
		{Name: "f2", Type: arrow.PrimitiveTypes.Int32},
	}...)

	sb := array.NewStructBuilder(pool, dtype)
	defer sb.Release()

	f1b := sb.FieldBuilder(0).(*array.ListBuilder)
	f1vb := f1b.ValueBuilder().(*array.Uint8Builder)
	f2b := sb.FieldBuilder(1).(*array.Int32Builder)

	sb.Reserve(4)
	f1vb.Reserve(7)
	f2b.Reserve(3)

	// Row 0: {"joe", 1}
	sb.Append(true)
	f1b.Append(true)
	f1vb.AppendValues([]byte("joe"), nil)
	f2b.Append(1)

	// Row 1: {null, 2} (the struct is valid, only its list field is null).
	sb.Append(true)
	f1b.AppendNull()
	f2b.Append(2)

	// Row 2: the whole struct is null.
	sb.AppendNull()

	// Row 3: {"mark", 4}
	sb.Append(true)
	f1b.Append(true)
	f1vb.AppendValues([]byte("mark"), nil)
	f2b.Append(4)

	arr := sb.NewArray().(*array.Struct)
	defer arr.Release()

	fmt.Printf("NullN() = %d\n", arr.NullN())
	fmt.Printf("Len()   = %d\n", arr.Len())

	list := arr.Field(0).(*array.List)
	offsets := list.Offsets()

	varr := list.ListValues().(*array.Uint8)
	ints := arr.Field(1).(*array.Int32)

	for i := 0; i < arr.Len(); i++ {
		if !arr.IsValid(i) {
			fmt.Printf("Struct[%d] = (null)\n", i)
			continue
		}
		fmt.Printf("Struct[%d] = [", i)
		// List validity is tracked per row, so it is tested with the row
		// index i. Using the value offset offsets[i] would test a different
		// row and print a null list as an empty one.
		if list.IsValid(i) {
			fmt.Printf("[")
			for j := offsets[i]; j < offsets[i+1]; j++ {
				if j != offsets[i] {
					fmt.Printf(", ")
				}
				fmt.Printf("%v", string(varr.Value(int(j))))
			}
			fmt.Printf("], ")
		} else {
			fmt.Printf("(null), ")
		}
		fmt.Printf("%d]\n", ints.Value(i))
	}
}
Output: NullN() = 1 Len() = 4 Struct[0] = [[j, o, e], 1] Struct[1] = [(null), 2] Struct[2] = (null) Struct[3] = [[m, a, r, k], 4]
Example (Table) ¶
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v10/arrow"
	"github.com/apache/arrow/go/v10/arrow/array"
	"github.com/apache/arrow/go/v10/arrow/memory"
)

// main combines two ten-row records into a table, reads the table back
// through a table reader created with a chunk size of 5, and prints every
// column of every record the reader yields.
func main() {
	mem := memory.NewGoAllocator()

	columns := []arrow.Field{
		{Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32},
		{Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64},
	}
	schema := arrow.NewSchema(columns, nil)

	rb := array.NewRecordBuilder(mem, schema)
	defer rb.Release()

	// First record: rows 1..10, with the int32 value 9 marked null.
	rb.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil)
	rb.Field(0).(*array.Int32Builder).AppendValues(
		[]int32{7, 8, 9, 10},
		[]bool{true, true, false, true},
	)
	rb.Field(1).(*array.Float64Builder).AppendValues(
		[]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
		nil,
	)
	first := rb.NewRecord()
	defer first.Release()

	// Second record: rows 11..20, no nulls.
	rb.Field(0).(*array.Int32Builder).AppendValues(
		[]int32{11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
		nil,
	)
	rb.Field(1).(*array.Float64Builder).AppendValues(
		[]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
		nil,
	)
	second := rb.NewRecord()
	defer second.Release()

	table := array.NewTableFromRecords(schema, []arrow.Record{first, second})
	defer table.Release()

	reader := array.NewTableReader(table, 5)
	defer reader.Release()

	for chunk := 0; reader.Next(); chunk++ {
		current := reader.Record()
		for idx, column := range current.Columns() {
			fmt.Printf("rec[%d][%q]: %v\n", chunk, current.ColumnName(idx), column)
		}
	}
}
Output: rec[0]["f1-i32"]: [1 2 3 4 5] rec[0]["f2-f64"]: [1 2 3 4 5] rec[1]["f1-i32"]: [6 7 8 (null) 10] rec[1]["f2-f64"]: [6 7 8 9 10] rec[2]["f1-i32"]: [11 12 13 14 15] rec[2]["f2-f64"]: [11 12 13 14 15] rec[3]["f1-i32"]: [16 17 18 19 20] rec[3]["f2-f64"]: [16 17 18 19 20]
Index ¶
- Constants
- Variables
- func ConvertTimestampValue(in, out TimeUnit, value int64) int64
- func HashType(seed maphash.Seed, dt DataType) uint64
- func IsBaseBinary(t Type) bool
- func IsBinaryLike(t Type) bool
- func IsDecimal(t Type) bool
- func IsFixedSizeBinary(t Type) bool
- func IsInteger(t Type) bool
- func IsLargeBinaryLike(t Type) bool
- func IsListLike(t Type) bool
- func IsNested(t Type) bool
- func IsPrimitive(t Type) bool
- func IsUnion(t Type) bool
- func IsUnsignedInteger(t Type) bool
- func RegisterExtensionType(typ ExtensionType) error
- func TypeEqual(left, right DataType, opts ...TypeEqualOption) bool
- func TypesToString(types []DataType) string
- func UnregisterExtensionType(typName string) error
- type Array
- type ArrayData
- type BinaryDataType
- type BinaryType
- type BooleanType
- type BufferKind
- type BufferSpec
- type Chunked
- type Column
- type DataType
- type DataTypeLayout
- type Date32
- type Date32Type
- type Date64
- type Date64Type
- type DayTimeInterval
- type DayTimeIntervalType
- func (t *DayTimeIntervalType) BitWidth() int
- func (DayTimeIntervalType) Bytes() int
- func (*DayTimeIntervalType) Fingerprint() string
- func (*DayTimeIntervalType) ID() Type
- func (DayTimeIntervalType) Layout() DataTypeLayout
- func (*DayTimeIntervalType) Name() string
- func (*DayTimeIntervalType) String() string
- type Decimal128Type
- type Decimal256Type
- type DenseUnionType
- func (t DenseUnionType) ChildIDs() []int
- func (t DenseUnionType) Fields() []Field
- func (t *DenseUnionType) Fingerprint() string
- func (DenseUnionType) ID() Type
- func (DenseUnionType) Layout() DataTypeLayout
- func (t *DenseUnionType) MaxTypeCode() (max UnionTypeCode)
- func (DenseUnionType) Mode() UnionMode
- func (DenseUnionType) Name() string
- func (DenseUnionType) OffsetTypeTraits() OffsetTraits
- func (t *DenseUnionType) String() string
- func (t DenseUnionType) TypeCodes() []UnionTypeCode
- type DictionaryType
- type Duration
- type DurationType
- func (*DurationType) BitWidth() int
- func (*DurationType) Bytes() int
- func (t *DurationType) Fingerprint() string
- func (*DurationType) ID() Type
- func (DurationType) Layout() DataTypeLayout
- func (*DurationType) Name() string
- func (t *DurationType) String() string
- func (t *DurationType) TimeUnit() TimeUnit
- type ExtensionBase
- type ExtensionType
- type Field
- type FixedSizeBinaryType
- func (t *FixedSizeBinaryType) BitWidth() int
- func (t *FixedSizeBinaryType) Bytes() int
- func (t *FixedSizeBinaryType) Fingerprint() string
- func (*FixedSizeBinaryType) ID() Type
- func (t *FixedSizeBinaryType) Layout() DataTypeLayout
- func (*FixedSizeBinaryType) Name() string
- func (t *FixedSizeBinaryType) String() string
- type FixedSizeListType
- func (t *FixedSizeListType) Elem() DataType
- func (t *FixedSizeListType) ElemField() Field
- func (t *FixedSizeListType) Fields() []Field
- func (t *FixedSizeListType) Fingerprint() string
- func (*FixedSizeListType) ID() Type
- func (FixedSizeListType) Layout() DataTypeLayout
- func (t *FixedSizeListType) Len() int32
- func (*FixedSizeListType) Name() string
- func (t *FixedSizeListType) String() string
- type FixedWidthDataType
- type Float16Type
- type Float32Type
- type Float64Type
- type Int16Type
- type Int32Type
- type Int64Type
- type Int8Type
- type LargeBinaryType
- func (t *LargeBinaryType) Fingerprint() string
- func (t *LargeBinaryType) ID() Type
- func (LargeBinaryType) IsUtf8() bool
- func (t *LargeBinaryType) Layout() DataTypeLayout
- func (t *LargeBinaryType) Name() string
- func (t *LargeBinaryType) OffsetTypeTraits() OffsetTraits
- func (t *LargeBinaryType) String() string
- type LargeListType
- type LargeStringType
- func (t *LargeStringType) Fingerprint() string
- func (t *LargeStringType) ID() Type
- func (LargeStringType) IsUtf8() bool
- func (t *LargeStringType) Layout() DataTypeLayout
- func (t *LargeStringType) Name() string
- func (t *LargeStringType) OffsetTypeTraits() OffsetTraits
- func (t *LargeStringType) String() string
- type ListType
- func (t *ListType) Elem() DataType
- func (t *ListType) ElemField() Field
- func (t *ListType) Fields() []Field
- func (t *ListType) Fingerprint() string
- func (*ListType) ID() Type
- func (ListType) Layout() DataTypeLayout
- func (*ListType) Name() string
- func (ListType) OffsetTypeTraits() OffsetTraits
- func (t *ListType) SetElemMetadata(md Metadata)
- func (t *ListType) SetElemNullable(n bool)
- func (t *ListType) String() string
- type MapType
- func (t *MapType) Fields() []Field
- func (t *MapType) Fingerprint() string
- func (*MapType) ID() Type
- func (t *MapType) ItemField() Field
- func (t *MapType) ItemType() DataType
- func (t *MapType) KeyField() Field
- func (t *MapType) KeyType() DataType
- func (t *MapType) Layout() DataTypeLayout
- func (*MapType) Name() string
- func (MapType) OffsetTypeTraits() OffsetTraits
- func (t *MapType) SetItemNullable(nullable bool)
- func (t *MapType) String() string
- func (t *MapType) ValueField() Field
- func (t *MapType) ValueType() *StructType
- type Metadata
- type MonthDayNanoInterval
- type MonthDayNanoIntervalType
- func (*MonthDayNanoIntervalType) BitWidth() int
- func (*MonthDayNanoIntervalType) Bytes() int
- func (*MonthDayNanoIntervalType) Fingerprint() string
- func (*MonthDayNanoIntervalType) ID() Type
- func (MonthDayNanoIntervalType) Layout() DataTypeLayout
- func (*MonthDayNanoIntervalType) Name() string
- func (*MonthDayNanoIntervalType) String() string
- type MonthInterval
- type MonthIntervalType
- type NestedType
- type NullType
- type OffsetTraits
- type OffsetsDataType
- type Record
- type Schema
- func (sc *Schema) Endianness() endian.Endianness
- func (sc *Schema) Equal(o *Schema) bool
- func (sc *Schema) Field(i int) Field
- func (sc *Schema) FieldIndices(n string) []int
- func (sc *Schema) Fields() []Field
- func (sc *Schema) FieldsByName(n string) ([]Field, bool)
- func (s *Schema) Fingerprint() string
- func (sc *Schema) HasField(n string) bool
- func (sc *Schema) HasMetadata() bool
- func (sc *Schema) IsNativeEndian() bool
- func (sc *Schema) Metadata() Metadata
- func (s *Schema) String() string
- func (sc *Schema) WithEndianness(e endian.Endianness) *Schema
- type SparseUnionType
- func (t SparseUnionType) ChildIDs() []int
- func (t SparseUnionType) Fields() []Field
- func (t *SparseUnionType) Fingerprint() string
- func (SparseUnionType) ID() Type
- func (SparseUnionType) Layout() DataTypeLayout
- func (t *SparseUnionType) MaxTypeCode() (max UnionTypeCode)
- func (SparseUnionType) Mode() UnionMode
- func (SparseUnionType) Name() string
- func (t *SparseUnionType) String() string
- func (t SparseUnionType) TypeCodes() []UnionTypeCode
- type StringType
- type StructType
- func (t *StructType) Field(i int) Field
- func (t *StructType) FieldByName(name string) (Field, bool)
- func (t *StructType) FieldIdx(name string) (int, bool)
- func (t *StructType) Fields() []Field
- func (t *StructType) Fingerprint() string
- func (*StructType) ID() Type
- func (StructType) Layout() DataTypeLayout
- func (*StructType) Name() string
- func (t *StructType) String() string
- type Table
- type TemporalWithUnit
- type Time32
- type Time32Type
- type Time64
- type Time64Type
- type TimeUnit
- type Timestamp
- type TimestampConvertOp
- type TimestampType
- func (*TimestampType) BitWidth() int
- func (TimestampType) Bytes() int
- func (t *TimestampType) ClearCachedLocation()
- func (t *TimestampType) Fingerprint() string
- func (t *TimestampType) GetToTimeFunc() (func(Timestamp) time.Time, error)
- func (t *TimestampType) GetZone() (*time.Location, error)
- func (*TimestampType) ID() Type
- func (TimestampType) Layout() DataTypeLayout
- func (*TimestampType) Name() string
- func (t *TimestampType) String() string
- func (t *TimestampType) TimeUnit() TimeUnit
- type Type
- type TypeEqualOption
- type Uint16Type
- type Uint32Type
- type Uint64Type
- type Uint8Type
- type UnionMode
- type UnionType
- type UnionTypeCode
Examples ¶
- Package (DenseUnionArray)
- Package (FixedSizeListArray)
- Package (Float64Slice)
- Package (Float64Tensor2x5)
- Package (Float64Tensor2x5ColMajor)
- Package (FromMemory)
- Package (ListArray)
- Package (MapArray)
- Package (Minimal)
- Package (Record)
- Package (RecordReader)
- Package (SparseUnionArray)
- Package (StructArray)
- Package (Table)
Constants ¶
const ( ConvDIVIDE = iota ConvMULTIPLY )
// Union limits and union modes. iota numbers every entry of this declaration starting from zero, so SparseMode is 2 and DenseMode is 3.
const ( MaxUnionTypeCode UnionTypeCode = 127 InvalidUnionChildID int = -1 SparseMode UnionMode = iota // SPARSE DenseMode // DENSE )
const ( // Date32SizeBytes specifies the number of bytes required to store a single Date32 in memory Date32SizeBytes = int(unsafe.Sizeof(Date32(0))) )
const ( // Date64SizeBytes specifies the number of bytes required to store a single Date64 in memory Date64SizeBytes = int(unsafe.Sizeof(Date64(0))) )
const ( // DayTimeIntervalSizeBytes specifies the number of bytes required to store a single DayTimeInterval in memory DayTimeIntervalSizeBytes = int(unsafe.Sizeof(DayTimeInterval{})) )
const ( // Decimal128SizeBytes specifies the number of bytes required to store a single decimal128 in memory Decimal128SizeBytes = int(unsafe.Sizeof(decimal128.Num{})) )
const (
// Decimal256SizeBytes specifies the number of bytes required to store a single decimal256 in memory
Decimal256SizeBytes = int(unsafe.Sizeof(decimal256.Num{}))
)
const ( // DurationSizeBytes specifies the number of bytes required to store a single Duration in memory DurationSizeBytes = int(unsafe.Sizeof(Duration(0))) )
const ( // Float16SizeBytes specifies the number of bytes required to store a single float16 in memory Float16SizeBytes = int(unsafe.Sizeof(uint16(0))) )
const ( // Float32SizeBytes specifies the number of bytes required to store a single float32 in memory Float32SizeBytes = int(unsafe.Sizeof(float32(0))) )
const ( // Float64SizeBytes specifies the number of bytes required to store a single float64 in memory Float64SizeBytes = int(unsafe.Sizeof(float64(0))) )
const ( // Int16SizeBytes specifies the number of bytes required to store a single int16 in memory Int16SizeBytes = int(unsafe.Sizeof(int16(0))) )
const ( // Int32SizeBytes specifies the number of bytes required to store a single int32 in memory Int32SizeBytes = int(unsafe.Sizeof(int32(0))) )
const ( // Int64SizeBytes specifies the number of bytes required to store a single int64 in memory Int64SizeBytes = int(unsafe.Sizeof(int64(0))) )
const ( // Int8SizeBytes specifies the number of bytes required to store a single int8 in memory Int8SizeBytes = int(unsafe.Sizeof(int8(0))) )
const ( // MonthDayNanoIntervalSizeBytes specifies the number of bytes required to store a single MonthDayNanoInterval in memory MonthDayNanoIntervalSizeBytes = int(unsafe.Sizeof(MonthDayNanoInterval{})) )
const ( // MonthIntervalSizeBytes specifies the number of bytes required to store a single MonthInterval in memory MonthIntervalSizeBytes = int(unsafe.Sizeof(MonthInterval(0))) )
const PkgVersion = "10.0.1"
const ( // Time32SizeBytes specifies the number of bytes required to store a single Time32 in memory Time32SizeBytes = int(unsafe.Sizeof(Time32(0))) )
const ( // Time64SizeBytes specifies the number of bytes required to store a single Time64 in memory Time64SizeBytes = int(unsafe.Sizeof(Time64(0))) )
const ( // TimestampSizeBytes specifies the number of bytes required to store a single Timestamp in memory TimestampSizeBytes = int(unsafe.Sizeof(Timestamp(0))) )
const ( // Uint16SizeBytes specifies the number of bytes required to store a single uint16 in memory Uint16SizeBytes = int(unsafe.Sizeof(uint16(0))) )
const ( // Uint32SizeBytes specifies the number of bytes required to store a single uint32 in memory Uint32SizeBytes = int(unsafe.Sizeof(uint32(0))) )
const ( // Uint64SizeBytes specifies the number of bytes required to store a single uint64 in memory Uint64SizeBytes = int(unsafe.Sizeof(uint64(0))) )
const ( // Uint8SizeBytes specifies the number of bytes required to store a single uint8 in memory Uint8SizeBytes = int(unsafe.Sizeof(uint8(0))) )
Variables ¶
var ( ErrInvalid = errors.New("invalid") ErrNotImplemented = errors.New("not implemented") ErrType = errors.New("type error") ErrKey = errors.New("key error") ErrIndex = errors.New("index error") )
var ( MonthIntervalTraits monthTraits DayTimeIntervalTraits daytimeTraits MonthDayNanoIntervalTraits monthDayNanoTraits )
var ( Int64Traits int64Traits Uint64Traits uint64Traits Float64Traits float64Traits Int32Traits int32Traits Uint32Traits uint32Traits Float32Traits float32Traits Int16Traits int16Traits Uint16Traits uint16Traits Int8Traits int8Traits Uint8Traits uint8Traits TimestampTraits timestampTraits Time32Traits time32Traits Time64Traits time64Traits Date32Traits date32Traits Date64Traits date64Traits DurationTraits durationTraits )
var ( BinaryTypes = struct { Binary BinaryDataType String BinaryDataType LargeBinary BinaryDataType LargeString BinaryDataType }{ Binary: &BinaryType{}, String: &StringType{}, LargeBinary: &LargeBinaryType{}, LargeString: &LargeStringType{}, } )
var BooleanTraits booleanTraits
var Decimal128Traits decimal128Traits
Decimal128 traits
var Decimal256Traits decimal256Traits
Decimal256 traits
var ( FixedWidthTypes = struct { Boolean FixedWidthDataType Date32 FixedWidthDataType Date64 FixedWidthDataType DayTimeInterval FixedWidthDataType Duration_s FixedWidthDataType Duration_ms FixedWidthDataType Duration_us FixedWidthDataType Duration_ns FixedWidthDataType Float16 FixedWidthDataType MonthInterval FixedWidthDataType Time32s FixedWidthDataType Time32ms FixedWidthDataType Time64us FixedWidthDataType Time64ns FixedWidthDataType Timestamp_s FixedWidthDataType Timestamp_ms FixedWidthDataType Timestamp_us FixedWidthDataType Timestamp_ns FixedWidthDataType MonthDayNanoInterval FixedWidthDataType }{ Boolean: &BooleanType{}, Date32: &Date32Type{}, Date64: &Date64Type{}, DayTimeInterval: &DayTimeIntervalType{}, Duration_s: &DurationType{Unit: Second}, Duration_ms: &DurationType{Unit: Millisecond}, Duration_us: &DurationType{Unit: Microsecond}, Duration_ns: &DurationType{Unit: Nanosecond}, Float16: &Float16Type{}, MonthInterval: &MonthIntervalType{}, Time32s: &Time32Type{Unit: Second}, Time32ms: &Time32Type{Unit: Millisecond}, Time64us: &Time64Type{Unit: Microsecond}, Time64ns: &Time64Type{Unit: Nanosecond}, Timestamp_s: &TimestampType{Unit: Second, TimeZone: "UTC"}, Timestamp_ms: &TimestampType{Unit: Millisecond, TimeZone: "UTC"}, Timestamp_us: &TimestampType{Unit: Microsecond, TimeZone: "UTC"}, Timestamp_ns: &TimestampType{Unit: Nanosecond, TimeZone: "UTC"}, MonthDayNanoInterval: &MonthDayNanoIntervalType{}, } )
var Float16Traits float16Traits
Float16 traits
var ( PrimitiveTypes = struct { Int8 DataType Int16 DataType Int32 DataType Int64 DataType Uint8 DataType Uint16 DataType Uint32 DataType Uint64 DataType Float32 DataType Float64 DataType Date32 DataType Date64 DataType }{ Int8: &Int8Type{}, Int16: &Int16Type{}, Int32: &Int32Type{}, Int64: &Int64Type{}, Uint8: &Uint8Type{}, Uint16: &Uint16Type{}, Uint32: &Uint32Type{}, Uint64: &Uint64Type{}, Float32: &Float32Type{}, Float64: &Float64Type{}, Date32: &Date32Type{}, Date64: &Date64Type{}, } )
Functions ¶
func ConvertTimestampValue ¶
func IsBaseBinary ¶
IsBaseBinary returns true for Binary/String and their LARGE variants
func IsBinaryLike ¶
IsBinaryLike returns true for only BINARY and STRING
func IsFixedSizeBinary ¶
IsFixedSizeBinary returns true for Decimal128/256 and FixedSizeBinary
func IsInteger ¶
IsInteger is a helper to return true if the type ID provided is one of the integral types of uint or int with the varying sizes.
func IsLargeBinaryLike ¶
IsLargeBinaryLike returns true for only LARGE_BINARY and LARGE_STRING
func IsListLike ¶
IsListLike returns true for List, LargeList, FixedSizeList, and Map
func IsPrimitive ¶
IsPrimitive returns true if the provided type ID represents a fixed width primitive type.
func IsUnsignedInteger ¶
IsUnsignedInteger is a helper that returns true if the type ID provided is one of the uint integral types (uint8, uint16, uint32, uint64)
func RegisterExtensionType ¶
func RegisterExtensionType(typ ExtensionType) error
RegisterExtensionType registers the provided ExtensionType by calling ExtensionName to use as a key for registering the type. If a type with the same name is already registered, this returns an error saying so; otherwise it returns nil once the type has been registered successfully. This function is safe to call from multiple goroutines simultaneously.
func TypeEqual ¶
func TypeEqual(left, right DataType, opts ...TypeEqualOption) bool
TypeEqual checks if two DataType are the same, optionally checking metadata equality for STRUCT types.
func TypesToString ¶
TypesToString is a convenience function to create a list of types which are comma delimited as a string
func UnregisterExtensionType ¶
UnregisterExtensionType removes the type with the given name from the registry causing any messages with that type which come in to be expressed with their metadata and underlying type instead of the extension type that isn't known. This function is safe to call from multiple goroutines simultaneously.
Types ¶
type Array ¶
type Array interface { json.Marshaler fmt.Stringer // DataType returns the type metadata for this instance. DataType() DataType // NullN returns the number of null values in the array. NullN() int // NullBitmapBytes returns a byte slice of the validity bitmap. NullBitmapBytes() []byte // IsNull returns true if value at index is null. // NOTE: IsNull will panic if NullBitmapBytes is not empty and i < 0 or i ≥ Len. IsNull(i int) bool // IsValid returns true if value at index is not null. // NOTE: IsValid will panic if NullBitmapBytes is not empty and i < 0 or i ≥ Len. IsValid(i int) bool Data() ArrayData // Len returns the number of elements in the array. Len() int // Retain increases the reference count by 1. // Retain may be called simultaneously from multiple goroutines. Retain() // Release decreases the reference count by 1. // Release may be called simultaneously from multiple goroutines. // When the reference count goes to zero, the memory is freed. Release() }
Array represents an immutable sequence of values using the Arrow in-memory format.
type ArrayData ¶
type ArrayData interface { // Retain increases the reference count by 1, it is safe to call // in multiple goroutines simultaneously. Retain() // Release decreases the reference count by 1, it is safe to call // in multiple goroutines simultaneously. Data is removed when reference // count is 0. Release() // DataType returns the current datatype stored in the object. DataType() DataType // NullN returns the number of nulls for this data instance. NullN() int // Len returns the length of this data instance Len() int // Offset returns the offset into the raw buffers where this data begins Offset() int // Buffers returns the slice of raw data buffers for this data instance. Their // meaning depends on the context of the data type. Buffers() []*memory.Buffer // Children returns the slice of children data instances, only relevant for // nested data types. For instance, List data will have a single child containing // elements of all the rows and Struct data will contain numfields children which // are the arrays for each field of the struct. Children() []ArrayData // Reset allows reusing this ArrayData object by replacing the data in this ArrayData // object without changing the reference count. Reset(newtype DataType, newlength int, newbuffers []*memory.Buffer, newchildren []ArrayData, newnulls int, newoffset int) // Dictionary returns the ArrayData object for the dictionary if this is a // dictionary array, otherwise it will be nil. Dictionary() ArrayData }
ArrayData is the underlying memory and metadata of an Arrow array, corresponding to the same-named object in the C++ implementation.
The Array interface and subsequent typed objects provide strongly typed accessors which support marshalling and other patterns to the data. This interface allows direct access to the underlying raw byte buffers which allows for manipulating the internal data and casting. For example, one could cast the raw bytes from int64 to float64 like so:
arrdata := GetMyInt64Data().Data() newdata := array.NewData(arrow.PrimitiveTypes.Float64, arrdata.Len(), arrdata.Buffers(), nil, arrdata.NullN(), arrdata.Offset()) defer newdata.Release() float64arr := array.NewFloat64Data(newdata) defer float64arr.Release()
This is also useful in an analytics setting where memory may be reused. For example, if we had a group of operations all returning float64 such as:
Log(Sqrt(Expr(arr)))
The low-level implementations could have signatures such as:
func Log(values arrow.ArrayData) arrow.ArrayData
Another example would be a function that consumes one or more memory buffers in an input array and replaces them with newly-allocated data, changing the output data type as well.
type BinaryDataType ¶
type BinaryType ¶
type BinaryType struct{}
func (*BinaryType) Fingerprint ¶
func (t *BinaryType) Fingerprint() string
func (*BinaryType) ID ¶
func (t *BinaryType) ID() Type
func (BinaryType) IsUtf8 ¶
func (BinaryType) IsUtf8() bool
func (*BinaryType) Layout ¶
func (t *BinaryType) Layout() DataTypeLayout
func (*BinaryType) Name ¶
func (t *BinaryType) Name() string
func (*BinaryType) OffsetTypeTraits ¶
func (t *BinaryType) OffsetTypeTraits() OffsetTraits
func (*BinaryType) String ¶
func (t *BinaryType) String() string
type BooleanType ¶
type BooleanType struct{}
func (*BooleanType) BitWidth ¶
func (t *BooleanType) BitWidth() int
BitWidth returns the number of bits required to store a single element of this data type in memory.
func (BooleanType) Bytes ¶
func (BooleanType) Bytes() int
func (*BooleanType) Fingerprint ¶
func (t *BooleanType) Fingerprint() string
func (*BooleanType) ID ¶
func (t *BooleanType) ID() Type
func (BooleanType) Layout ¶
func (BooleanType) Layout() DataTypeLayout
func (*BooleanType) Name ¶
func (t *BooleanType) Name() string
func (*BooleanType) String ¶
func (t *BooleanType) String() string
type BufferKind ¶
type BufferKind int8
BufferKind describes the type of buffer expected when defining a layout specification
const ( KindFixedWidth BufferKind = iota KindVarWidth KindBitmap KindAlwaysNull )
The expected types of buffers
type BufferSpec ¶
type BufferSpec struct { Kind BufferKind ByteWidth int // for KindFixedWidth }
BufferSpec provides a specification for the buffers of a particular datatype
func SpecAlwaysNull ¶
func SpecAlwaysNull() BufferSpec
func SpecBitmap ¶
func SpecBitmap() BufferSpec
func SpecFixedWidth ¶
func SpecFixedWidth(w int) BufferSpec
func SpecVariableWidth ¶
func SpecVariableWidth() BufferSpec
func (BufferSpec) Equals ¶
func (b BufferSpec) Equals(other BufferSpec) bool
type Chunked ¶
type Chunked struct {
// contains filtered or unexported fields
}
Chunked manages a collection of primitive arrays as one logical large array.
func NewChunked ¶
NewChunked returns a new chunked array from the slice of arrays.
NewChunked panics if the chunks do not have the same data type.
type Column ¶
type Column struct {
// contains filtered or unexported fields
}
Column is an immutable column data structure consisting of a field (type metadata) and a chunked data array.
To get strongly typed data from a Column, you need to iterate the chunks and type assert each individual Array. For example:
switch column.DataType().ID() { case arrow.INT32: for _, c := range column.Data().Chunks() { arr := c.(*array.Int32) // do something with arr } case arrow.INT64: for _, c := range column.Data().Chunks() { arr := c.(*array.Int64) // do something with arr } case ... }
func NewColumn ¶
NewColumn returns a column from a field and a chunked data array.
NewColumn panics if the field's data type is inconsistent with the data type of the chunked data array.
func NewColumnFromArr ¶
NewColumnFromArr is a convenience function to create a column from a field and a non-chunked array.
This provides a simple mechanism for bypassing the middle step of constructing a Chunked array of one and then releasing it because of the ref counting.
type DataType ¶
type DataType interface { fmt.Stringer ID() Type // Name is name of the data type. Name() string Fingerprint() string Layout() DataTypeLayout }
DataType is the representation of an Arrow type.
type DataTypeLayout ¶
type DataTypeLayout struct { Buffers []BufferSpec HasDict bool }
DataTypeLayout represents the physical layout of a datatype's buffers including the number of and types of those binary buffers. This will correspond with the buffers in the ArrayData for an array of that type.
type Date32 ¶
type Date32 int32
func Date32FromTime ¶
Date32FromTime returns a Date32 value from a time object
func (Date32) FormattedString ¶
type Date32Type ¶
type Date32Type struct{}
func (*Date32Type) BitWidth ¶
func (t *Date32Type) BitWidth() int
func (*Date32Type) Bytes ¶
func (t *Date32Type) Bytes() int
func (*Date32Type) Fingerprint ¶
func (t *Date32Type) Fingerprint() string
func (*Date32Type) ID ¶
func (t *Date32Type) ID() Type
func (*Date32Type) Layout ¶
func (t *Date32Type) Layout() DataTypeLayout
func (*Date32Type) Name ¶
func (t *Date32Type) Name() string
func (*Date32Type) String ¶
func (t *Date32Type) String() string
type Date64 ¶
type Date64 int64
func Date64FromTime ¶
Date64FromTime returns a Date64 value from a time object
func (Date64) FormattedString ¶
type Date64Type ¶
type Date64Type struct{}
func (*Date64Type) BitWidth ¶
func (t *Date64Type) BitWidth() int
func (*Date64Type) Bytes ¶
func (t *Date64Type) Bytes() int
func (*Date64Type) Fingerprint ¶
func (t *Date64Type) Fingerprint() string
func (*Date64Type) ID ¶
func (t *Date64Type) ID() Type
func (*Date64Type) Layout ¶
func (t *Date64Type) Layout() DataTypeLayout
func (*Date64Type) Name ¶
func (t *Date64Type) Name() string
func (*Date64Type) String ¶
func (t *Date64Type) String() string
type DayTimeInterval ¶
DayTimeInterval represents a number of days and milliseconds (fraction of day).
type DayTimeIntervalType ¶
type DayTimeIntervalType struct{}
DayTimeIntervalType is encoded as a pair of 32-bit signed integers, representing a number of days and milliseconds (fraction of day).
func (*DayTimeIntervalType) BitWidth ¶
func (t *DayTimeIntervalType) BitWidth() int
BitWidth returns the number of bits required to store a single element of this data type in memory.
func (DayTimeIntervalType) Bytes ¶
func (DayTimeIntervalType) Bytes() int
func (*DayTimeIntervalType) Fingerprint ¶
func (*DayTimeIntervalType) Fingerprint() string
func (*DayTimeIntervalType) ID ¶
func (*DayTimeIntervalType) ID() Type
func (DayTimeIntervalType) Layout ¶
func (DayTimeIntervalType) Layout() DataTypeLayout
func (*DayTimeIntervalType) Name ¶
func (*DayTimeIntervalType) Name() string
func (*DayTimeIntervalType) String ¶
func (*DayTimeIntervalType) String() string
type Decimal128Type ¶
Decimal128Type represents a fixed-size 128-bit decimal type.
func (*Decimal128Type) BitWidth ¶
func (*Decimal128Type) BitWidth() int
func (*Decimal128Type) Bytes ¶
func (*Decimal128Type) Bytes() int
func (*Decimal128Type) Fingerprint ¶
func (t *Decimal128Type) Fingerprint() string
func (*Decimal128Type) ID ¶
func (*Decimal128Type) ID() Type
func (Decimal128Type) Layout ¶
func (Decimal128Type) Layout() DataTypeLayout
func (*Decimal128Type) Name ¶
func (*Decimal128Type) Name() string
func (*Decimal128Type) String ¶
func (t *Decimal128Type) String() string
type Decimal256Type ¶
Decimal256Type represents a fixed-size 256-bit decimal type.
func (*Decimal256Type) BitWidth ¶
func (*Decimal256Type) BitWidth() int
func (*Decimal256Type) Bytes ¶
func (*Decimal256Type) Bytes() int
func (*Decimal256Type) Fingerprint ¶
func (t *Decimal256Type) Fingerprint() string
func (*Decimal256Type) ID ¶
func (*Decimal256Type) ID() Type
func (Decimal256Type) Layout ¶
func (Decimal256Type) Layout() DataTypeLayout
func (*Decimal256Type) Name ¶
func (*Decimal256Type) Name() string
func (*Decimal256Type) String ¶
func (t *Decimal256Type) String() string
type DenseUnionType ¶
type DenseUnionType struct {
// contains filtered or unexported fields
}
DenseUnionType is the concrete type for dense union data.
A dense union is a nested type where each logical value is taken from a single child, at a specific offset. A buffer of 8-bit type ids (typed as UnionTypeCode) indicates which child a given logical value is to be taken from, and a buffer of 32-bit offsets indicates which physical position in the given child array has the logical value for that index.
Unlike a sparse union, a dense union allows encoding only the child values which are actually referred to by the union array. This is counterbalanced by the additional footprint of the offsets buffer, and the additional indirection cost when looking up values.
Unlike most other types, unions don't have a top-level validity bitmap
func DenseUnionFromArrays ¶
func DenseUnionFromArrays(children []Array, fields []string, codes []UnionTypeCode) *DenseUnionType
DenseUnionFromArrays enables creating a union type from a list of Arrays, field names, and type codes. len(fields) should be either 0 or equal to len(children). len(codes) should also be either 0, or equal to len(children).
If len(fields) == 0, then the fields will be named numerically as "0", "1", "2"... and so on. If len(codes) == 0, then the type codes will be constructed as [0, 1, 2, ..., n].
func DenseUnionOf ¶
func DenseUnionOf(fields []Field, typeCodes []UnionTypeCode) *DenseUnionType
DenseUnionOf is equivalent to UnionOf(arrow.DenseMode, fields, typeCodes), constructing a DenseUnionType from a list of fields and type codes.
If len(fields) != len(typeCodes) this will panic. They are allowed to be of length 0.
func (*DenseUnionType) Fingerprint ¶
func (t *DenseUnionType) Fingerprint() string
func (DenseUnionType) ID ¶
func (DenseUnionType) ID() Type
func (DenseUnionType) Layout ¶
func (DenseUnionType) Layout() DataTypeLayout
func (*DenseUnionType) MaxTypeCode ¶
func (t *DenseUnionType) MaxTypeCode() (max UnionTypeCode)
func (DenseUnionType) Mode ¶
func (DenseUnionType) Mode() UnionMode
func (DenseUnionType) Name ¶
func (DenseUnionType) Name() string
func (DenseUnionType) OffsetTypeTraits ¶
func (DenseUnionType) OffsetTypeTraits() OffsetTraits
func (*DenseUnionType) String ¶
func (t *DenseUnionType) String() string
func (DenseUnionType) TypeCodes ¶
func (t DenseUnionType) TypeCodes() []UnionTypeCode
type DictionaryType ¶
DictionaryType represents categorical or dictionary-encoded in-memory data. It contains a dictionary-encoded value type (any type) and an index type (any integer type).
func (*DictionaryType) BitWidth ¶
func (d *DictionaryType) BitWidth() int
func (*DictionaryType) Bytes ¶
func (d *DictionaryType) Bytes() int
func (*DictionaryType) Fingerprint ¶
func (d *DictionaryType) Fingerprint() string
func (*DictionaryType) ID ¶
func (*DictionaryType) ID() Type
func (*DictionaryType) Layout ¶
func (d *DictionaryType) Layout() DataTypeLayout
func (*DictionaryType) Name ¶
func (*DictionaryType) Name() string
func (*DictionaryType) String ¶
func (d *DictionaryType) String() string
type DurationType ¶
type DurationType struct {
Unit TimeUnit
}
DurationType is encoded as a 64-bit signed integer, representing an amount of elapsed time without any relation to a calendar artifact.
func (*DurationType) BitWidth ¶
func (*DurationType) BitWidth() int
func (*DurationType) Bytes ¶
func (*DurationType) Bytes() int
func (*DurationType) Fingerprint ¶
func (t *DurationType) Fingerprint() string
func (*DurationType) ID ¶
func (*DurationType) ID() Type
func (DurationType) Layout ¶
func (DurationType) Layout() DataTypeLayout
func (*DurationType) Name ¶
func (*DurationType) Name() string
func (*DurationType) String ¶
func (t *DurationType) String() string
func (*DurationType) TimeUnit ¶
func (t *DurationType) TimeUnit() TimeUnit
type ExtensionBase ¶
type ExtensionBase struct { // Storage is the underlying storage type Storage DataType }
ExtensionBase is the base struct for user-defined Extension Types which must be embedded in any user-defined types like so:
type UserDefinedType struct { arrow.ExtensionBase // any other data }
func (*ExtensionBase) Fields ¶
func (e *ExtensionBase) Fields() []Field
func (*ExtensionBase) Fingerprint ¶
func (e *ExtensionBase) Fingerprint() string
func (*ExtensionBase) ID ¶
func (*ExtensionBase) ID() Type
ID always returns arrow.EXTENSION and should not be overridden
func (*ExtensionBase) Layout ¶
func (e *ExtensionBase) Layout() DataTypeLayout
func (*ExtensionBase) Name ¶
func (*ExtensionBase) Name() string
Name should always return "extension" and should not be overridden
func (*ExtensionBase) StorageType ¶
func (e *ExtensionBase) StorageType() DataType
StorageType returns the underlying storage type and exists so that functions written against the ExtensionType interface can access the storage type.
func (*ExtensionBase) String ¶
func (e *ExtensionBase) String() string
String by default will return "extension_type<storage=storage_type>" but can be overridden to customize what is printed out when printing this extension type.
type ExtensionType ¶
type ExtensionType interface { DataType // ArrayType should return the reflect.TypeOf(ExtensionArrayType{}) where the // ExtensionArrayType is a type that implements the array.ExtensionArray interface. // Such a type must also embed the array.ExtensionArrayBase in it. This will be used // when creating arrays of this ExtensionType by using reflect.New ArrayType() reflect.Type // ExtensionName is what will be used when registering / unregistering this extension // type. Multiple user-defined types can be defined with a parameterized ExtensionType // as long as the parameter is used in the ExtensionName to distinguish the instances // in the global Extension Type registry. // The return from this is also what will be placed in the metadata for IPC communication // under the key ARROW:extension:name ExtensionName() string // StorageType returns the underlying storage type which is used by this extension // type. It is already implemented by the ExtensionBase struct and thus does not need // to be re-implemented by a user-defined type. StorageType() DataType // ExtensionEquals is used to tell whether two ExtensionType instances are equal types. ExtensionEquals(ExtensionType) bool // Serialize should produce any extra metadata necessary for initializing an instance of // this user-defined type. Not all user-defined types require this and it is valid to return // an empty string from this function. This is used for the IPC format and will be // added to metadata for IPC communication under the key ARROW:extension:metadata // This should be implemented such that it is valid to be called by multiple goroutines // concurrently. Serialize() string // Deserialize is called when reading in extension arrays and types via the IPC format // in order to construct an instance of the appropriate extension type. The data passed in // is pulled from the ARROW:extension:metadata key and may be an empty string. 
// If the storage type is incorrect or something else is invalid with the data this should // return nil and an appropriate error. Deserialize(storageType DataType, data string) (ExtensionType, error) // contains filtered or unexported methods }
ExtensionType is an interface for handling user-defined types. They must be DataTypes and must embed arrow.ExtensionBase in them in order to work properly ensuring that they always have the expected base behavior.
The arrow.ExtensionBase that needs to be embedded implements the DataType interface leaving the remaining functions having to be implemented by the actual user-defined type in order to be handled properly.
func GetExtensionType ¶
func GetExtensionType(typName string) ExtensionType
GetExtensionType retrieves and returns the extension type of the given name from the global extension type registry. If the type isn't found it will return nil. This function is safe to call from multiple goroutines concurrently.
type Field ¶
type Field struct { Name string // Field name Type DataType // The field's data type Nullable bool // Fields can be nullable Metadata Metadata // The field's metadata, if any }
func (Field) Fingerprint ¶
func (Field) HasMetadata ¶
type FixedSizeBinaryType ¶
type FixedSizeBinaryType struct {
ByteWidth int
}
func (*FixedSizeBinaryType) BitWidth ¶
func (t *FixedSizeBinaryType) BitWidth() int
func (*FixedSizeBinaryType) Bytes ¶
func (t *FixedSizeBinaryType) Bytes() int
func (*FixedSizeBinaryType) Fingerprint ¶
func (t *FixedSizeBinaryType) Fingerprint() string
func (*FixedSizeBinaryType) ID ¶
func (*FixedSizeBinaryType) ID() Type
func (*FixedSizeBinaryType) Layout ¶
func (t *FixedSizeBinaryType) Layout() DataTypeLayout
func (*FixedSizeBinaryType) Name ¶
func (*FixedSizeBinaryType) Name() string
func (*FixedSizeBinaryType) String ¶
func (t *FixedSizeBinaryType) String() string
type FixedSizeListType ¶
type FixedSizeListType struct {
// contains filtered or unexported fields
}
FixedSizeListType describes a nested type in which each array slot contains a fixed-size sequence of values, all having the same relative type.
func FixedSizeListOf ¶
func FixedSizeListOf(n int32, t DataType) *FixedSizeListType
FixedSizeListOf returns the list type with element type t. For example, if t represents int32, FixedSizeListOf(10, t) represents [10]int32.
FixedSizeListOf panics if t is nil or invalid. FixedSizeListOf panics if n is <= 0. NullableElem defaults to true
func FixedSizeListOfField ¶
func FixedSizeListOfField(n int32, f Field) *FixedSizeListType
func FixedSizeListOfNonNullable ¶
func FixedSizeListOfNonNullable(n int32, t DataType) *FixedSizeListType
FixedSizeListOfNonNullable is like FixedSizeListOf but NullableElem defaults to false indicating that the child type should be marked as non-nullable.
func (*FixedSizeListType) Elem ¶
func (t *FixedSizeListType) Elem() DataType
Elem returns the FixedSizeListType's element type.
func (*FixedSizeListType) ElemField ¶
func (t *FixedSizeListType) ElemField() Field
func (*FixedSizeListType) Fields ¶
func (t *FixedSizeListType) Fields() []Field
func (*FixedSizeListType) Fingerprint ¶
func (t *FixedSizeListType) Fingerprint() string
func (*FixedSizeListType) ID ¶
func (*FixedSizeListType) ID() Type
func (FixedSizeListType) Layout ¶
func (FixedSizeListType) Layout() DataTypeLayout
func (*FixedSizeListType) Len ¶
func (t *FixedSizeListType) Len() int32
Len returns the FixedSizeListType's size.
func (*FixedSizeListType) Name ¶
func (*FixedSizeListType) Name() string
func (*FixedSizeListType) String ¶
func (t *FixedSizeListType) String() string
type FixedWidthDataType ¶
type FixedWidthDataType interface { DataType // BitWidth returns the number of bits required to store a single element of this data type in memory. BitWidth() int // Bytes returns the number of bytes required to store a single element of this data type in memory. Bytes() int }
FixedWidthDataType is the representation of an Arrow type that requires a fixed number of bits in memory for each element.
type Float16Type ¶
type Float16Type struct{}
Float16Type represents a floating point value encoded with a 16-bit precision.
func (*Float16Type) BitWidth ¶
func (t *Float16Type) BitWidth() int
BitWidth returns the number of bits required to store a single element of this data type in memory.
func (Float16Type) Bytes ¶
func (Float16Type) Bytes() int
func (*Float16Type) Fingerprint ¶
func (t *Float16Type) Fingerprint() string
func (*Float16Type) ID ¶
func (t *Float16Type) ID() Type
func (Float16Type) Layout ¶
func (Float16Type) Layout() DataTypeLayout
func (*Float16Type) Name ¶
func (t *Float16Type) Name() string
func (*Float16Type) String ¶
func (t *Float16Type) String() string
type Float32Type ¶
type Float32Type struct{}
func (*Float32Type) BitWidth ¶
func (t *Float32Type) BitWidth() int
func (*Float32Type) Bytes ¶
func (t *Float32Type) Bytes() int
func (*Float32Type) Fingerprint ¶
func (t *Float32Type) Fingerprint() string
func (*Float32Type) ID ¶
func (t *Float32Type) ID() Type
func (*Float32Type) Layout ¶
func (t *Float32Type) Layout() DataTypeLayout
func (*Float32Type) Name ¶
func (t *Float32Type) Name() string
func (*Float32Type) String ¶
func (t *Float32Type) String() string
type Float64Type ¶
type Float64Type struct{}
func (*Float64Type) BitWidth ¶
func (t *Float64Type) BitWidth() int
func (*Float64Type) Bytes ¶
func (t *Float64Type) Bytes() int
func (*Float64Type) Fingerprint ¶
func (t *Float64Type) Fingerprint() string
func (*Float64Type) ID ¶
func (t *Float64Type) ID() Type
func (*Float64Type) Layout ¶
func (t *Float64Type) Layout() DataTypeLayout
func (*Float64Type) Name ¶
func (t *Float64Type) Name() string
func (*Float64Type) String ¶
func (t *Float64Type) String() string
type Int16Type ¶
type Int16Type struct{}
func (*Int16Type) Fingerprint ¶
func (*Int16Type) Layout ¶
func (t *Int16Type) Layout() DataTypeLayout
type Int32Type ¶
type Int32Type struct{}
func (*Int32Type) Fingerprint ¶
func (*Int32Type) Layout ¶
func (t *Int32Type) Layout() DataTypeLayout
type Int64Type ¶
type Int64Type struct{}
func (*Int64Type) Fingerprint ¶
func (*Int64Type) Layout ¶
func (t *Int64Type) Layout() DataTypeLayout
type Int8Type ¶
type Int8Type struct{}
func (*Int8Type) Fingerprint ¶
func (*Int8Type) Layout ¶
func (t *Int8Type) Layout() DataTypeLayout
type LargeBinaryType ¶
type LargeBinaryType struct{}
func (*LargeBinaryType) Fingerprint ¶
func (t *LargeBinaryType) Fingerprint() string
func (*LargeBinaryType) ID ¶
func (t *LargeBinaryType) ID() Type
func (LargeBinaryType) IsUtf8 ¶
func (LargeBinaryType) IsUtf8() bool
func (*LargeBinaryType) Layout ¶
func (t *LargeBinaryType) Layout() DataTypeLayout
func (*LargeBinaryType) Name ¶
func (t *LargeBinaryType) Name() string
func (*LargeBinaryType) OffsetTypeTraits ¶
func (t *LargeBinaryType) OffsetTypeTraits() OffsetTraits
func (*LargeBinaryType) String ¶
func (t *LargeBinaryType) String() string
type LargeListType ¶
type LargeListType struct {
ListType
}
func LargeListOf ¶
func LargeListOf(t DataType) *LargeListType
LargeListOf returns the large list type with element type t. For example, if t represents int32, LargeListOf(t) represents []int32.
LargeListOf panics if t is nil or invalid. NullableElem defaults to true.
func LargeListOfField ¶
func LargeListOfField(f Field) *LargeListType
func LargeListOfNonNullable ¶
func LargeListOfNonNullable(t DataType) *LargeListType
LargeListOfNonNullable is like LargeListOf but NullableElem defaults to false, indicating that the child type should be marked as non-nullable.
func (*LargeListType) Fingerprint ¶
func (t *LargeListType) Fingerprint() string
func (LargeListType) ID ¶
func (LargeListType) ID() Type
func (LargeListType) Layout ¶
func (LargeListType) Layout() DataTypeLayout
func (LargeListType) Name ¶
func (LargeListType) Name() string
func (LargeListType) OffsetTypeTraits ¶
func (LargeListType) OffsetTypeTraits() OffsetTraits
func (*LargeListType) String ¶
func (t *LargeListType) String() string
type LargeStringType ¶
type LargeStringType struct{}
func (*LargeStringType) Fingerprint ¶
func (t *LargeStringType) Fingerprint() string
func (*LargeStringType) ID ¶
func (t *LargeStringType) ID() Type
func (LargeStringType) IsUtf8 ¶
func (LargeStringType) IsUtf8() bool
func (*LargeStringType) Layout ¶
func (t *LargeStringType) Layout() DataTypeLayout
func (*LargeStringType) Name ¶
func (t *LargeStringType) Name() string
func (*LargeStringType) OffsetTypeTraits ¶
func (t *LargeStringType) OffsetTypeTraits() OffsetTraits
func (*LargeStringType) String ¶
func (t *LargeStringType) String() string
type ListType ¶
type ListType struct {
// contains filtered or unexported fields
}
ListType describes a nested type in which each array slot contains a variable-size sequence of values, all having the same relative type.
func ListOf ¶
ListOf returns the list type with element type t. For example, if t represents int32, ListOf(t) represents []int32.
ListOf panics if t is nil or invalid. NullableElem defaults to true
func ListOfField ¶
func ListOfNonNullable ¶
ListOfNonNullable is like ListOf but NullableElem defaults to false, indicating that the child type should be marked as non-nullable.
func (*ListType) Fingerprint ¶
func (ListType) Layout ¶
func (ListType) Layout() DataTypeLayout
func (ListType) OffsetTypeTraits ¶
func (ListType) OffsetTypeTraits() OffsetTraits
func (*ListType) SetElemMetadata ¶
func (*ListType) SetElemNullable ¶
type MapType ¶
type MapType struct { KeysSorted bool // contains filtered or unexported fields }
func (*MapType) Fingerprint ¶
func (*MapType) Layout ¶
func (t *MapType) Layout() DataTypeLayout
func (MapType) OffsetTypeTraits ¶
func (MapType) OffsetTypeTraits() OffsetTraits
func (*MapType) SetItemNullable ¶
func (*MapType) ValueField ¶
func (*MapType) ValueType ¶
func (t *MapType) ValueType() *StructType
type Metadata ¶
type Metadata struct {
// contains filtered or unexported fields
}
func MetadataFrom ¶
func NewMetadata ¶
type MonthDayNanoInterval ¶
type MonthDayNanoInterval struct { Months int32 `json:"months"` Days int32 `json:"days"` Nanoseconds int64 `json:"nanoseconds"` }
MonthDayNanoInterval represents a number of months, days and nanoseconds (fraction of day).
type MonthDayNanoIntervalType ¶
type MonthDayNanoIntervalType struct{}
MonthDayNanoIntervalType is encoded as two signed 32-bit integers representing a number of months and a number of days, followed by a 64-bit integer representing the number of nanoseconds since midnight for fractions of a day.
func (*MonthDayNanoIntervalType) BitWidth ¶
func (*MonthDayNanoIntervalType) BitWidth() int
BitWidth returns the number of bits required to store a single element of this data type in memory.
func (*MonthDayNanoIntervalType) Bytes ¶
func (*MonthDayNanoIntervalType) Bytes() int
func (*MonthDayNanoIntervalType) Fingerprint ¶
func (*MonthDayNanoIntervalType) Fingerprint() string
func (*MonthDayNanoIntervalType) ID ¶
func (*MonthDayNanoIntervalType) ID() Type
func (MonthDayNanoIntervalType) Layout ¶
func (MonthDayNanoIntervalType) Layout() DataTypeLayout
func (*MonthDayNanoIntervalType) Name ¶
func (*MonthDayNanoIntervalType) Name() string
func (*MonthDayNanoIntervalType) String ¶
func (*MonthDayNanoIntervalType) String() string
type MonthInterval ¶
type MonthInterval int32
MonthInterval represents a number of months.
func (MonthInterval) MarshalJSON ¶
func (m MonthInterval) MarshalJSON() ([]byte, error)
func (*MonthInterval) UnmarshalJSON ¶
func (m *MonthInterval) UnmarshalJSON(data []byte) error
type MonthIntervalType ¶
type MonthIntervalType struct{}
MonthIntervalType is encoded as a 32-bit signed integer, representing a number of months.
func (*MonthIntervalType) BitWidth ¶
func (t *MonthIntervalType) BitWidth() int
BitWidth returns the number of bits required to store a single element of this data type in memory.
func (MonthIntervalType) Bytes ¶
func (MonthIntervalType) Bytes() int
func (*MonthIntervalType) Fingerprint ¶
func (*MonthIntervalType) Fingerprint() string
func (*MonthIntervalType) ID ¶
func (*MonthIntervalType) ID() Type
func (MonthIntervalType) Layout ¶
func (MonthIntervalType) Layout() DataTypeLayout
func (*MonthIntervalType) Name ¶
func (*MonthIntervalType) Name() string
func (*MonthIntervalType) String ¶
func (*MonthIntervalType) String() string
type NestedType ¶
type NullType ¶
type NullType struct{}
NullType describes a degenerate array, with zero physical storage.
var (
Null *NullType
)
func (*NullType) Fingerprint ¶
func (*NullType) Layout ¶
func (*NullType) Layout() DataTypeLayout
type OffsetTraits ¶
type OffsetTraits interface { // BytesRequired returns the number of bytes required to be allocated // in order to hold the passed in number of elements of this type. BytesRequired(int) int }
OffsetTraits is a convenient interface over the various type traits constants such as arrow.Int32Traits allowing types with offsets, like BinaryType, StringType, LargeBinaryType and LargeStringType to have a method to return information about their offset type and how many bytes would be required to allocate an offset buffer for them.
type OffsetsDataType ¶
type OffsetsDataType interface { DataType OffsetTypeTraits() OffsetTraits }
type Record ¶
type Record interface { json.Marshaler Release() Retain() Schema() *Schema NumRows() int64 NumCols() int64 Columns() []Array Column(i int) Array ColumnName(i int) string // NewSlice constructs a zero-copy slice of the record with the indicated // indices i and j, corresponding to array[i:j]. // The returned record must be Release()'d after use. // // NewSlice panics if the slice is outside the valid range of the record array. // NewSlice panics if j < i. NewSlice(i, j int64) Record }
Record is a collection of equal-length arrays matching a particular Schema. Also known as a RecordBatch in the spec and in some implementations.
It is also possible to construct a Table from a collection of Records that all have the same schema.
type Schema ¶
type Schema struct {
// contains filtered or unexported fields
}
Schema is a sequence of Field values, describing the columns of a table or a record batch.
func NewSchema ¶
NewSchema returns a new Schema value from the slice of fields and metadata.
NewSchema panics if there is a field with an invalid DataType.
func NewSchemaWithEndian ¶
func NewSchemaWithEndian(fields []Field, metadata *Metadata, e endian.Endianness) *Schema
func (*Schema) Endianness ¶
func (sc *Schema) Endianness() endian.Endianness
func (*Schema) Equal ¶
Equal returns whether two schemas are equal. Equal does not compare the metadata.
func (*Schema) FieldIndices ¶
FieldIndices returns the indices of the named field or nil.
func (*Schema) Fingerprint ¶
func (*Schema) HasMetadata ¶
func (*Schema) IsNativeEndian ¶
func (*Schema) WithEndianness ¶
func (sc *Schema) WithEndianness(e endian.Endianness) *Schema
type SparseUnionType ¶
type SparseUnionType struct {
// contains filtered or unexported fields
}
SparseUnionType is the concrete type for Sparse union data.
A sparse union is a nested type where each logical value is taken from a single child. A buffer of 8-bit type ids indicates which child a given logical value is to be taken from.
In a sparse union, each child array will have the same length as the union array itself, regardless of the actual number of union values which refer to it.
Unlike most other types, unions do not have a top-level validity bitmap.
func SparseUnionFromArrays ¶
func SparseUnionFromArrays(children []Array, fields []string, codes []UnionTypeCode) *SparseUnionType
SparseUnionFromArrays enables creating a union type from a list of Arrays, field names, and type codes. len(fields) should be either 0 or equal to len(children). len(codes) should also be either 0, or equal to len(children).
If len(fields) == 0, then the fields will be named numerically as "0", "1", "2"... and so on. If len(codes) == 0, then the type codes will be constructed as [0, 1, 2, ..., n].
func SparseUnionOf ¶
func SparseUnionOf(fields []Field, typeCodes []UnionTypeCode) *SparseUnionType
SparseUnionOf is equivalent to UnionOf(arrow.SparseMode, fields, typeCodes), constructing a SparseUnionType from a list of fields and type codes.
If len(fields) != len(typeCodes) this will panic. They are allowed to be of length 0.
func (*SparseUnionType) Fingerprint ¶
func (t *SparseUnionType) Fingerprint() string
func (SparseUnionType) ID ¶
func (SparseUnionType) ID() Type
func (SparseUnionType) Layout ¶
func (SparseUnionType) Layout() DataTypeLayout
func (*SparseUnionType) MaxTypeCode ¶
func (t *SparseUnionType) MaxTypeCode() (max UnionTypeCode)
func (SparseUnionType) Mode ¶
func (SparseUnionType) Mode() UnionMode
func (SparseUnionType) Name ¶
func (SparseUnionType) Name() string
func (*SparseUnionType) String ¶
func (t *SparseUnionType) String() string
func (SparseUnionType) TypeCodes ¶
func (t SparseUnionType) TypeCodes() []UnionTypeCode
type StringType ¶
type StringType struct{}
func (*StringType) Fingerprint ¶
func (t *StringType) Fingerprint() string
func (*StringType) ID ¶
func (t *StringType) ID() Type
func (StringType) IsUtf8 ¶
func (StringType) IsUtf8() bool
func (*StringType) Layout ¶
func (t *StringType) Layout() DataTypeLayout
func (*StringType) Name ¶
func (t *StringType) Name() string
func (*StringType) OffsetTypeTraits ¶
func (t *StringType) OffsetTypeTraits() OffsetTraits
func (*StringType) String ¶
func (t *StringType) String() string
type StructType ¶
type StructType struct {
// contains filtered or unexported fields
}
StructType describes a nested type parameterized by an ordered sequence of relative types, called its fields.
func StructOf ¶
func StructOf(fs ...Field) *StructType
StructOf returns the struct type with fields fs.
StructOf panics if there are duplicated fields. StructOf panics if there is a field with an invalid DataType.
func (*StructType) Field ¶
func (t *StructType) Field(i int) Field
func (*StructType) FieldByName ¶
func (t *StructType) FieldByName(name string) (Field, bool)
func (*StructType) Fields ¶
func (t *StructType) Fields() []Field
func (*StructType) Fingerprint ¶
func (t *StructType) Fingerprint() string
func (*StructType) ID ¶
func (*StructType) ID() Type
func (StructType) Layout ¶
func (StructType) Layout() DataTypeLayout
func (*StructType) Name ¶
func (*StructType) Name() string
func (*StructType) String ¶
func (t *StructType) String() string
type Table ¶
type Table interface { Schema() *Schema NumRows() int64 NumCols() int64 Column(i int) *Column Retain() Release() }
Table represents a logical sequence of chunked arrays of equal length. It is similar to a Record except that the columns are ChunkedArrays instead, allowing for a Table to be built up by chunks progressively whereas the columns in a single Record are always each a single contiguous array.
type TemporalWithUnit ¶
type TemporalWithUnit interface { FixedWidthDataType TimeUnit() TimeUnit }
type Time32 ¶
type Time32 int32
func Time32FromString ¶
Time32FromString parses a string and returns a Time32 value in the given unit. The unit must be either seconds or milliseconds, and the string should be in the form HH:MM or HH:MM:SS[.zzz], where the fractions of a second are optional.
func (Time32) FormattedString ¶
type Time32Type ¶
type Time32Type struct {
Unit TimeUnit
}
Time32Type is encoded as a 32-bit signed integer, representing either seconds or milliseconds since midnight.
func (*Time32Type) BitWidth ¶
func (*Time32Type) BitWidth() int
func (*Time32Type) Bytes ¶
func (*Time32Type) Bytes() int
func (*Time32Type) Fingerprint ¶
func (t *Time32Type) Fingerprint() string
func (*Time32Type) ID ¶
func (*Time32Type) ID() Type
func (Time32Type) Layout ¶
func (Time32Type) Layout() DataTypeLayout
func (*Time32Type) Name ¶
func (*Time32Type) Name() string
func (*Time32Type) String ¶
func (t *Time32Type) String() string
func (*Time32Type) TimeUnit ¶
func (t *Time32Type) TimeUnit() TimeUnit
type Time64 ¶
type Time64 int64
func Time64FromString ¶
Time64FromString parses a string and returns a Time64 value in the given unit. The unit must be either microseconds or nanoseconds, and the string should be in the form HH:MM or HH:MM:SS[.zzzzzzzzz], where the fractions of a second are optional.
func (Time64) FormattedString ¶
type Time64Type ¶
type Time64Type struct {
Unit TimeUnit
}
Time64Type is encoded as a 64-bit signed integer, representing either microseconds or nanoseconds since midnight.
func (*Time64Type) BitWidth ¶
func (*Time64Type) BitWidth() int
func (*Time64Type) Bytes ¶
func (*Time64Type) Bytes() int
func (*Time64Type) Fingerprint ¶
func (t *Time64Type) Fingerprint() string
func (*Time64Type) ID ¶
func (*Time64Type) ID() Type
func (Time64Type) Layout ¶
func (Time64Type) Layout() DataTypeLayout
func (*Time64Type) Name ¶
func (*Time64Type) Name() string
func (*Time64Type) String ¶
func (t *Time64Type) String() string
func (*Time64Type) TimeUnit ¶
func (t *Time64Type) TimeUnit() TimeUnit
type Timestamp ¶
type Timestamp int64
func TimestampFromString ¶
TimestampFromString parses a string and returns a timestamp for the given unit level.
The timestamp should be in one of the following forms, where [T] can be either T or a space, and [.zzzzzzzzz] can be either left out or up to 9 digits of fractions of a second.
YYYY-MM-DD YYYY-MM-DD[T]HH YYYY-MM-DD[T]HH:MM YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzzz]
You can also optionally have an ending Z to indicate UTC or indicate a specific timezone using ±HH, ±HHMM or ±HH:MM at the end of the string.
func TimestampFromStringInLocation ¶
func TimestampFromStringInLocation(val string, unit TimeUnit, loc *time.Location) (Timestamp, bool, error)
TimestampFromStringInLocation is like TimestampFromString, but treats the time instant as if it were in the passed timezone before converting to UTC for internal representation.
type TimestampConvertOp ¶
type TimestampConvertOp int8
func GetTimestampConvert ¶
func GetTimestampConvert(in, out TimeUnit) (op TimestampConvertOp, factor int64)
type TimestampType ¶
type TimestampType struct { Unit TimeUnit TimeZone string // contains filtered or unexported fields }
TimestampType is encoded as a 64-bit signed integer since the UNIX epoch (1970-01-01T00:00:00Z). The zero-value is a nanosecond and time zone neutral. Time zone neutral can be considered UTC without having "UTC" as a time zone.
func (*TimestampType) BitWidth ¶
func (*TimestampType) BitWidth() int
BitWidth returns the number of bits required to store a single element of this data type in memory.
func (TimestampType) Bytes ¶
func (TimestampType) Bytes() int
func (*TimestampType) ClearCachedLocation ¶
func (t *TimestampType) ClearCachedLocation()
ClearCachedLocation clears the cached time.Location object in the type. This should be called if you change the value of the TimeZone after having potentially called GetZone.
func (*TimestampType) Fingerprint ¶
func (t *TimestampType) Fingerprint() string
func (*TimestampType) GetToTimeFunc ¶
func (t *TimestampType) GetToTimeFunc() (func(Timestamp) time.Time, error)
GetToTimeFunc returns a function for converting an arrow.Timestamp value into a time.Time object with proper TimeZone and precision. If the TimeZone is invalid this will return an error. It calls GetZone to get the timezone for consistency.
func (*TimestampType) GetZone ¶
func (t *TimestampType) GetZone() (*time.Location, error)
GetZone returns a *time.Location that represents the current TimeZone member of the TimestampType. If it is "", "UTC", or "utc", you'll get time.UTC. Otherwise it must either be a valid tzdata string such as "America/New_York" or of the format +HH:MM or -HH:MM indicating an absolute offset.
The location object will be cached in the TimestampType for subsequent calls so if you change the value of TimeZone after calling this, make sure to call ClearCachedLocation.
func (*TimestampType) ID ¶
func (*TimestampType) ID() Type
func (TimestampType) Layout ¶
func (TimestampType) Layout() DataTypeLayout
func (*TimestampType) Name ¶
func (*TimestampType) Name() string
func (*TimestampType) String ¶
func (t *TimestampType) String() string
func (*TimestampType) TimeUnit ¶
func (t *TimestampType) TimeUnit() TimeUnit
type Type ¶
type Type int
Type is a logical type. A Type can be expressed as either a primitive physical type (bytes or bits of some fixed size), a nested type consisting of other data types, or another data type (e.g. a timestamp encoded as an int64).
const ( // NULL type having no physical storage NULL Type = iota // BOOL is a 1 bit, LSB bit-packed ordering BOOL // UINT8 is an Unsigned 8-bit little-endian integer UINT8 // INT8 is a Signed 8-bit little-endian integer INT8 // UINT16 is an Unsigned 16-bit little-endian integer UINT16 // INT16 is a Signed 16-bit little-endian integer INT16 // UINT32 is an Unsigned 32-bit little-endian integer UINT32 // INT32 is a Signed 32-bit little-endian integer INT32 // UINT64 is an Unsigned 64-bit little-endian integer UINT64 // INT64 is a Signed 64-bit little-endian integer INT64 // FLOAT16 is a 2-byte floating point value FLOAT16 // FLOAT32 is a 4-byte floating point value FLOAT32 // FLOAT64 is an 8-byte floating point value FLOAT64 // STRING is a UTF8 variable-length string STRING // BINARY is a Variable-length byte type (no guarantee of UTF8-ness) BINARY // FIXED_SIZE_BINARY is a binary where each value occupies the same number of bytes FIXED_SIZE_BINARY // DATE32 is int32 days since the UNIX epoch DATE32 // DATE64 is int64 milliseconds since the UNIX epoch DATE64 // TIMESTAMP is an exact timestamp encoded with int64 since UNIX epoch // Default unit millisecond TIMESTAMP // TIME32 is a signed 32-bit integer, representing either seconds or // milliseconds since midnight TIME32 // TIME64 is a signed 64-bit integer, representing either microseconds or // nanoseconds since midnight TIME64 // INTERVAL_MONTHS is YEAR_MONTH interval in SQL style INTERVAL_MONTHS // INTERVAL_DAY_TIME is DAY_TIME in SQL Style INTERVAL_DAY_TIME // DECIMAL128 is a precision- and scale-based decimal type. Storage type depends on the // parameters. DECIMAL128 // DECIMAL256 is a precision and scale based decimal type, with 256 bit max. not yet implemented DECIMAL256 // LIST is a list of some logical data type LIST // STRUCT of logical types STRUCT // SPARSE_UNION of logical types. not yet implemented SPARSE_UNION // DENSE_UNION of logical types. 
not yet implemented DENSE_UNION // DICTIONARY aka Category type DICTIONARY // MAP is a repeated struct logical type MAP // Custom data type, implemented by user EXTENSION // Fixed size list of some logical type FIXED_SIZE_LIST // Measure of elapsed time in either seconds, milliseconds, microseconds // or nanoseconds. DURATION // like STRING, but 64-bit offsets. not yet implemented LARGE_STRING // like BINARY but with 64-bit offsets, not yet implemented LARGE_BINARY // like LIST but with 64-bit offsets. not yet implemented LARGE_LIST // calendar interval with three fields INTERVAL_MONTH_DAY_NANO // INTERVAL could be any of the interval types, kept to avoid breaking anyone // after switching to individual type ids for the interval types that were using // it when calling MakeFromData or NewBuilder // // Deprecated and will be removed in the next major version release INTERVAL // Alias to ensure we do not break any consumers DECIMAL = DECIMAL128 )
type TypeEqualOption ¶
type TypeEqualOption func(*typeEqualsConfig)
TypeEqualOption is a functional option type used for configuring type equality checks.
func CheckMetadata ¶
func CheckMetadata() TypeEqualOption
CheckMetadata is an option for TypeEqual that allows checking for metadata equality besides type equality. It only makes sense for STRUCT type.
type Uint16Type ¶
type Uint16Type struct{}
func (*Uint16Type) BitWidth ¶
func (t *Uint16Type) BitWidth() int
func (*Uint16Type) Bytes ¶
func (t *Uint16Type) Bytes() int
func (*Uint16Type) Fingerprint ¶
func (t *Uint16Type) Fingerprint() string
func (*Uint16Type) ID ¶
func (t *Uint16Type) ID() Type
func (*Uint16Type) Layout ¶
func (t *Uint16Type) Layout() DataTypeLayout
func (*Uint16Type) Name ¶
func (t *Uint16Type) Name() string
func (*Uint16Type) String ¶
func (t *Uint16Type) String() string
type Uint32Type ¶
type Uint32Type struct{}
func (*Uint32Type) BitWidth ¶
func (t *Uint32Type) BitWidth() int
func (*Uint32Type) Bytes ¶
func (t *Uint32Type) Bytes() int
func (*Uint32Type) Fingerprint ¶
func (t *Uint32Type) Fingerprint() string
func (*Uint32Type) ID ¶
func (t *Uint32Type) ID() Type
func (*Uint32Type) Layout ¶
func (t *Uint32Type) Layout() DataTypeLayout
func (*Uint32Type) Name ¶
func (t *Uint32Type) Name() string
func (*Uint32Type) String ¶
func (t *Uint32Type) String() string
type Uint64Type ¶
type Uint64Type struct{}
func (*Uint64Type) BitWidth ¶
func (t *Uint64Type) BitWidth() int
func (*Uint64Type) Bytes ¶
func (t *Uint64Type) Bytes() int
func (*Uint64Type) Fingerprint ¶
func (t *Uint64Type) Fingerprint() string
func (*Uint64Type) ID ¶
func (t *Uint64Type) ID() Type
func (*Uint64Type) Layout ¶
func (t *Uint64Type) Layout() DataTypeLayout
func (*Uint64Type) Name ¶
func (t *Uint64Type) Name() string
func (*Uint64Type) String ¶
func (t *Uint64Type) String() string
type Uint8Type ¶
type Uint8Type struct{}
func (*Uint8Type) Fingerprint ¶
func (*Uint8Type) Layout ¶
func (t *Uint8Type) Layout() DataTypeLayout
type UnionType ¶
type UnionType interface { NestedType // Mode returns either SparseMode or DenseMode depending on the current // concrete data type. Mode() UnionMode // ChildIDs returns a slice of ints to map UnionTypeCode values to // the index in the Fields that represents the given Type. It is // initialized with all values being InvalidUnionChildID (-1) // before being populated based on the TypeCodes and fields of the type. // The field for a given type can be retrieved by Fields()[ChildIDs()[typeCode]] ChildIDs() []int // TypeCodes returns the list of available type codes for this union type // which will correspond to indexes into the ChildIDs slice to locate the // appropriate child. A union Array contains a buffer of these type codes // which indicate for a given index, which child has the value for that index. TypeCodes() []UnionTypeCode // MaxTypeCode returns the value of the largest TypeCode in the list of typecodes // that are defined by this Union type MaxTypeCode() UnionTypeCode }
UnionType is an interface to encompass both Dense and Sparse Union types.
A UnionType is a nested type where each logical value is taken from a single child. A buffer of 8-bit type ids (typed as UnionTypeCode) indicates which child a given logical value is to be taken from. This is represented as the "child id" or "child index", which is the index into the list of child fields for a given child.
type UnionTypeCode ¶
type UnionTypeCode = int8
UnionTypeCode is an alias to int8 which is the type of the ids used for union arrays.
Source Files ¶
- array.go
- compare.go
- datatype.go
- datatype_binary.go
- datatype_extension.go
- datatype_fixedwidth.go
- datatype_nested.go
- datatype_null.go
- datatype_numeric.gen.go
- doc.go
- errors.go
- record.go
- schema.go
- table.go
- type_string.go
- type_traits_boolean.go
- type_traits_decimal128.go
- type_traits_decimal256.go
- type_traits_float16.go
- type_traits_interval.go
- type_traits_numeric.gen.go
- unionmode_string.go
Directories ¶
Path | Synopsis |
---|---|
_examples
|
|
_tools
|
|
Package array provides implementations of various Arrow array types.
|
Package array provides implementations of various Arrow array types. |
Package arrio exposes functions to manipulate records, exposing and using interfaces not unlike the ones defined in the stdlib io package.
|
Package arrio exposes functions to manipulate records, exposing and using interfaces not unlike the ones defined in the stdlib io package. |
Package csv reads CSV files and presents the extracted data as records, also writes data as record into CSV files
|
Package csv reads CSV files and presents the extracted data as records, also writes data as record into CSV files |
flightsql/example
Package example contains a FlightSQL Server implementation using sqlite as the backing engine.
|
Package example contains a FlightSQL Server implementation using sqlite as the backing engine. |
flightsql/schema_ref
Package schema_ref contains the expected reference Schemas to be used by FlightSQL servers and clients.
|
Package schema_ref contains the expected reference Schemas to be used by FlightSQL servers and clients. |
arrdata
Package arrdata exports arrays and records data ready to be used for tests.
|
Package arrdata exports arrays and records data ready to be used for tests. |
arrjson
Package arrjson provides types and functions to encode and decode ARROW types and data to and from JSON files.
|
Package arrjson provides types and functions to encode and decode ARROW types and data to and from JSON files. |
debug
Package debug provides APIs for conditional runtime assertions and debug logging.
|
Package debug provides APIs for conditional runtime assertions and debug logging. |
flight_integration/cmd/arrow-flight-integration-client
Client for use with Arrow Flight Integration tests via archery
|
Client for use with Arrow Flight Integration tests via archery |
testing/types
Package types contains user-defined types for use in the tests for the arrow package
|
Package types contains user-defined types for use in the tests for the arrow package |
cmd/arrow-cat
Command arrow-cat displays the content of an Arrow stream or file.
|
Command arrow-cat displays the content of an Arrow stream or file. |
cmd/arrow-ls
Command arrow-ls displays the listing of an Arrow file.
|
Command arrow-ls displays the listing of an Arrow file. |
Package math provides optimized mathematical functions for processing Arrow arrays.
|
Package math provides optimized mathematical functions for processing Arrow arrays. |
Package memory provides support for allocating and manipulating memory at a low level.
|
Package memory provides support for allocating and manipulating memory at a low level. |
Package tensor provides types that implement n-dimensional arrays.
|
Package tensor provides types that implement n-dimensional arrays. |