df

package module

v0.0.0-...-b595fe3 Latest Latest Go to latest Published: Sep 12, 2025 License: MIT Imports: 8 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/solutia-tech/dataframe

Links

Open Source Insights

README ¶

DataFrame

A high-performance data manipulation library for Go, providing pandas-like functionality with type-safe operations, comprehensive data I/O, and advanced analytics capabilities.

Features

DataFrame Operations: Table-like data structure with column type integrity
Series Support: Flexible array operations with type safety and NaN handling
Data I/O: Read from CSV, JSON, HTML, Excel, and structs; write to CSV and JSON
Data Manipulation: Filtering, grouping, aggregation, sorting, and subsetting
Statistical Operations: Built-in statistical functions with Gonum integration
Type Safety: Strong typing with automatic type inference and conversion
Performance: Optimized for large datasets with memory-efficient operations
Missing Data: Comprehensive NaN handling and data cleaning operations

Installation

go get github.com/solutia-tech/dataframe

Quick Start

Basic DataFrame Operations

package main

import (
    "fmt"
    df "github.com/solutia-tech/dataframe"
    "github.com/solutia-tech/dataframe/series"
)

func main() {
    // Create a DataFrame from series
    df := df.New(
        series.Strings([]string{"Alice", "Bob", "Charlie"}).SetName("Name"),
        series.Ints([]int{25, 30, 35}).SetName("Age"),
        series.Floats([]float64{65.5, 70.2, 68.9}).SetName("Weight"),
    )
    
    fmt.Println("DataFrame:")
    fmt.Println(df)
    
    // Get DataFrame dimensions
    fmt.Printf("Shape: %d rows, %d columns\n", df.Nrow(), df.Ncol())
    
    // Access columns
    names := df.Col("Name")
    ages := df.Col("Age")
    
    fmt.Println("Names:", names)
    fmt.Println("Ages:", ages)
}

Data Loading from CSV

package main

import (
    "fmt"
    "strings"
    "github.com/solutia-tech/dataframe/reader/csvreader"
)

// main parses an inline CSV document into a DataFrame and prints it,
// panicking if the reader reported an error.
func main() {
    // Inline CSV document: a header row followed by three records.
    csvData := `name,age,salary
Alice,25,50000
Bob,30,60000
Charlie,35,70000`

    // Load from CSV
    source := strings.NewReader(csvData)
    reader := csvreader.New(source)
    table := reader.Read()

    // Read reports failures through the DataFrame rather than a second
    // return value, so check it before using the result.
    if err := table.Error(); err != nil {
        panic(err)
    }

    fmt.Println("Loaded DataFrame:")
    fmt.Println(table)
}

Data Loading from JSON

package main

import (
    "fmt"
    "strings"
    "github.com/solutia-tech/dataframe/reader/jsonreader"
)

// main parses an inline JSON array of objects into a DataFrame and prints it,
// panicking if the reader reported an error.
func main() {
    // Inline JSON document: one object per row.
    payload := `[
        {"name": "Alice", "age": 25, "salary": 50000},
        {"name": "Bob", "age": 30, "salary": 60000},
        {"name": "Charlie", "age": 35, "salary": 70000}
    ]`

    // Load from JSON
    source := strings.NewReader(payload)
    reader := jsonreader.New(source)
    table := reader.Read()

    // Read reports failures through the DataFrame rather than a second
    // return value, so check it before using the result.
    if err := table.Error(); err != nil {
        panic(err)
    }

    fmt.Println("Loaded DataFrame:")
    fmt.Println(table)
}

Data Loading from Excel

package main

import (
    "fmt"
    "os"
    "github.com/solutia-tech/dataframe/reader/xlsreader"
)

// main reads data.xlsx twice: once with the reader's default options and once
// with explicit options, printing the resulting DataFrame each time. Any
// failure to open or read the workbook aborts the program.
func main() {
    // Load from Excel file
    file, err := os.Open("data.xlsx")
    if err != nil {
        panic(err)
    }
    defer file.Close()

    // Basic Excel reading
    df := xlsreader.Read(file)

    if df.Error() != nil {
        panic(df.Error())
    }

    fmt.Println("Loaded DataFrame from Excel:")
    fmt.Println(df)

    // Advanced Excel reading with options. The open error is checked here as
    // well, so a missing file is reported directly instead of handing a nil
    // file to the reader.
    file2, err := os.Open("data.xlsx")
    if err != nil {
        panic(err)
    }
    defer file2.Close()

    df2 := xlsreader.Read(file2,
        xlsreader.WithSheetName("Sales"),        // Read specific sheet
        xlsreader.HasHeader(true),               // First row is header
        xlsreader.SkipEmptyRows(true),           // Skip empty rows
        xlsreader.SkipEmptyColumns(false),       // Keep empty columns
    )

    if df2.Error() != nil {
        panic(df2.Error())
    }

    fmt.Println("Advanced Excel DataFrame:")
    fmt.Println(df2)
}

Data Filtering

package main

import (
    "fmt"
    df "github.com/solutia-tech/dataframe"
    "github.com/solutia-tech/dataframe/series"
)

// main builds a small DataFrame and filters its rows twice: once with a
// single condition and once with two conditions combined with logical OR.
func main() {
    // Create sample data. The variable is deliberately not named df: that
    // would shadow the imported package alias and make df.And, df.Or and df.F
    // below fail to compile.
    people := df.New(
        series.Strings([]string{"Alice", "Bob", "Charlie", "Diana"}).SetName("Name"),
        series.Ints([]int{25, 30, 35, 28}).SetName("Age"),
        series.Floats([]float64{65.5, 70.2, 68.9, 62.1}).SetName("Weight"),
    )

    // Filter rows where age > 30
    filtered := people.FilterAggregation(
        df.And,
        df.F{Colname: "Age", Comparator: series.Greater, Comparando: 30},
    )

    fmt.Println("Filtered DataFrame (Age > 30):")
    fmt.Println(filtered)

    // Multiple filters with OR
    filtered2 := people.FilterAggregation(
        df.Or,
        df.F{Colname: "Age", Comparator: series.Greater, Comparando: 30},
        df.F{Colname: "Weight", Comparator: series.Less, Comparando: 65.0},
    )

    fmt.Println("Filtered DataFrame (Age > 30 OR Weight < 65):")
    fmt.Println(filtered2)
}

Data Selection and Subsetting

package main

import (
    "fmt"
    df "github.com/solutia-tech/dataframe"
    "github.com/solutia-tech/dataframe/series"
)

// main demonstrates column selection by name, row subsetting by index, and
// single-column access on a small DataFrame.
func main() {
    people := df.New(
        series.Strings([]string{"Alice", "Bob", "Charlie", "Diana"}).SetName("Name"),
        series.Ints([]int{25, 30, 35, 28}).SetName("Age"),
        series.Floats([]float64{65.5, 70.2, 68.9, 62.1}).SetName("Weight"),
    )

    // Select specific columns by name. Select takes a single SelectIndexes
    // argument, so several names are passed as one []string.
    selected := people.Select([]string{"Name", "Weight"})
    fmt.Println("Selected columns:")
    fmt.Println(selected)

    // Select by index (first and third rows)
    indexed := people.Subset([]int{0, 2})
    fmt.Println("Selected rows:")
    fmt.Println(indexed)

    // Get specific column
    names := people.Col("Name")
    fmt.Println("Names column:", names)
}

Grouping and Aggregation

package main

import (
    "fmt"
    df "github.com/solutia-tech/dataframe"
    "github.com/solutia-tech/dataframe/series"
)

// main groups sample sales data by category and prints a single aggregation
// followed by several aggregations over the same column.
func main() {
    // Create sample sales data. The variable is deliberately not named df:
    // that would shadow the imported package alias and make
    // df.AggregationType and the df.Aggregation_* constants below fail to
    // compile.
    sales := df.New(
        series.Strings([]string{"A", "A", "B", "B", "C", "C"}).SetName("Category"),
        series.Floats([]float64{100, 150, 200, 250, 300, 350}).SetName("Sales"),
        series.Ints([]int{1, 2, 1, 2, 1, 2}).SetName("Quarter"),
    )

    // Group by category
    groups := sales.GroupBy("Category")

    // Calculate sum of sales per category
    aggregated := groups.Aggregation(
        []df.AggregationType{df.Aggregation_SUM},
        []string{"Sales"},
    )

    fmt.Println("Sales by Category (Sum):")
    fmt.Println(aggregated)

    // Multiple aggregations: each aggregation type is paired with the column
    // name at the same position in the second slice.
    multiAgg := groups.Aggregation(
        []df.AggregationType{df.Aggregation_SUM, df.Aggregation_MEAN, df.Aggregation_COUNT},
        []string{"Sales", "Sales", "Sales"},
    )

    fmt.Println("Multiple aggregations:")
    fmt.Println(multiAgg)
}

Statistical Operations

package main

import (
    "fmt"
    "math"
    df "github.com/solutia-tech/dataframe"
    "github.com/solutia-tech/dataframe/series"
)

func main() {
    // Create sample data with some NaN values
    df := df.New(
        series.Floats([]float64{1.0, 2.0, math.NaN(), 4.0, 5.0}).SetName("Values"),
        series.Floats([]float64{10.0, 20.0, 30.0, math.NaN(), 50.0}).SetName("Scores"),
    )
    
    // Calculate descriptive statistics
    values := df.Col("Values")
    
    fmt.Printf("Mean: %.2f\n", values.Mean())
    fmt.Printf("Median: %.2f\n", values.Median())
    fmt.Printf("Std: %.2f\n", values.StdDev())
    fmt.Printf("Min: %.2f\n", values.Min())
    fmt.Printf("Max: %.2f\n", values.Max())
    fmt.Printf("Sum: %.2f\n", values.Sum())
    fmt.Printf("Count: %d\n", values.Len())
    
    // Quantile calculation
    fmt.Printf("25th percentile: %.2f\n", values.Quantile(0.25))
    fmt.Printf("75th percentile: %.2f\n", values.Quantile(0.75))
}

Loading from Structs

package main

import (
    "fmt"
    df "github.com/solutia-tech/dataframe"
)

// Employee is the sample record type loaded into a DataFrame in this example.
// The `dataframe` struct tags appear to give the column name for each field
// (the example later groups by "department" and aggregates "salary") —
// confirm against the struct reader's documentation.
type Employee struct {
    Name   string  `dataframe:"name"`
    Age    int     `dataframe:"age"`
    Salary float64 `dataframe:"salary"`
    Dept   string  `dataframe:"department"`
}

// main loads a slice of Employee structs into a DataFrame, prints it, and
// then prints the mean salary per department.
func main() {
    employees := []Employee{
        {Name: "Alice", Age: 25, Salary: 50000, Dept: "Engineering"},
        {Name: "Bob", Age: 30, Salary: 60000, Dept: "Marketing"},
        {Name: "Charlie", Age: 35, Salary: 70000, Dept: "Engineering"},
    }

    // Load from struct slice. The variable is deliberately not named df: that
    // would shadow the imported package alias and make df.AggregationType
    // below fail to compile.
    // NOTE(review): LoadStructs is not listed in the package index on this
    // page; confirm whether it lives in the root package or in
    // reader/structreader.
    staff := df.LoadStructs(employees)

    if staff.Error() != nil {
        panic(staff.Error())
    }

    fmt.Println("Employees DataFrame:")
    fmt.Println(staff)

    // Group by department and calculate average salary
    groups := staff.GroupBy("department")
    avgSalary := groups.Aggregation(
        []df.AggregationType{df.Aggregation_MEAN},
        []string{"salary"},
    )

    fmt.Println("Average Salary by Department:")
    fmt.Println(avgSalary)
}

Data Joins

package main

import (
    "fmt"
    df "github.com/solutia-tech/dataframe"
    "github.com/solutia-tech/dataframe/series"
)

// main joins two DataFrames on their shared "Name" column using each of the
// inner, left, right and outer joins, printing every result under a heading.
func main() {
    // show prints a heading followed by the value on the next line.
    show := func(heading string, table any) {
        fmt.Println(heading)
        fmt.Println(table)
    }

    // Create two DataFrames that share the "Name" key column.
    ages := df.New(
        series.Strings([]string{"Alice", "Bob"}).SetName("Name"),
        series.Ints([]int{25, 30}).SetName("Age"),
    )
    departments := df.New(
        series.Strings([]string{"Alice", "Bob"}).SetName("Name"),
        series.Strings([]string{"Engineering", "Marketing"}).SetName("Department"),
    )

    // Inner join
    show("Inner Join:", ages.InnerJoin(departments, "Name"))

    // Left join
    show("Left Join:", ages.LeftJoin(departments, "Name"))

    // Right join
    show("Right Join:", ages.RightJoin(departments, "Name"))

    // Outer join
    show("Outer Join:", ages.OuterJoin(departments, "Name"))
}

Data Concatenation

package main

import (
    "fmt"
    df "github.com/solutia-tech/dataframe"
    "github.com/solutia-tech/dataframe/series"
)

// main concatenates DataFrames in both directions: RBind stacks the rows of
// two frames with the same columns, and CBind appends the columns of another
// frame.
func main() {
    // Create two DataFrames with identical column names.
    firstBatch := df.New(
        series.Strings([]string{"Alice", "Bob"}).SetName("Name"),
        series.Ints([]int{25, 30}).SetName("Age"),
    )
    secondBatch := df.New(
        series.Strings([]string{"Charlie", "Diana"}).SetName("Name"),
        series.Ints([]int{35, 28}).SetName("Age"),
    )

    // Concatenate by rows (RBind)
    stacked := firstBatch.RBind(secondBatch)
    fmt.Println("Concatenated DataFrame (rows):")
    fmt.Println(stacked)

    // Concatenate by columns (CBind)
    genderCol := series.Strings([]string{"F", "M"}).SetName("Gender")
    genders := df.New(genderCol)

    widened := firstBatch.CBind(genders)
    fmt.Println("Concatenated DataFrame (columns):")
    fmt.Println(widened)
}

Data Transformation

package main

import (
    "fmt"
    df "github.com/solutia-tech/dataframe"
    "github.com/solutia-tech/dataframe/series"
)

// main adds two columns to a DataFrame with Mutate and then rewrites the
// "Age" column with a column-wise apply.
func main() {
    people := df.New(
        series.Strings([]string{"Alice", "Bob", "Charlie"}).SetName("Name"),
        series.Ints([]int{25, 30, 35}).SetName("Age"),
    )

    // Add new column
    people = people.Mutate(series.Strings([]string{"F", "M", "M"}).SetName("Gender"))

    // Transform existing column (age in months)
    ageInMonths := people.Col("Age").Mul(12).SetName("AgeInMonths")
    people = people.Mutate(ageInMonths)

    fmt.Println("Transformed DataFrame:")
    fmt.Println(people)

    // Apply function to columns. The callback dispatches on the column name,
    // so it must run column-wise with Capply; Rapply hands the callback one
    // row at a time instead.
    transformed := people.Capply(func(s series.Serie) series.Serie {
        // Example: convert age to string with " years old" suffix
        if s.Name == "Age" {
            return s.Map(func(e series.Element) series.Element {
                age := e.Int()
                return &series.StringElement{E: fmt.Sprintf("%d years old", age)}
            })
        }
        return s
    })

    fmt.Println("Column-applied transformation:")
    fmt.Println(transformed)
}

Series Operations

package main

import (
    "fmt"
    "math"
    "github.com/solutia-tech/dataframe/series"
)

// main creates one series of each element type and demonstrates printing,
// type conversion, arithmetic, and numeric and string summaries.
func main() {
    // Create different types of series
    ints := series.Ints([]int{1, 2, 3, 4, 5})
    floats := series.Floats([]float64{1.1, 2.2, math.NaN(), 4.4, 5.5})
    strs := series.Strings([]string{"A", "B", "C", "D", "E"})
    bools := series.Bools([]bool{true, false, true, false, true})

    fmt.Println("Integer Series:", ints)
    fmt.Println("Float Series:", floats)
    fmt.Println("String Series:", strs)
    fmt.Println("Boolean Series:", bools)

    // Type conversion
    asFloat := ints.Float()
    fmt.Println("Converted to Float:", asFloat)

    // Mathematical operations
    twice := ints.Mul(2)
    fmt.Println("Doubled:", twice)

    // Statistical operations on the float series
    fmt.Printf("Mean: %.2f\n", floats.Mean())
    fmt.Printf("Std: %.2f\n", floats.StdDev())
    fmt.Printf("Sum: %.2f\n", floats.Sum())

    // String operations
    fmt.Printf("Max String: %s\n", strs.MaxStr())
    fmt.Printf("Min String: %s\n", strs.MinStr())
}

Data Writing

package main

import (
    "fmt"
    "os"
    df "github.com/solutia-tech/dataframe"
    "github.com/solutia-tech/dataframe/series"
    "github.com/solutia-tech/dataframe/writer/csvwriter"
    "github.com/solutia-tech/dataframe/writer/jsonwrite"
)

// main writes a sample DataFrame to output.csv and output.json, panicking if
// either file cannot be created or written.
func main() {
    // Create sample DataFrame
    frame := df.New(
        series.Strings([]string{"Alice", "Bob", "Charlie"}).SetName("Name"),
        series.Ints([]int{25, 30, 35}).SetName("Age"),
        series.Floats([]float64{65.5, 70.2, 68.9}).SetName("Weight"),
    )

    // Write to CSV
    csvOut, err := os.Create("output.csv")
    if err != nil {
        panic(err)
    }
    defer csvOut.Close()

    toCSV := csvwriter.New(csvOut)
    if err := toCSV.Write(&frame); err != nil {
        panic(err)
    }

    fmt.Println("DataFrame written to output.csv")

    // Write to JSON
    jsonOut, err := os.Create("output.json")
    if err != nil {
        panic(err)
    }
    defer jsonOut.Close()

    toJSON := jsonwrite.New(jsonOut)
    if err := toJSON.Write(&frame); err != nil {
        panic(err)
    }

    fmt.Println("DataFrame written to output.json")
}

Advanced Series Operations

package main

import (
    "fmt"
    "github.com/solutia-tech/dataframe/series"
)

// main demonstrates comparison, ordering, subsetting, slicing and
// concatenation on a float series holding the values 1 through 10.
func main() {
    numbers := series.Floats([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10})

    // Series comparison
    aboveFive := numbers.Compare(series.Greater, 5.0)
    fmt.Println("Values > 5:", aboveFive)

    // Series ordering
    order := numbers.Order(false) // ascending
    fmt.Println("Ordered indices:", order)

    // Series subsetting
    evenPositions := []int{0, 2, 4, 6, 8}
    picked := numbers.Subset(evenPositions)
    fmt.Println("Subset (even indices):", picked)

    // Series slicing
    window := numbers.Slice(2, 7) // from index 2 to 6
    fmt.Println("Slice (2:7):", window)

    // Series concatenation
    extra := series.Floats([]float64{11, 12, 13})
    combined := numbers.Concat(extra)
    fmt.Println("Concatenated:", combined)
}

Best Practices

Error Handling: Always check for errors after DataFrame operations using df.Error()
Type Safety: Use appropriate series types (Strings, Ints, Floats, Bools)
Memory Management: Use Copy() when you need to preserve original data
Data Cleaning: Handle NaN values appropriately in statistical operations
Performance: Use vectorized operations instead of loops when possible
Column Names: Use descriptive column names for better code readability
Grouping: Pre-filter data before grouping for better performance
Excel Files: Use SkipEmptyRows(true) for cleaner data and specify sheet names explicitly
File I/O: Always close files after reading and handle file opening errors properly
Data Types: Let the library detect types automatically unless you have specific requirements

License

Released under the MIT License. Developed with ❤️ by the Solutia Tech team.

Documentation ¶

Index ¶

Constants
type Aggregation
- func (a Aggregation) String() string
type AggregationType
- func (i AggregationType) String() string
type DataFrame
type F
type Groups
- func (gps Groups) Aggregation(typs []AggregationType, colnames []string) DataFrame
- func (g Groups) GetGroups() map[string]DataFrame
type LoadOption
type Matrix
type Order
- func RevSort(colname string) Order
- func Sort(colname string) Order
type SelectIndexes
type Writer

Constants ¶

View Source

const KEY_ERROR = "KEY_ERROR"

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type Aggregation ¶

type Aggregation int

Aggregation defines the filter aggregation

const (
	// Or aggregates filters with logical or
	Or Aggregation = iota
	// And aggregates filters with logical and
	And
)

func (Aggregation) String ¶

func (a Aggregation) String() string

type AggregationType ¶

type AggregationType int

AggregationType is the aggregation method type.

const (
	Aggregation_MAX    AggregationType = iota + 1 // MAX
	Aggregation_MIN                               // MIN
	Aggregation_MEAN                              // MEAN
	Aggregation_MEDIAN                            // MEDIAN
	Aggregation_STD                               // STD
	Aggregation_SUM                               // SUM
	Aggregation_COUNT                             // COUNT
)

func (AggregationType) String ¶

func (i AggregationType) String() string

type DataFrame ¶

type DataFrame struct {

	// Deprecated: use Error() instead.
	Err error
	// contains filtered or unexported fields
}

DataFrame is a data structure designed for operating on table-like data (such as Excel, CSV files, SQL table results...) where every column has to keep type integrity. As a general rule of thumb, variables are stored in columns, and every row of a DataFrame represents an observation for each variable.

In the real world, data is very messy and sometimes measurements are absent or data is missing. For this reason, DataFrame has support for NaN elements and allows the most common data cleaning and munging operations such as subsetting, filtering, type transformations, etc. In addition to this, this library provides the necessary functions to concatenate DataFrames (by rows or columns), different join operations (Inner, Outer, Left, Right, Cross) and the ability to read and write from different formats (CSV/JSON).

func LoadMaps ¶

func LoadMaps(maps []map[string]interface{}, options ...LoadOption) DataFrame

LoadMaps creates a new DataFrame based on the given maps. This function assumes that every map on the array represents a row of observations.

func LoadRecords ¶

func LoadRecords(records [][]string, options ...LoadOption) DataFrame

LoadRecords creates a new DataFrame based on the given records.

func New ¶

func New(se ...series.Serie) DataFrame

New is the generic DataFrame constructor

func (DataFrame) Arrange ¶

func (df DataFrame) Arrange(order ...Order) DataFrame

Arrange sorts the rows of a DataFrame according to the given Order.

func (DataFrame) CBind ¶

func (df DataFrame) CBind(dfb DataFrame) DataFrame

CBind combines the columns of this DataFrame and dfb DataFrame.

func (DataFrame) Capply ¶

func (df DataFrame) Capply(f func(series.Serie) series.Serie) DataFrame

Capply applies the given function to the columns of a DataFrame

func (DataFrame) Col ¶

func (df DataFrame) Col(colname string) series.Serie

Col returns a copy of the Series with the given column name contained in the DataFrame.

func (DataFrame) Concat ¶

func (df DataFrame) Concat(dfb DataFrame) DataFrame

Concat concatenates rows of two DataFrames like RBind, but also including unmatched columns.

func (DataFrame) Copy ¶

func (df DataFrame) Copy() DataFrame

Copy returns a copy of the DataFrame

func (DataFrame) CrossJoin ¶

func (df DataFrame) CrossJoin(b DataFrame) DataFrame

CrossJoin returns a DataFrame containing the cross join of two DataFrames.

func (DataFrame) Describe ¶

func (df DataFrame) Describe() DataFrame

Describe returns a DataFrame with the summary statistics for each column of the DataFrame.

func (DataFrame) Dims ¶

func (df DataFrame) Dims() (int, int)

Dims retrieves the dimensions of a DataFrame.

func (DataFrame) Drop ¶

func (df DataFrame) Drop(indexes SelectIndexes) DataFrame

Drop the given DataFrame columns

func (DataFrame) Elem ¶

func (df DataFrame) Elem(r, c int) series.Element

Elem returns the element on row `r` and column `c`. Will panic if the index is out of bounds.

func (*DataFrame) Error ¶

func (df *DataFrame) Error() error

Error returns the DataFrame's error, or nil if no error occurred.

func (DataFrame) Filter ¶

func (df DataFrame) Filter(filters ...F) DataFrame

Filter will filter the rows of a DataFrame based on the given filters. All filters on the argument of a Filter call are aggregated as an OR operation whereas if we chain Filter calls, every filter will act as an AND operation with regards to the rest.

func (DataFrame) FilterAggregation ¶

func (df DataFrame) FilterAggregation(agg Aggregation, filters ...F) DataFrame

FilterAggregation will filter the rows of a DataFrame based on the given filters. All filters passed to a FilterAggregation call are combined according to the supplied aggregation (Or or And).

func (DataFrame) GroupBy ¶

func (df DataFrame) GroupBy(colnames ...string) *Groups

GroupBy groups the DataFrame by the given columns.

func (DataFrame) InnerJoin ¶

func (df DataFrame) InnerJoin(b DataFrame, keys ...string) DataFrame

InnerJoin returns a DataFrame containing the inner join of two DataFrames.

func (DataFrame) LeftJoin ¶

func (df DataFrame) LeftJoin(b DataFrame, keys ...string) DataFrame

LeftJoin returns a DataFrame containing the left join of two DataFrames.

func (DataFrame) Maps ¶

func (df DataFrame) Maps() []map[string]interface{}

Maps returns the array-of-maps representation of a DataFrame.

func (DataFrame) Mutate ¶

func (df DataFrame) Mutate(s series.Serie) DataFrame

Mutate changes a column of the DataFrame with the given Series or adds it as a new column if the column name does not exist.

func (DataFrame) Names ¶

func (df DataFrame) Names() []string

Names returns the name of the columns on a DataFrame.

func (DataFrame) Ncol ¶

func (df DataFrame) Ncol() int

Ncol returns the number of columns on a DataFrame.

func (DataFrame) Nrow ¶

func (df DataFrame) Nrow() int

Nrow returns the number of rows on a DataFrame.

func (DataFrame) OuterJoin ¶

func (df DataFrame) OuterJoin(b DataFrame, keys ...string) DataFrame

OuterJoin returns a DataFrame containing the outer join of two DataFrames.

func (DataFrame) RBind ¶

func (df DataFrame) RBind(dfb DataFrame) DataFrame

RBind matches the column names of two DataFrames and returns combined rows from both of them.

func (DataFrame) Rapply ¶

func (df DataFrame) Rapply(f func(series.Serie) series.Serie) DataFrame

Rapply applies the given function to the rows of a DataFrame. Prior to applying the function the elements of each row are cast to a Series of a specific type. In order of priority: String -> Float -> Int -> Bool. This casting also takes place after the function application to equalize the type of the columns.

func (DataFrame) Records ¶

func (df DataFrame) Records() [][]string

Records returns the string record representation of a DataFrame.

func (DataFrame) Rename ¶

func (df DataFrame) Rename(newname, oldname string) DataFrame

Rename changes the name of one of the columns of a DataFrame

func (DataFrame) RightJoin ¶

func (df DataFrame) RightJoin(b DataFrame, keys ...string) DataFrame

RightJoin returns a DataFrame containing the right join of two DataFrames.

func (DataFrame) Select ¶

func (df DataFrame) Select(indexes SelectIndexes) DataFrame

Select the given DataFrame columns

func (DataFrame) Set ¶

func (df DataFrame) Set(indexes series.Indexes, newvalues DataFrame) DataFrame

Set will update the values of a DataFrame for the rows selected via indexes.

func (DataFrame) SetNames ¶

func (df DataFrame) SetNames(colnames ...string) error

SetNames changes the column names of a DataFrame to the ones passed as an argument.

func (DataFrame) String ¶

func (df DataFrame) String() (str string)

String implements the Stringer interface for DataFrame

func (DataFrame) Subset ¶

func (df DataFrame) Subset(indexes series.Indexes) DataFrame

Subset returns a subset of the rows of the original DataFrame based on the Series subsetting indexes.

func (DataFrame) Types ¶

func (df DataFrame) Types() []series.ElementType

Types returns the types of the columns on a DataFrame.

type F ¶

type F struct {
	// Colidx is presumably the index of the column to filter on, as an
	// alternative to Colname — TODO confirm how the two fields interact.
	Colidx     int
	// Colname is the name of the column whose values are compared
	// (the README examples use "Age" and "Weight").
	Colname    string
	// Comparator is the comparison to apply, e.g. series.Greater or series.Less.
	Comparator series.Comparator
	// Comparando is the value each element of the column is compared against.
	Comparando interface{}
}

F is the filtering structure

type Groups ¶

type Groups struct {
	Err error
	// contains filtered or unexported fields
}

Groups is the structure generated by GroupBy.

func (Groups) Aggregation ¶

func (gps Groups) Aggregation(typs []AggregationType, colnames []string) DataFrame

Aggregation aggregates the grouped DataFrame by the given aggregation types and column names.

func (Groups) GetGroups ¶

func (g Groups) GetGroups() map[string]DataFrame

GetGroups returns the grouped data frames created by GroupBy

type LoadOption ¶

type LoadOption func(*loadOptions)

LoadOption is the type used to configure the load of elements

func DefaultType ¶

func DefaultType(t series.ElementType) LoadOption

DefaultType sets the defaultType option for loadOptions.

func DetectTypes ¶

func DetectTypes(b bool) LoadOption

DetectTypes sets the detectTypes option for loadOptions.

func HasHeader ¶

func HasHeader(b bool) LoadOption

HasHeader sets the hasHeader option for loadOptions.

func NaNValues ¶

func NaNValues(nanValues []string) LoadOption

NaNValues sets the nanValues option for loadOptions.

func Names ¶

func Names(names ...string) LoadOption

Names sets the names option for loadOptions.

func WithComments ¶

func WithComments(b rune) LoadOption

WithComments sets the character that marks a CSV comment line; detected comment lines are removed.

func WithDelimiter ¶

func WithDelimiter(b rune) LoadOption

WithDelimiter sets the csv delimiter other than ',', for example '\t'

func WithLazyQuotes ¶

func WithLazyQuotes(b bool) LoadOption

WithLazyQuotes sets csv parsing option to LazyQuotes

func WithTypes ¶

func WithTypes(coltypes map[string]series.ElementType) LoadOption

WithTypes sets the types option for loadOptions.

type Matrix ¶

type Matrix interface {
	Dims() (r, c int)
	At(i, j int) float64
}

Matrix is an interface which is compatible with gonum's mat.Matrix interface

type Order ¶

type Order struct {
	// Colname is the name of the column to sort by.
	Colname string
	// Reverse selects reverse ordering; presumably false for Sort and true
	// for RevSort — confirm against the constructors.
	Reverse bool
}

Order is the ordering structure

func RevSort ¶

func RevSort(colname string) Order

RevSort returns an ordering structure for reverse column sorting.

func Sort ¶

func Sort(colname string) Order

Sort returns an ordering structure for regular column sorting.

type SelectIndexes ¶

type SelectIndexes any

SelectIndexes are the supported indexes used for the DataFrame.Select method. Currently supported are:

int              // Matches the given index number
[]int            // Matches all given index numbers
[]bool           // Matches all columns marked as true
string           // Matches the column with the matching column name
[]string         // Matches all columns with the matching column names
Series [Int]     // Same as []int
Series [Bool]    // Same as []bool
Series [String]  // Same as []string

type Writer ¶

type Writer interface {
	// Write writes a DataFrame to a source.
	Write(df *DataFrame) error
}

Writer is the interface for writing a DataFrame. It is used to write a DataFrame to a source.

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
internal
reader
csvreader
htmlreader
jsonreader
recordreader
structreader
xlsreader
series
writer
csvwriter
jsonwrite

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL