yacr

package module
v0.0.0-...-2234ea3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 22, 2018 License: BSD-3-Clause Imports: 14 Imported by: 0

README

Yet another CSV reader (and writer) with small memory usage.

All credit goes to:

  • Rob Pike, creator of Scanner interface,
  • D. Richard Hipp, for his CSV parser implementation.

Build Status

GoDoc

There is a standard package named encoding/csv.

BenchmarkParsing	    5000	    381518 ns/op	 256.87 MB/s	    4288 B/op	       5 allocs/op
BenchmarkQuotedParsing	    5000	    487599 ns/op	 209.19 MB/s	    4288 B/op	       5 allocs/op
BenchmarkEmbeddedNL	    5000	    594618 ns/op	 201.81 MB/s	    4288 B/op	       5 allocs/op
BenchmarkStdParser	     500	   5026100 ns/op	  23.88 MB/s	  625499 B/op	   16037 allocs/op
BenchmarkYacrParser	    5000	    593165 ns/op	 202.30 MB/s	    4288 B/op	       5 allocs/op
BenchmarkYacrWriter	  200000	      9433 ns/op	  98.05 MB/s	    2755 B/op	       0 allocs/op
BenchmarkStdWriter	  100000	     27804 ns/op	  33.27 MB/s	    2755 B/op	       0 allocs/op

USAGES

Documentation

Overview

Package yacr is yet another CSV reader (and writer) with small memory usage.

Example
package main

import (
	"fmt"
	"os"

	yacr "github.com/gwenn/yacr"
)

// main copies tab-separated fields from standard input to standard output,
// with quoting disabled on both the reader and the writer.
func main() {
	const tab = '\t'
	in := yacr.NewReader(os.Stdin, tab, false, false)
	out := yacr.NewWriter(os.Stdout, tab, false)

	for {
		// Stop as soon as either side reports a failure (or input ends).
		if !in.Scan() {
			break
		}
		if !out.Write(in.Bytes()) {
			break
		}
		// Mirror the record boundaries of the input on the output.
		if in.EndOfRecord() {
			out.EndOfRecord()
		}
	}
	out.Flush()

	// Report the reader error first, then the writer error.
	for _, err := range []error{in.Err(), out.Err()} {
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
	}
}
Output:

Example (Reader)
package main

import (
	"fmt"
	"strings"

	yacr "github.com/gwenn/yacr"
)

// main prints every field of a small CSV document, separating fields with a
// space and closing each record with "]".
func main() {
	const input = "c1,\"c\"\"2\",\"c\n3\",\"c,4\""
	reader := yacr.DefaultReader(strings.NewReader(input))

	fmt.Print("[")
	for reader.Scan() {
		fmt.Print(reader.Text())

		// The last field of a record closes the bracket; any other field is
		// followed by a single space.
		terminator := " "
		if reader.EndOfRecord() {
			terminator = "]\n"
		}
		fmt.Print(terminator)
	}

	err := reader.Err()
	if err != nil {
		fmt.Println(err)
	}
}
Output:

[c1 c"2 c
3 c,4]
Example (Writer)
package main

import (
	"fmt"
	"os"

	yacr "github.com/gwenn/yacr"
)

// main writes four fields, some of which need quoting, as a single CSV line
// on standard output.
func main() {
	fields := []string{"c1", "c\"2", "c\n3", "c,4"}
	out := yacr.DefaultWriter(os.Stdout)

	for i := 0; i < len(fields); i++ {
		// WriteString reports false once the writer has failed; stop then.
		if ok := out.WriteString(fields[i]); !ok {
			break
		}
	}
	out.Flush()

	err := out.Err()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
Output:

c1,"c""2","c
3","c,4"

Index

Examples

Constants

This section is empty.

Variables

View Source
// Sentinel errors reported by the Writer when a value cannot be written
// in unquoted mode.
var (
	// ErrSeparator signals that a value holds the separator while quoting is disabled.
	ErrSeparator = errors.New("yacr.Writer: separator in value")
	// ErrNewLine signals that a value holds a newline while quoting is disabled.
	ErrNewLine = errors.New("yacr.Writer: newline character in value")
)

Functions

func IsNumber

func IsNumber(s []byte) (isNum bool, isReal bool)

IsNumber determines if the string is a number or not. Only works for single-byte encodings (ASCII, ISO-8859-1) and UTF-8.

func Zopen

func Zopen(filepath string) (io.ReadCloser, error)

Zopen transparently opens gzip/bzip files (based on their extension).

Types

type Reader

// Reader reads CSV data field by field through the embedded bufio.Scanner.
type Reader struct {
	*bufio.Scanner

	Trim    bool // trim spaces (only on unquoted values). Breaks the RFC 4180 rule: "Spaces are considered part of a field and should not be ignored."
	Comment byte // character marking the start of a line comment. When specified (not 0), a line comment appears as an empty line.
	Lazy    bool // specifies whether quoted values may contain an unescaped quote that is not followed by a separator or a newline

	Headers map[string]int // maps each header to its index (the first index is 1)
	// contains filtered or unexported fields
}

Reader provides an interface for reading CSV data (compatible with rfc4180 and extended with the option of having a separator other than ","). Successive calls to the Scan method will step through the 'fields', skipping the separator/newline between the fields. The EndOfRecord method tells when a field is terminated by a line break.

func DefaultReader

func DefaultReader(rd io.Reader) *Reader

DefaultReader creates a "standard" CSV reader (separator is comma and quoted mode active)

func NewReader

func NewReader(r io.Reader, sep byte, quoted, guess bool) *Reader

NewReader returns a new CSV scanner to read from r. When quoted is false, values must not contain a separator or newline.

func (*Reader) EndOfRecord

func (s *Reader) EndOfRecord() bool

EndOfRecord returns true when the most recent field has been terminated by a newline (not a separator).

func (*Reader) IsNumber

func (s *Reader) IsNumber() (isNum bool, isReal bool)

IsNumber determines if the current token is a number or not. Only works for single-byte encodings (ASCII, ISO-8859-1) and UTF-8.

func (*Reader) LineNumber

func (s *Reader) LineNumber() int

LineNumber returns current line number (not record number)

func (*Reader) ScanField

func (s *Reader) ScanField(data []byte, atEOF bool) (advance int, token []byte, err error)

ScanField implements bufio.SplitFunc for CSV. Lexing is adapted from csv_read_one_field function in SQLite3 shell sources.

func (*Reader) ScanHeaders

func (s *Reader) ScanHeaders() error

ScanHeaders loads current line as the header line.

func (*Reader) ScanRecord

func (s *Reader) ScanRecord(values ...interface{}) (int, error)

ScanRecord decodes the fields of one line into values. Empty lines are skipped. It works like fmt.Scan or database/sql's Rows.Scan. It returns (0, nil) on EOF, (*, err) on error, and (n >= 1, nil) on success (n may be less than or greater than len(values)).

// Typical ScanRecord loop: read records until EOF or the first error.
var n int
var err error
for {
	values := make([]string, N)
	n, err = s.ScanRecord(&values[0] /*, &values[1], ...*/)
	if err != nil || n == 0 {
		break // EOF (n == 0) or error; err is checked after the loop
	}
	if n > N {
		n = N // ignore extra values
	}
	for _, value := range values[:n] {
		// ...
	}
}
if err != nil {
	// error handling
}
Example
package main

import (
	"fmt"
	"strings"

	yacr "github.com/gwenn/yacr"
)

// main decodes four records of four integers each and prints one record per
// line, enclosed in brackets.
func main() {
	const data = "11,12,13,14\n21,22,23,24\n31,32,33,34\n41,42,43,44"
	reader := yacr.DefaultReader(strings.NewReader(data))

	fmt.Print("[")
	var a, b, c, d int
	for {
		count, err := reader.ScanRecord(&a, &b, &c, &d)
		if err != nil {
			fmt.Println(err)
			break
		}
		// Anything other than a complete record of four fields ends the loop.
		if count != 4 {
			break
		}
		fmt.Println(a, b, c, d)
	}
	fmt.Print("]")
}
Output:

[11 12 13 14
21 22 23 24
31 32 33 34
41 42 43 44
]

func (*Reader) ScanRecordByName

func (s *Reader) ScanRecordByName(args ...interface{}) (int, error)

ScanRecordByName decodes the fields of one line by name; args are given as (name1, value1, ...) pairs. The specified names must match Headers.

func (*Reader) ScanValue

func (s *Reader) ScanValue(value interface{}) error

ScanValue advances to the next token and decodes field's content to value. The value may point to data that will be overwritten by a subsequent call to Scan.

func (*Reader) Sep

func (s *Reader) Sep() byte

Sep returns the value separator in use (either specified or guessed).

func (*Reader) SkipRecords

func (s *Reader) SkipRecords(n int) error

SkipRecords skips n records/headers

func (*Reader) Value

func (s *Reader) Value(value interface{}) error

Value decodes field's content to value. The value may point to data that will be overwritten by a subsequent call to Scan.

Example
package main

import (
	"fmt"
	"strings"

	yacr "github.com/gwenn/yacr"
)

// main decodes every field of a one-line CSV document into an int and prints
// the values separated by spaces, closing the record with "]".
func main() {
	const input = "1,\"2\",3,4"
	reader := yacr.DefaultReader(strings.NewReader(input))

	fmt.Print("[")
	var n int
	for reader.Scan() {
		err := reader.Value(&n)
		if err != nil {
			fmt.Println(err)
			break
		}
		fmt.Print(n)

		// The last field of a record closes the bracket; any other field is
		// followed by a single space.
		terminator := " "
		if reader.EndOfRecord() {
			terminator = "]\n"
		}
		fmt.Print(terminator)
	}

	if err := reader.Err(); err != nil {
		fmt.Println(err)
	}
}
Output:

[1 2 3 4]

type Writer

// Writer writes CSV data; UseCRLF is its only exported option.
type Writer struct {
	UseCRLF bool // when true, \r\n is used as the line terminator
	// contains filtered or unexported fields
}

Writer provides an interface for writing CSV data (compatible with rfc4180 and extended with the option of having a separator other than ","). Successive calls to the Write method will automatically insert the separator. The EndOfRecord method tells when a line break is inserted.

func DefaultWriter

func DefaultWriter(wr io.Writer) *Writer

DefaultWriter creates a "standard" CSV writer (separator is comma and quoted mode active)

func NewWriter

func NewWriter(w io.Writer, sep byte, quoted bool) *Writer

NewWriter returns a new CSV writer.

func (*Writer) EndOfRecord

func (w *Writer) EndOfRecord()

EndOfRecord tells the Writer that the current record is complete, so that a line break is inserted.

func (*Writer) Err

func (w *Writer) Err() error

Err returns the first error that was encountered by the Writer.

func (*Writer) Flush

func (w *Writer) Flush()

Flush ensures the writer's buffer is flushed.

func (*Writer) Write

func (w *Writer) Write(value []byte) bool

Write ensures that value is quoted when needed.

func (*Writer) WriteRecord

func (w *Writer) WriteRecord(values ...interface{}) bool

WriteRecord ensures that values are quoted when needed. It's like fmt.Println.

func (*Writer) WriteString

func (w *Writer) WriteString(value string) bool

WriteString ensures that value is quoted when needed.

func (*Writer) WriteValue

func (w *Writer) WriteValue(value interface{}) bool

WriteValue ensures that value is quoted when needed. Value's type/kind is used to encode value to text.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL