Documentation
¶
Overview ¶
Package file implements Input/Output for text files. Text files can
- Be delimited or fixed-width
- Have header rows or not
Index ¶
- func Rdrs(rdr0 *Reader, nRdrs int) (r []chutils.Input, err error)
- func Wrtrs(tmpDir string, nWrtr int, con *chutils.Connect, separator rune, eol rune, ...) (wrtrs []chutils.Output, err error)
- type Reader
- func (rdr *Reader) Close() error
- func (rdr *Reader) CountLines() (numLines int, err error)
- func (rdr *Reader) EOL() rune
- func (rdr *Reader) Init(key string, engine chutils.EngineType) error
- func (rdr *Reader) Name() string
- func (rdr *Reader) Read(nTarget int, validate bool) (data []chutils.Row, valid []chutils.Valid, err error)
- func (rdr *Reader) Reset() error
- func (rdr *Reader) Seek(lineNo int) error
- func (rdr *Reader) Separator() rune
- func (rdr *Reader) SetTableSpec(ts *chutils.TableDef)
- func (rdr *Reader) TableSpec() *chutils.TableDef
- type Writer
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type Reader ¶
// Reader implements the chutils.Input interface.
type Reader struct {
Skip int // Skip is the number of rows to skip in the file
RowsRead int // RowsRead is the current count of rows read from the file (includes header)
MaxRead int // MaxRead is the maximum number of rows to read
Width int // Width is the line width for flat files
Quote rune // Quote is the optional quote character around strings that contain the Separator
// contains filtered or unexported fields
}
Reader implements chutils.Input interface.
func NewReader ¶
func NewReader(filename string, separator rune, eol rune, quote rune, width int, skip int, maxRead int, rws io.ReadSeekCloser, bufSize int) *Reader
NewReader initializes an instance of Reader
func (*Reader) CountLines ¶
CountLines returns the number of rows in the source data. This does not include any header rows.
func (*Reader) Init ¶
func (rdr *Reader) Init(key string, engine chutils.EngineType) error
Init initializes the FieldDefs slice of Reader.TableSpec() from the header row of the input. It does not set any of the field types.
func (*Reader) Read ¶
func (rdr *Reader) Read(nTarget int, validate bool) (data []chutils.Row, valid []chutils.Valid, err error)
Read reads nTarget rows. If nTarget == 0, the entire file is read.
If validate == true:
- The data is validated according to the rules in rdr.TableSpec.
- The results are returned as the slice valid.
- data is returned with the fields appropriately typed.
If validate == false:
- The data is not validated.
- The return slice valid is nil
- The fields are returned as strings.
err returns io.EOF at end of file
Example (CSV) ¶
Loading a CSV, cleaning values and loading into ClickHouse using package file reader and writer
/*
This example reads a CSV file, cleans its values and loads the result into ClickHouse.

If you haven't created the table first, you'll get this error simply importing the file via clickhouse-client
Code: 60. DB::Exception: Received from 127.0.0.1:9000. DB::Exception: Table testing.values doesn't exist. (UNKNOWN_TABLE)
Once the table exists, the clickhouse-client approach produces this error:
Row 3:
Column 0, name: id, type: String, parsed text: "1B23"
Column 1, name: zip, type: FixedString(5), parsed text: "77810"
Column 2, name: value, type: Float64, parsed text: "NA"ERROR
Code: 27. DB::Exception: Cannot parse NaN. (CANNOT_PARSE_INPUT_ASSERTION_FAILED) (version 22.4.5.9 (official build))
/home/will/tmp/zip_data.csv:
id,zip,value
1A34,90210,20.8
1X88,43210,19.2
1B23,77810,NA
1r99,94043,100.4
1x09,hello,9.9
*/
const (
	inFile  = "/home/will/tmp/zip_data.csv" // source data
	tmpFile = "/home/will/tmp/tmp.csv"      // temp file to write data to for import
	table   = "testing.values"              // ClickHouse destination table
)
var con *chutils.Connect
con, err := chutils.NewConnect("http", "127.0.0.1", "tester", "testGoNow")
if err != nil {
	log.Fatalln(err)
}
// Each deferred cleanup reports the error returned by its own call; the
// outer err is nil by the time these run and must not be logged instead.
defer func() {
	if e := con.Close(); e != nil {
		log.Fatalln(e)
	}
}()
f, err := os.Open(inFile)
if err != nil {
	log.Fatalln(err)
}
rdr := NewReader(inFile, ',', '\n', '"', 0, 1, 0, f, 50000)
defer func() {
	if e := rdr.Close(); e != nil {
		log.Fatalln(e)
	}
}()
// Build the field definitions from the header row.
if e := rdr.Init("id", chutils.MergeTree); e != nil {
	log.Fatalln(e)
}
// Impute the field types from the data.
if e := rdr.TableSpec().Impute(rdr, 0, .95); e != nil {
	log.Fatalln(e)
}
// Check the internal consistency of TableSpec
if e := rdr.TableSpec().Check(); e != nil {
	log.Fatalln(e)
}
// Specify zip as FixedString(5) with a missing value of 00000
_, fd, err := rdr.TableSpec().Get("zip")
if err != nil {
	log.Fatalln(err)
}
// zip will impute to int if we don't make this change
fd.ChSpec.Base = chutils.ChFixedString
fd.ChSpec.Length = 5
fd.Missing = "00000"
// Only these zip values are legal; anything else is replaced by fd.Missing.
legal := make(map[string]int)
legal["90210"], legal["43210"], legal["77810"], legal["94043"] = 1, 1, 1, 1
fd.Legal.Levels = &legal
// Specify value as having a range of [0,30] with a missing value of -1.0
_, fd, err = rdr.TableSpec().Get("value")
if err != nil {
	log.Fatalln(err)
}
fd.Legal.HighLimit = 30.0
fd.Legal.LowLimit = 0.0
fd.Missing = -1.0
rdr.TableSpec().Engine = chutils.MergeTree
rdr.TableSpec().Key = "id"
if err := rdr.TableSpec().Create(con, table); err != nil {
	log.Fatalln(err)
}
fx, err := os.Create(tmpFile)
if err != nil {
	log.Fatalln(err)
}
defer func() {
	if e := fx.Close(); e != nil {
		log.Fatalln(e)
	}
}()
defer func() {
	if e := os.Remove(tmpFile); e != nil {
		log.Fatalln(e)
	}
}()
// Write the cleaned rows to the temp file and load them into ClickHouse.
wrtr := NewWriter(fx, tmpFile, con, '|', '\n', table)
if err := chutils.Load(rdr, wrtr); err != nil {
	log.Fatalln(err)
}
qry := fmt.Sprintf("SELECT * FROM %s", table)
res, err := con.Query(qry)
if err != nil {
	log.Fatalln(err)
}
defer func() {
	if e := res.Close(); e != nil {
		log.Fatalln(e)
	}
}()
for res.Next() {
	var (
		id    string
		zip   string
		value float64
	)
	if e := res.Scan(&id, &zip, &value); e != nil {
		log.Fatalln(e)
	}
	fmt.Println(id, zip, value)
}
Output: 1A34 90210 20.8 1B23 77810 -1 1X88 43210 19.2 1r99 94043 -1 1x09 00000 9.9
func (*Reader) SetTableSpec ¶
SetTableSpec sets Reader.tablespec. Needed if tablespec is not created by Reader.TableSpec().Impute().
type Writer ¶
// Writer implements chutils.Output.
type Writer struct {
io.WriteCloser // embedded destination; any io.WriteCloser is accepted, typically a file
Table string // Table is the ClickHouse table to Insert to
// contains filtered or unexported fields
}
Writer implements chutils.Output. Writer will accept any type that satisfies io.WriteCloser. Typically, this would be a file.
func NewWriter ¶
func NewWriter(f io.WriteCloser, name string, con *chutils.Connect, separator rune, eol rune, table string) *Writer
NewWriter creates a new Writer instance
func (*Writer) Insert ¶
Insert inserts the file Writer.Name into ClickHouse table Writer.Table via the clickhouse-client program.