reader

package module
v2.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 10, 2025 License: BSD-3-Clause Imports: 21 Imported by: 49

README

go-reader

There are many interfaces for reading files. This one is ours. It returns io.ReadSeekCloser instances.

Documentation

Go Reference

Example

Readers are instantiated with the reader.NewReader method which takes as its arguments a context.Context instance and a URI string. The URI's scheme represents the type of reader it implements and the remaining (URI) properties are used by that reader type to instantiate itself.

For example to read files from a directory on the local filesystem you would write:

package main

import (
	"context"
	"io"
	"os"

	"github.com/whosonfirst/go-reader/v2"
)

// main reads "example.txt" from the /usr/local/data directory on the local
// filesystem and copies its contents to STDOUT.
func main() {
	ctx := context.Background()

	// Check each error explicitly; a failed NewReader would otherwise surface
	// as a nil dereference on the next line.
	r, err := reader.NewReader(ctx, "file:///usr/local/data")
	if err != nil {
		panic(err)
	}

	fh, err := r.Read(ctx, "example.txt")
	if err != nil {
		panic(err)
	}
	defer fh.Close()

	if _, err := io.Copy(os.Stdout, fh); err != nil {
		panic(err)
	}
}

There is also a handy "null" reader in case you need a "pretend" reader that doesn't actually do anything:

package main

import (
	"context"
	"io"
	"os"

	"github.com/whosonfirst/go-reader/v2"
)

// main reads "example.txt" through the "null" reader and copies whatever it
// returns to STDOUT.
func main() {
	ctx := context.Background()

	// Check each error explicitly rather than discarding it; a failed call
	// would otherwise surface as a nil dereference on the next line.
	r, err := reader.NewReader(ctx, "null://")
	if err != nil {
		panic(err)
	}

	fh, err := r.Read(ctx, "example.txt")
	if err != nil {
		panic(err)
	}
	defer fh.Close()

	if _, err := io.Copy(os.Stdout, fh); err != nil {
		panic(err)
	}
}

Interfaces

reader.Reader
// Reader is an interface for reading data from multiple sources or targets.
type Reader interface {
	// Read returns an `io.ReadSeekCloser` instance for a URI resolved by the instance implementing the `Reader` interface.
	Read(context.Context, string) (io.ReadSeekCloser, error)
	// Exists returns a boolean value indicating whether a URI already exists.
	Exists(context.Context, string) (bool, error)
	// ReaderURI returns the absolute path for a file, as determined by the instance implementing the `Reader` interface.
	ReaderURI(context.Context, string) string
}

Custom readers

Custom readers need to:

  1. Implement the interface above.
  2. Announce their availability by calling the reader.RegisterReader function on initialization, passing in an initialization function that matches the reader.ReaderInitializationFunc type.

For example, this is how the http:// reader is implemented:

package reader

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"path/filepath"
	"time"

	"github.com/whosonfirst/go-ioutil"
)

// HTTPReader is a struct that implements the `Reader` interface for reading
// documents from an HTTP(S) resource.
type HTTPReader struct {
	Reader
	// url is the parsed base URL that the paths passed to Read and Exists are joined onto.
	url        *url.URL
	// throttle is received from before every request to rate-limit outgoing calls.
	throttle   <-chan time.Time
	// user_agent, when non-empty, is sent as the User-Agent header of every request.
	user_agent string
}

// init registers NewHTTPReader as the initialization function for both the
// "http" and "https" schemes, panicking if either registration fails.
func init() {
	ctx := context.Background()

	for _, scheme := range []string{"http", "https"} {
		if err := RegisterReader(ctx, scheme, NewHTTPReader); err != nil {
			panic(err)
		}
	}
}

// NewHTTPReader returns a new `Reader` instance for reading documents from an
// HTTP(S) resource, configured by 'uri'. The optional "user-agent" query
// parameter of 'uri' is sent as the User-Agent header of every request, and
// requests are throttled to three per second.
func NewHTTPReader(ctx context.Context, uri string) (Reader, error) {

	base, err := url.Parse(uri)

	if err != nil {
		return nil, err
	}

	hr := &HTTPReader{
		url:      base,
		throttle: time.Tick(time.Second / 3),
		// An absent parameter yields "", which is the same as leaving the field unset.
		user_agent: base.Query().Get("user-agent"),
	}

	return hr, nil
}

// Exists returns a boolean value indicating whether 'uri', joined onto the
// reader's base URL, can be retrieved. It issues a HEAD request and reports
// true only when the response status is 200 OK; any other status yields
// (false, nil). An error is returned if 'ctx' is cancelled while waiting for
// the throttle, or if the request cannot be created or executed.
func (r *HTTPReader) Exists(ctx context.Context, uri string) (bool, error) {

	// Wait for the next throttle tick, but give up if the caller cancels first.
	select {
	case <-r.throttle:
	case <-ctx.Done():
		return false, ctx.Err()
	}

	// Work on a copy so the reader's base URL is never mutated.
	u := *r.url

	// filepath.Join uses the OS path separator; normalise to "/" so the URL
	// path is correct on every platform.
	u.Path = filepath.ToSlash(filepath.Join(u.Path, uri))

	// Bind the request to ctx so cancellation and deadlines are honoured.
	req, err := http.NewRequestWithContext(ctx, http.MethodHead, u.String(), nil)

	if err != nil {
		return false, fmt.Errorf("Failed to create new request, %w", err)
	}

	if r.user_agent != "" {
		req.Header.Set("User-Agent", r.user_agent)
	}

	cl := &http.Client{}

	rsp, err := cl.Do(req)

	if err != nil {
		return false, err
	}

	defer rsp.Body.Close()

	return rsp.StatusCode == http.StatusOK, nil
}

// Read will open an `io.ReadSeekCloser` for the resource located at 'uri',
// joined onto the reader's base URL. It issues a GET request and returns an
// error if 'ctx' is cancelled while waiting for the throttle, if the request
// cannot be created or executed, if the response status is not 200 OK, or if
// the response body cannot be wrapped. The request is bound to 'ctx', so 'ctx'
// should remain valid for as long as the returned body is being read.
func (r *HTTPReader) Read(ctx context.Context, uri string) (io.ReadSeekCloser, error) {

	// Wait for the next throttle tick, but give up if the caller cancels first.
	select {
	case <-r.throttle:
	case <-ctx.Done():
		return nil, ctx.Err()
	}

	// Work on a copy so the reader's base URL is never mutated.
	u := *r.url

	// filepath.Join uses the OS path separator; normalise to "/" so the URL
	// path is correct on every platform.
	u.Path = filepath.ToSlash(filepath.Join(u.Path, uri))

	// Bind the request to ctx so cancellation and deadlines are honoured.
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)

	if err != nil {
		return nil, fmt.Errorf("Failed to create new request, %w", err)
	}

	if r.user_agent != "" {
		req.Header.Set("User-Agent", r.user_agent)
	}

	cl := &http.Client{}

	rsp, err := cl.Do(req)

	if err != nil {
		return nil, fmt.Errorf("Failed to execute request, %w", err)
	}

	if rsp.StatusCode != http.StatusOK {
		// The body is not handed to the caller on this path, so close it here
		// to avoid leaking the underlying connection.
		rsp.Body.Close()
		return nil, fmt.Errorf("Unexpected status code: %s", rsp.Status)
	}

	fh, err := ioutil.NewReadSeekCloser(rsp.Body)

	if err != nil {
		// Ownership of the body was never transferred; release it.
		rsp.Body.Close()
		return nil, fmt.Errorf("Failed to create new ReadSeekCloser, %w", err)
	}

	return fh, nil
}

// ReaderURI returns 'uri' unchanged; it is not joined onto the reader's base URL.
func (r *HTTPReader) ReaderURI(ctx context.Context, uri string) string {
	return uri
}

And then to use it you would do this:

package main

import (
	"context"
	"io"
	"os"

	"github.com/whosonfirst/go-reader/v2"
)

// main reads a single GeoJSON record from https://data.whosonfirst.org and
// copies its contents to STDOUT.
func main() {
	ctx := context.Background()

	// Check each error explicitly; a failed request would otherwise surface
	// as a nil dereference when the file handle is used.
	r, err := reader.NewReader(ctx, "https://data.whosonfirst.org")
	if err != nil {
		panic(err)
	}

	fh, err := r.Read(ctx, "101/736/545/101736545.geojson")
	if err != nil {
		panic(err)
	}
	defer fh.Close()

	if _, err := io.Copy(os.Stdout, fh); err != nil {
		panic(err)
	}
}

Available readers

"blob"

Read files from any registered Go Cloud Blob source. For example:

import (
	"context"

	_ "github.com/whosonfirst/go-reader-blob/v2"
	_ "gocloud.dev/blob/s3blob"	

	"github.com/whosonfirst/go-reader/v2"
)

// main instantiates a reader for an S3 bucket via the "s3" Go Cloud Blob scheme.
func main() {
	ctx := context.Background()
	// The error from reader.NewReader is discarded in this example.
	r, _ := reader.NewReader(ctx, "s3://whosonfirst-data?region=us-west-1")
}
findingaid://

Read files derived from a Who's On First style findingaid endpoint.

import (
	"context"
	"fmt"
	"os"

	_ "github.com/whosonfirst/go-reader-findingaid/v2"

	"github.com/whosonfirst/go-reader/v2"
)

// main builds a findingaid:// reader URI whose template points at fixture
// data beneath the current working directory, then instantiates a reader for it.
func main() {

	// The error from os.Getwd is discarded in this example.
	cwd, _ := os.Getwd()
	template := fmt.Sprintf("fs://%s/fixtures/{repo}/data", cwd)
	reader_uri := fmt.Sprintf("findingaid://sqlite?dsn=fixtures/sfomuseum-data-maps.db&template=%s", template)

	ctx := context.Background()
	// The error from reader.NewReader is discarded in this example.
	r, _ := reader.NewReader(ctx, reader_uri)
}

* https://github.com/whosonfirst/go-reader-findingaid

### github://

Read files from a GitHub repository.

import ( "context"

_ "github.com/whosonfirst/go-reader-github/v2"

"github.com/whosonfirst/go-reader/v2"

)

func main() { ctx := context.Background() r, _ := reader.NewReader(ctx, "github://{GITHUB_OWNER}/{GITHUB_REPO}")

// to specify a specific branch you would do this:
// r, _ := reader.NewReader(ctx, "github://{GITHUB_OWNER}/{GITHUB_REPO}?branch={GITHUB_BRANCH}")

}


* https://github.com/whosonfirst/go-reader-github

### githubapi://

Read files from a GitHub repository using the GitHub API.

import ( "context"

_ "github.com/whosonfirst/go-reader-github/v2"

"github.com/whosonfirst/go-reader/v2"

)

func main() { ctx := context.Background() r, _ := reader.NewReader(ctx, "githubapi://{GITHUB_OWNER}/{GITHUB_REPO}?access_token={GITHUBAPI_ACCESS_TOKEN}")

// to specify a specific branch you would do this:
// r, _ := reader.NewReader(ctx, "githubapi://{GITHUB_OWNER}/{GITHUB_REPO}/?branch={GITHUB_BRANCH}&access_token={GITHUBAPI_ACCESS_TOKEN}")

}


* https://github.com/whosonfirst/go-reader-github

### http:// and https://

Read files from an HTTP(S) endpoint.

import ( "context"

"github.com/whosonfirst/go-reader/v2"

)

func main() { ctx := context.Background() r, _ := reader.NewReader(ctx, "https://{HTTP_HOST_AND_PATH}") }


* https://github.com/whosonfirst/go-reader-http

### file://

Read files from a local filesystem.

import ( "context"

"github.com/whosonfirst/go-reader/v2"

)

func main() { ctx := context.Background() r, _ := reader.NewReader(ctx, "file://{PATH_TO_DIRECTORY}") }


If you are importing the `go-reader-blob` package and using the GoCloud's [fileblob](https://gocloud.dev/howto/blob/#local) driver then instantiating the `file://` scheme will fail since it will have already been registered. You can work around this by using the `fs://` scheme. For example:

r, _ := reader.NewReader(ctx, "fs://{PATH_TO_DIRECTORY}")


* https://github.com/whosonfirst/go-reader

### null://

Pretend to read files.

import ( "context"

"github.com/whosonfirst/go-reader/v2"

)

func main() { ctx := context.Background() r, _ := reader.NewReader(ctx, "null://") }


### repo://

This is a convenience scheme for working with Who's On First data repositories.

It will update a URI by appending a `data` directory to its path and changing its scheme to `fs://` before invoking `reader.NewReader` with the updated URI.

import ( "context"

"github.com/whosonfirst/go-reader/v2"

)

func main() { ctx := context.Background() r, _ := reader.NewReader(ctx, "repo:///usr/local/data/whosonfirst-data-admin-ca") }


### sql://

Read "files" from a `database/sql` database driver.

import ( "context"

_ "github.com/mattn/go-sqlite3"

"github.com/whosonfirst/go-reader/v2"

)

func main() { ctx := context.Background() r, _ := reader.NewReader(ctx, "sql://sqlite3/geojson/id/body?dsn=example.db") }


### stdin://

Read "files" from `STDIN`

import ( "context"

"github.com/whosonfirst/go-reader/v2"

)

func main() { ctx := context.Background() r, _ := reader.NewReader(ctx, "stdin://") }


And then to use, something like:

cat README.md | ./bin/read -reader-uri stdin:// - | wc -l
339


Note the use of `-` for a URI. This is the convention (when reading from STDIN) but it can be whatever you want it to be.

## See also

* https://github.com/whosonfirst/go-writer

Documentation

Overview

Example:

package main

import (
 	"context"
	"github.com/whosonfirst/go-reader"
	"io"
 	"os"
)

 // main reads "example.txt" from /usr/local/data and copies it to STDOUT.
 func main() {
 	ctx := context.Background()

 	// Check each error explicitly; a failed NewReader would otherwise
 	// surface as a nil dereference on the next line.
 	r, err := reader.NewReader(ctx, "fs:///usr/local/data")
 	if err != nil {
 		panic(err)
 	}

 	fh, err := r.Read(ctx, "example.txt")
 	if err != nil {
 		panic(err)
 	}
 	defer fh.Close()

 	if _, err := io.Copy(os.Stdout, fh); err != nil {
 		panic(err)
 	}
 }

Package reader provides a common interface for reading from a variety of sources. It has the following interface:

// Reader is an interface for reading data from multiple sources or targets.
type Reader interface {
	// Read returns an `io.ReadSeekCloser` instance for a URI resolved by the instance implementing the `Reader` interface.
	Read(context.Context, string) (io.ReadSeekCloser, error)
	// Exists returns a boolean value indicating whether a URI already exists.
	Exists(context.Context, string) (bool, error)
	// ReaderURI returns the absolute path for a file, as determined by the instance implementing the `Reader` interface.
	ReaderURI(context.Context, string) string
}

Reader instances are created either by calling a package-specific New{SOME_READER}Reader method or by invoking the reader.NewReader method passing in a context.Context instance and a URI specific to the reader class. For example:

r, _ := reader.NewReader(ctx, "fs:///usr/local/data")

Custom reader packages implement the reader.Reader interface and register their availability by calling the reader.RegisterReader method on initialization. For example:

// init registers NewFileReader as the initialization function for the "file"
// scheme and panics if registration fails.
func init() {

	ctx := context.Background()

	// Declare err with := since no earlier declaration exists in this scope.
	err := RegisterReader(ctx, "file", NewFileReader)

	if err != nil {
		panic(err)
	}
}

Index

Constants

View Source
const STDIN string = "-"

Constant string value representing STDIN.

Variables

View Source
var URI_QUERYFUNC queryFunc

URI_QUERYFUNC is a custom function to convert paths passed to the `Read` or `Exists` methods to query conditions used to perform record searches. The default is nil.

View Source
var URI_READFUNC readFunc

URI_READFUNC is a custom function to convert paths passed to the `Read` or `Exists` methods to values stored in the underlying database's "ID" column. The default is nil.

View Source
var VALID_BODY *regexp.Regexp

VALID_BODY is a `regexp.Regexp` for validating "body" column names. The default is `^[a-zA-Z0-9-_]+$`.

View Source
var VALID_ID *regexp.Regexp

VALID_ID is a `regexp.Regexp` for validating "ID" column names. The default is `^[a-zA-Z0-9-_]+$`.

View Source
var VALID_TABLE *regexp.Regexp

VALID_TABLE is a `regexp.Regexp` for validating table names. The default is `^[a-zA-Z0-9-_]+$`.

Functions

func ReaderSchemes

func ReaderSchemes() []string

ReaderSchemes returns the list of schemes that have been registered.

func RegisterReader

func RegisterReader(ctx context.Context, scheme string, init_func ReaderInitializationFunc) error

RegisterReader registers 'scheme' as a key pointing to 'init_func' in an internal lookup table used to create new `Reader` instances by the `NewReader` method.

Types

type FileReader

type FileReader struct {
	Reader
	// contains filtered or unexported fields
}

FileReader is a struct that implements the `Reader` interface for reading documents from files on a local disk.

func (*FileReader) Exists

func (r *FileReader) Exists(ctx context.Context, path string) (bool, error)

Exists returns a boolean value indicating whether 'path' already exists.

func (*FileReader) Read

func (r *FileReader) Read(ctx context.Context, path string) (io.ReadSeekCloser, error)

Read will open an `io.ReadSeekCloser` for a file matching 'path'.

func (*FileReader) ReaderURI

func (r *FileReader) ReaderURI(ctx context.Context, path string) string

ReaderURI returns the absolute URL for 'path'.

type HTTPReader

type HTTPReader struct {
	Reader
	// contains filtered or unexported fields
}

HTTPReader is a struct that implements the `Reader` interface for reading documents from an HTTP(S) resource.

func (*HTTPReader) Exists

func (r *HTTPReader) Exists(ctx context.Context, uri string) (bool, error)

Exists returns a boolean value indicating whether 'uri' already exists.

func (*HTTPReader) Read

func (r *HTTPReader) Read(ctx context.Context, uri string) (io.ReadSeekCloser, error)

Read will open a `io.ReadSeekCloser` for the resource located at 'uri'.

func (*HTTPReader) ReaderURI

func (r *HTTPReader) ReaderURI(ctx context.Context, uri string) string

ReaderURI returns 'uri'.

type MultiReader

type MultiReader struct {
	Reader
	// contains filtered or unexported fields
}

MultiReader is a struct that implements the `Reader` interface for reading documents from one or more `Reader` instances.

func (*MultiReader) Exists

func (mr *MultiReader) Exists(ctx context.Context, path string) (bool, error)

Exists returns a boolean value indicating whether 'path' already exists.

func (*MultiReader) Read

func (mr *MultiReader) Read(ctx context.Context, path string) (io.ReadSeekCloser, error)

Read will open an `io.ReadSeekCloser` for a file matching 'path'. In the case of multiple underlying `Reader` instances the first instance to successfully load 'path' will be returned.

func (*MultiReader) ReaderURI

func (mr *MultiReader) ReaderURI(ctx context.Context, path string) string

ReaderURI returns the absolute URL for 'path'. In the case of multiple underlying `Reader` instances the first instance to successfully load 'path' will be returned.

type NullReader

type NullReader struct {
	Reader
}

NullReader is a struct that implements the `Reader` interface for reading documents from nowhere.

func (*NullReader) Exists

func (r *NullReader) Exists(ctx context.Context, path string) (bool, error)

Exists returns a boolean value indicating whether 'path' already exists (meaning it will always return false).

func (*NullReader) Read

func (r *NullReader) Read(ctx context.Context, path string) (io.ReadSeekCloser, error)

Read will open and return an empty `io.ReadSeekCloser` for any value of 'path'.

func (*NullReader) ReaderURI

func (r *NullReader) ReaderURI(ctx context.Context, path string) string

ReaderURI returns the value of 'path'.

type Reader

type Reader interface {
	// Read returns an `io.ReadSeekCloser` instance for a URI resolved by the instance implementing the `Reader` interface.
	Read(context.Context, string) (io.ReadSeekCloser, error)
	// Exists returns a boolean value indicating whether a URI already exists.
	Exists(context.Context, string) (bool, error)
	// ReaderURI returns the absolute path for a file, as determined by the instance implementing the `Reader` interface.
	ReaderURI(context.Context, string) string
}

Reader is an interface for reading data from multiple sources or targets.

func NewCwdReader

func NewCwdReader(ctx context.Context, uri string) (Reader, error)

NewCwdReader returns a new `FileReader` instance for reading documents from the current working directory, configured by 'uri' in the form of:

cwd://

func NewFileReader

func NewFileReader(ctx context.Context, uri string) (Reader, error)

NewFileReader returns a new `FileReader` instance for reading documents from local files on disk, configured by 'uri' in the form of:

fs://{PATH}

Where {PATH} is an absolute path to an existing directory where files will be read from.

func NewHTTPReader

func NewHTTPReader(ctx context.Context, uri string) (Reader, error)

NewHTTPReader returns a new `Reader` instance for reading documents from an HTTP(s) resource, configured by 'uri' in the form of:

http(s)://{HOST}?{PARAMS}

Where {PARAMS} can be: * user-agent - An optional user agent string to include with requests.

func NewMultiReader

func NewMultiReader(ctx context.Context, readers ...Reader) (Reader, error)

NewMultiReader returns a new `Reader` instance for reading documents from one or more `Reader` instances.

func NewMultiReaderFromURIs

func NewMultiReaderFromURIs(ctx context.Context, uris ...string) (Reader, error)

NewMultiReaderFromURIs returns a new `Reader` instance for reading documents from one or more `Reader` instances. 'uris' is assumed to be a list of URIs each of which will be used to invoke the `NewReader` method.

func NewNullReader

func NewNullReader(ctx context.Context, uri string) (Reader, error)

NewNullReader returns a new `NullReader` instance for reading documents from nowhere, configured by 'uri' in the form of:

null://

Technically 'uri' can also be an empty string.

func NewReader

func NewReader(ctx context.Context, uri string) (Reader, error)

NewReader returns a new `Reader` instance configured by 'uri'. The value of 'uri' is parsed as a `url.URL` and its scheme is used as the key for a corresponding `ReaderInitializationFunc` function used to instantiate the new `Reader`. It is assumed that the scheme (and initialization function) have been registered by the `RegisterReader` method.

func NewRepoReader

func NewRepoReader(ctx context.Context, uri string) (Reader, error)

NewRepoReader is a convenience method to update 'uri' by appending a `data` directory to its path and changing its scheme to `fs://` before invoking NewReader with the updated URI.

func NewSQLReader

func NewSQLReader(ctx context.Context, uri string) (Reader, error)

NewSQLReader returns a new `SQLReader` instance for reading documents from a `database/sql` compatible database engine configured by 'uri' in the form of:

sql://{ENGINE}/{TABLE}/{ID_COLUMN}/{BODY_COLUMN}?dsn={DSN}

For example:

sql://sqlite/geojson/id/body?dsn=test.db

The expectation is that `{TABLE}` will have a `{BODY_COLUMN}` column containing a Who's On First record which can be retrieved with a unique identifier defined in the `{ID_COLUMN}` column.

func NewStdinReader

func NewStdinReader(ctx context.Context, uri string) (Reader, error)

NewStdinReader returns a new `Reader` instance for reading documents from STDIN, configured by 'uri' in the form of:

stdin://

Technically 'uri' can also be an empty string.

type ReaderInitializationFunc

type ReaderInitializationFunc func(ctx context.Context, uri string) (Reader, error)

ReaderInitializationFunc is a function defined by individual reader packages and used to create an instance of that reader.

type SQLReader

type SQLReader struct {
	Reader
	// contains filtered or unexported fields
}

SQLReader is a struct that implements the `Reader` interface for reading documents from a `database/sql` compatible database engine.

func (*SQLReader) Exists

func (r *SQLReader) Exists(ctx context.Context, raw_uri string) (bool, error)

Exists returns a boolean value indicating whether 'path' already exists (meaning it will always return false). Read will open a `io.ReadSeekCloser` instance for the record whose "ID" column matches 'raw_uri'. See notes about `URI_READFUNC` and `URI_QUERYFUNC` for modifying, or deriving query criteria from, 'raw_uri' before database queries are performed.

func (*SQLReader) Read

func (r *SQLReader) Read(ctx context.Context, raw_uri string) (io.ReadSeekCloser, error)

Read will open a `io.ReadSeekCloser` instance for the record whose "ID" column matches 'raw_uri'. See notes about `URI_READFUNC` and `URI_QUERYFUNC` for modifying, or deriving query criteria from, 'raw_uri' before database queries are performed.

func (*SQLReader) ReaderURI

func (r *SQLReader) ReaderURI(ctx context.Context, raw_uri string) string

ReaderURI will return the value of 'raw_uri' optionally modified by `URI_READFUNC` if defined.

type StdinReader

type StdinReader struct {
	Reader
}

StdinReader is a struct that implements the `Reader` interface for reading documents from STDIN.

func (*StdinReader) Exists

func (r *StdinReader) Exists(ctx context.Context, path string) (bool, error)

Exists returns a boolean value indicating whether 'path' already exists (meaning it will always return false).

func (*StdinReader) Read

func (r *StdinReader) Read(ctx context.Context, uri string) (io.ReadSeekCloser, error)

Read will open a `io.ReadSeekCloser` instance wrapping `os.Stdin`.

func (*StdinReader) ReaderURI

func (r *StdinReader) ReaderURI(ctx context.Context, uri string) string

ReaderURI will return the value of the `STDIN` constant.

Directories

Path Synopsis
app
cmd
read command

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL