giniapi

package module
v1.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 23, 2018 License: MIT Imports: 13 Imported by: 0

README

gini-api-go

GoDoc Build Status License

Go client to interact with Gini's information extraction API. Visit godoc for more implementation details.

Usage example

package giniapi_test

import (
	"context"
	"fmt"
	"log"
	"os"
	"time"

	"github.com/dkerwin/gini-api-go"
)

// ExampleNewClient walks through both authentication flows (OAuth2 and basic
// auth): create a client, upload a PDF, poll until processing has finished and
// print the extracted IBAN.
//
// Very simplistic example. You should have a lot more error handling in place.
func ExampleNewClient() {
	// Upload, Poll and GetExtractions all take a context. UploadOptions has no
	// PollTimeout field, so the processing deadline lives on the context.
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	//////////////////////////////////
	// Oauth2
	//////////////////////////////////

	// Setup api connection
	api, err := giniapi.NewClient(&giniapi.Config{
		ClientID:       "MY_CLIENT_ID",
		ClientSecret:   "********",
		Username:       "user1",
		Password:       "secret",
		Authentication: giniapi.UseOauth2,
	})
	if err != nil {
		log.Panicf("Gini API login failed: %s", err)
	}

	// Read a PDF document
	document, err := os.Open("/tmp/invoice.pdf")
	if err != nil {
		log.Printf("Opening document failed: %s", err)
		return
	}
	defer document.Close()

	// Upload document to gini without doctype hint and user identifier
	doc, resp := api.Upload(ctx, document, giniapi.UploadOptions{FileName: "invoice.pdf"})
	if resp.Error != nil {
		log.Printf("Upload Error: %s", resp.Error)
		return
	}

	// Poll progress; doc must not be used for extractions before this returns
	resp = doc.Poll(ctx, 0)
	if resp.Error != nil {
		log.Printf("Polling Error: %s", resp.Error)
		return
	}

	// Get extractions from our uploaded document
	extractions, resp := doc.GetExtractions(ctx, false)
	if resp.Error != nil {
		log.Printf("Extractions Error: %s", resp.Error)
		return
	}

	// Print IBAN
	fmt.Printf("IBAN has been found: %s Woohoo!\n", extractions.GetValue("iban"))

	//////////////////////////////////
	// basic Auth
	//////////////////////////////////

	// Setup api connection
	api, err = giniapi.NewClient(&giniapi.Config{
		ClientID:       "MY_CLIENT_ID",
		ClientSecret:   "********",
		Authentication: giniapi.UseBasicAuth,
	})
	if err != nil {
		log.Panicf("Gini API login failed: %s", err)
	}

	// Read a PDF document
	document, err = os.Open("/tmp/invoice.pdf")
	if err != nil {
		log.Printf("Opening document failed: %s", err)
		return
	}
	// The earlier deferred Close already captured the first file, so this
	// one only closes the file opened just above.
	defer document.Close()

	// Upload document to gini without doctype hint, but with the user
	// identifier that basic auth requires
	doc, resp = api.Upload(ctx, document, giniapi.UploadOptions{FileName: "invoice.pdf", UserIdentifier: "user123"})
	if resp.Error != nil {
		log.Printf("Upload Error: %s", resp.Error)
		return
	}

	// Poll progress
	resp = doc.Poll(ctx, 0)
	if resp.Error != nil {
		log.Printf("Polling Error: %s", resp.Error)
		return
	}

	// Get extractions from our uploaded document
	extractions, resp = doc.GetExtractions(ctx, false)
	if resp.Error != nil {
		log.Printf("Extractions Error: %s", resp.Error)
		return
	}

	// Print IBAN
	fmt.Printf("IBAN has been found: %s Woohoo!\n", extractions.GetValue("iban"))
}

Documentation

Overview

Package giniapi interacts with Gini's API service to make sense of unstructured documents. Please visit http://developer.gini.net/gini-api/html/index.html for more details about the Gini API and its capabilities.

API features

Supported API calls include:

  • Upload documents (native, scanned, text)
  • List a user's documents
  • Search documents
  • Get extractions (incubator is supported)
  • Download rendered pages, processed document and layout XML
  • Submit feedback on extractions
  • Submit error reports

Contributing

It's awesome that you consider contributing to gini-api-go. Here are the 5 easy steps you should follow:

  • Fork the repository on GitHub
  • Create a topic/feature branch
  • Write code AND tests
  • Update documentation if necessary
  • Open a pull request

Index

Examples

Constants

View Source
// Package-wide constants: the client version plus the message strings used
// for the different failure categories. Note that the Err* names are untyped
// string constants, not error values.
const (
	// VERSION is the API client version
	VERSION                   = "1.0.0"
	// Configuration and authentication messages
	ErrConfigInvalid          = "failed to initialize config object"
	ErrMissingCredentials     = "username or password cannot be empty in Oauth2 flow"
	ErrOauthAuthCodeExchange  = "failed to exchange oauth2 auth code"
	ErrOauthCredentials       = "failed to obtain token with username/password"
	ErrOauthParametersMissing = "oauth2 authentication requires AuthCode or Username + Password"
	// Document handling messages
	ErrUploadFailed           = "failed to upload document"
	ErrDocumentGet            = "failed to GET document object"
	ErrDocumentParse          = "failed to parse document json"
	ErrDocumentRead           = "failed to read document body"
	ErrDocumentList           = "failed to get document list"
	ErrDocumentTimeout        = "failed to process document in time"
	ErrDocumentProcessing     = "failed to process document"
	ErrDocumentDelete         = "failed to delete document"
	ErrDocumentLayout         = "failed to retrieve layout"
	ErrDocumentExtractions    = "failed to retrieve extractions"
	ErrDocumentProcessed      = "failed to retrieve processed document"
	ErrDocumentFeedback       = "failed to submit feedback"
	// Generic HTTP request messages, one per HTTP method
	ErrHTTPPostFailed         = "failed to complete POST request"
	ErrHTTPGetFailed          = "failed to complete GET request"
	ErrHTTPDeleteFailed       = "failed to complete DELETE request"
	ErrHTTPPutFailed          = "failed to complete PUT request"
)

Variables

View Source
// Zero-value instances of the two authentication types, meant to be assigned
// to Config.Authentication.
var (
	// UseOauth2 selects the Oauth2 authentication scheme.
	UseOauth2    Oauth2
	// UseBasicAuth selects the BasicAuth authentication scheme.
	UseBasicAuth BasicAuth
)

Handy vars to simplify the initialization of new API clients

Functions

This section is empty.

Types

type APIAuthScheme

// APIAuthScheme is the interface an authentication mechanism has to satisfy
// to be usable as Config.Authentication.
type APIAuthScheme interface {
	// Authenticate receives the client Config and returns an *http.Client
	// together with an APIResponse describing the outcome.
	Authenticate(config *Config) (*http.Client, APIResponse)
}

APIAuthScheme interface simplifies the addition of new auth mechanisms

type APIClient

// APIClient bundles the configuration with the HTTP client used for requests.
type APIClient struct {
	// Config is embedded, so its fields are accessible directly on the client
	Config

	// HTTPClient is the *http.Client held by this API client
	HTTPClient *http.Client
}

APIClient is the main interface for the user

func NewClient

func NewClient(config *Config) (*APIClient, error)

NewClient validates your Config parameters and returns an APIClient object with a matching http client included.

Example

Very simplistic example. You should have a lot more error handling in place

//////////////////////////////////
// Oauth2
//////////////////////////////////

// Setup api connection
api, err := giniapi.NewClient(&giniapi.Config{
	ClientID:       "MY_CLIENT_ID",
	ClientSecret:   "********",
	Username:       "user1",
	Password:       "secret",
	Authentication: giniapi.UseOauth2,
})

if err != nil {
	log.Panicf("Gini API login failed: %s", err)
}

// Create a context with a 5 second timeout; it is shared by every API call
// below (both the Oauth2 and the basic auth part)
ctx := context.Background()
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()

// Read a PDF document (error deliberately ignored in this simplistic example)
document, _ := os.Open("/tmp/invoice.pdf")

// Upload document to gini without doctype hint and user identifier
doc, _ := api.Upload(ctx, document, giniapi.UploadOptions{FileName: "invoice.pdf"})

// Poll progress
resp := doc.Poll(ctx, 0)

if resp.Error != nil {
	log.Printf("Polling Error: %s", resp.Error)
	return
}

// Get extractions from our uploaded document
extractions, _ := doc.GetExtractions(ctx, false)

// Print IBAN
fmt.Printf("IBAN has been found: %s Woohoo!\n", extractions.GetValue("iban"))

//////////////////////////////////
// basic Auth
//////////////////////////////////

// Setup api connection
api, err = giniapi.NewClient(&giniapi.Config{
	ClientID:       "MY_CLIENT_ID",
	ClientSecret:   "********",
	Authentication: giniapi.UseBasicAuth,
})

if err != nil {
	log.Panicf("Gini API login failed: %s", err)
}

// Read a PDF document
document, _ = os.Open("/tmp/invoice.pdf")

// Upload document to gini without doctype hint, but with a user identifier
doc, _ = api.Upload(ctx, document, giniapi.UploadOptions{FileName: "invoice.pdf", UserIdentifier: "user123"})

// Poll progress
resp = doc.Poll(ctx, 0)

if resp.Error != nil {
	log.Printf("Polling Error: %s", resp.Error)
	return
}

// Get extractions from our uploaded document
extractions, _ = doc.GetExtractions(ctx, false)

// Print IBAN
fmt.Printf("IBAN has been found: %s Woohoo!\n", extractions.GetValue("iban"))
Output:

func (*APIClient) Get

func (api *APIClient) Get(ctx context.Context, url, userIdentifier string) (*Document, APIResponse)

Get Document struct from URL

func (*APIClient) List

func (api *APIClient) List(ctx context.Context, options ListOptions) (*DocumentSet, APIResponse)

List returns DocumentSet

func (*APIClient) Upload

func (api *APIClient) Upload(ctx context.Context, document io.Reader, options UploadOptions) (*Document, APIResponse)

Upload a document from a given io.Reader object (document). Additional options can be passed with an instance of UploadOptions. FileName and DocType are optional and can be empty. UserIdentifier is required if Authentication method is "basic_auth". Upload time is measured and stored in Timing struct (part of Document).

type APIResponse added in v1.0.0

// APIResponse carries the outcome of an API call back to the caller.
type APIResponse struct {
	// Error stores the error object encountered on the way
	Error error
	// Message is an error message with more context
	Message string
	// DocumentId is the internal Id of the document. Can be empty
	DocumentId string
	// RequestId is the request Id returned for the request to the gini API
	RequestId string
	// HttpResponse is the full response object
	HttpResponse *http.Response
}

APIResponse transports information about the request back to the caller

type BasicAuth

// BasicAuth is a field-less type whose Authenticate method satisfies the
// APIAuthScheme interface.
type BasicAuth struct{}

func (BasicAuth) Authenticate

func (_ BasicAuth) Authenticate(config *Config) (*http.Client, APIResponse)

Authenticate satisfies the APIAuthScheme interface for BasicAuth

type BasicAuthTransport

// BasicAuthTransport wraps an http.RoundTripper together with the client
// Config.
type BasicAuthTransport struct {
	// Transport is presumably the underlying round tripper the request is
	// handed to after the header was added — TODO confirm against RoundTrip
	Transport http.RoundTripper
	// Config is presumably the source of the credentials — TODO confirm
	Config    *Config
}

BasicAuthTransport is a net/http transport that automatically adds a matching authorization header for Gini's basic auth system.

func (BasicAuthTransport) RoundTrip

func (bat BasicAuthTransport) RoundTrip(r *http.Request) (*http.Response, error)

RoundTrip to add basic auth header to all requests

type Box

// Box is decoded from a JSON object with the keys height, left, page, top and
// width. It is embedded in Extraction under the "box" key.
// NOTE(review): presumably a bounding box on a document page; the units of
// the float values are not visible here — confirm against the API docs.
type Box struct {
	Height float64 `json:"height"`
	Left   float64 `json:"left"`
	Page   int     `json:"page"`
	Top    float64 `json:"top"`
	Width  float64 `json:"width"`
}

Box struct

type Config

// Config holds everything needed to set up a connection to the Gini API.
type Config struct {
	// ClientID is the application's ID.
	ClientID string
	// ClientSecret is the application's secret.
	ClientSecret string
	// Username for oauth2 password grant
	Username string
	// Password for oauth2 password grant
	Password string
	// AuthCode to exchange for oauth2 token
	AuthCode string
	// Scopes to use (leave empty for all assigned scopes)
	Scopes []string
	// API & Usercenter endpoints (embedded)
	Endpoints
	// APIVersion to use (v1)
	APIVersion string `default:"v1"`
	// Authentication to use
	// oauth2: auth_code || password credentials
	// basicAuth: basic auth + user identifier
	Authentication APIAuthScheme
}

Config to setup Gini API connection

func (*Config) Verify

func (c *Config) Verify() error

type Document

// Document describes a single document. Timing and Owner carry the `json:"-"`
// tag and are therefore never read from or written to JSON; all other fields
// are mapped to the JSON keys named in their tags.
type Document struct {
	// Timing is embedded and excluded from JSON
	Timing `json:"-"`

	// Owner is excluded from JSON
	Owner                string `json:"-"`
	// Links holds the URLs of the document's resources
	Links                Links  `json:"_links"`
	// CreationDate is an integer timestamp; unit not visible here — TODO confirm
	CreationDate         int    `json:"creationDate"`
	ID                   string `json:"id"`
	Name                 string `json:"name"`
	Origin               string `json:"origin"`
	PageCount            int    `json:"pageCount"`
	Pages                []Page `json:"pages"`
	// Progress is the processing state string polled by Poll
	Progress             string `json:"progress"`
	SourceClassification string `json:"sourceClassification"`
	// contains filtered or unexported fields
}

Document contains all information about a single document

func (*Document) Delete

func (d *Document) Delete(ctx context.Context) APIResponse

Delete a document

func (*Document) GetExtractions

func (d *Document) GetExtractions(ctx context.Context, incubator bool) (*Extractions, APIResponse)

GetExtractions returns a document's extractions in an Extractions struct

func (*Document) GetLayout

func (d *Document) GetLayout(ctx context.Context) (*Layout, APIResponse)

GetLayout returns the JSON representation of a document's layout parsed as Layout struct

func (*Document) GetProcessed

func (d *Document) GetProcessed(ctx context.Context) ([]byte, APIResponse)

GetProcessed returns a byte array of the processed (rectified, optimized) document

func (*Document) Poll

func (d *Document) Poll(ctx context.Context, pause time.Duration) APIResponse

Poll polls the progress state of a document and returns nil when the processing has completed (successfully or not). On timeout it returns an error.

func (*Document) String

func (d *Document) String() string

String representation of a document

func (*Document) SubmitFeedback

func (d *Document) SubmitFeedback(ctx context.Context, feedback map[string]map[string]interface{}) APIResponse

SubmitFeedback submits feedback from map

func (*Document) Update

func (d *Document) Update(ctx context.Context) APIResponse

Update document struct from self-contained document link

type DocumentSet

// DocumentSet is the result type of APIClient.List.
type DocumentSet struct {
	// TotalCount is read from the JSON key "totalCount"
	TotalCount int         `json:"totalCount"`
	// Documents is the list of documents read from the JSON key "documents"
	Documents  []*Document `json:"documents"`
}

DocumentSet is a list of documents with the total count

type Endpoints

// Endpoints holds the base URLs of the two services. The `default` struct
// tags carry the production URLs.
type Endpoints struct {
	// API is the base URL of the Gini API
	API        string `default:"https://api.gini.net"`
	// UserCenter is the base URL of the user center
	UserCenter string `default:"https://user.gini.net"`
}

Endpoints to access API and Usercenter

type Extraction

// Extraction is a single extracted item. Box is embedded and mapped to the
// JSON key "box"; the three string fields are omitted from JSON when empty.
type Extraction struct {
	Box        `json:"box"`
	// Candidates is a string; presumably the name of the matching entry in
	// Extractions.Candidates — TODO confirm
	Candidates string `json:"candidates,omitempty"`
	Entity     string `json:"entity,omitempty"`
	Value      string `json:"value,omitempty"`
}

Extraction struct

type Extractions

// Extractions is the result type of Document.GetExtractions. Both maps are
// keyed by string.
type Extractions struct {
	// Candidates maps a key to a list of Extraction values
	Candidates  map[string][]Extraction `json:"candidates"`
	// Extractions maps a key (e.g. "iban" in the example) to one Extraction
	Extractions map[string]Extraction   `json:"extractions"`
}

Document extractions struct

func (*Extractions) GetValue

func (e *Extractions) GetValue(key string) string

GetValue is a helper function to get the extraction value or an empty string

type Layout

// Layout is the parsed layout of a document as returned by
// Document.GetLayout.
type Layout struct {
	// Pages holds the layout of the individual pages
	Pages []PageLayout
}

type Line

// Line is an element of Paragraph.Lines; it embeds PageCoordinates and holds
// a list of words.
type Line struct {
	PageCoordinates
	// Words are the words belonging to this line
	Words []Word
}
// Links holds the links to a document's resources, read from the "_links"
// JSON object of a Document.
type Links struct {
	Document    string `json:"document"`
	Extractions string `json:"extractions"`
	Layout      string `json:"layout"`
	Processed   string `json:"processed"`
}

Links contains the links to a document's resources

type ListOptions

// ListOptions are the parameters accepted by APIClient.List.
type ListOptions struct {
	// Limit and Offset presumably page through the document list — TODO
	// confirm how zero values are treated
	Limit          int
	Offset         int
	// UserIdentifier is presumably needed with basic auth, as documented for
	// Upload — verify against List
	UserIdentifier string
}

ListOptions specify parameters to the List function

type Oauth2

// Oauth2 is a field-less type whose Authenticate method satisfies the
// APIAuthScheme interface.
type Oauth2 struct{}

func (Oauth2) Authenticate

func (_ Oauth2) Authenticate(config *Config) (*http.Client, APIResponse)

Authenticate satisfies the APIAuthScheme interface for Oauth2

type Page

// Page is one entry of Document.Pages.
type Page struct {
	// Images is a string-to-string map read from the JSON key "images";
	// presumably rendition name to URL — TODO confirm
	Images     map[string]string `json:"images"`
	// PageNumber is read from the JSON key "pageNumber"
	PageNumber int               `json:"pageNumber"`
}

Page describes a document's pages

type PageCoordinates

// PageCoordinates is embedded by Line, Paragraph, Region and Word.
// NOTE(review): W, H, T and L presumably stand for width, height, top and
// left — confirm against the layout format.
type PageCoordinates struct {
	W float64
	H float64
	T float64
	L float64
}

type PageLayout

// PageLayout is one entry of Layout.Pages.
type PageLayout struct {
	// Number is presumably the page number — TODO confirm
	Number    int
	// SizeX and SizeY are presumably the page dimensions — TODO confirm units
	SizeX     float64
	SizeY     float64
	// TextZones and Regions are the text zones and regions of this page
	TextZones []TextZone
	Regions   []Region
}

type Paragraph

// Paragraph is an element of TextZone.Paragraphs; it embeds PageCoordinates
// and holds a list of lines.
type Paragraph struct {
	PageCoordinates
	// Lines are the lines belonging to this paragraph
	Lines []Line
}

type Region

// Region is an element of PageLayout.Regions; it embeds PageCoordinates and
// carries a type string.
type Region struct {
	PageCoordinates
	// Type is a string; its possible values are not visible here
	Type string
}

type TextZone

// TextZone is an element of PageLayout.TextZones and groups paragraphs.
type TextZone struct {
	// Paragraphs are the paragraphs belonging to this text zone
	Paragraphs []Paragraph
}

type Timing

// Timing records durations for a Document; it is embedded in Document and
// excluded from JSON there.
type Timing struct {
	// Upload is the measured upload time
	Upload     time.Duration
	// Processing is the processing time
	Processing time.Duration
}

Timing struct

func (*Timing) Total

func (t *Timing) Total() time.Duration

Total returns the summarized timings of upload and processing

type UploadOptions

// UploadOptions are the parameters accepted by APIClient.Upload.
type UploadOptions struct {
	// FileName is optional and can be empty
	FileName       string
	// DocType is optional and can be empty
	DocType        string
	// UserIdentifier is required if the authentication method is basic auth
	UserIdentifier string
}

UploadOptions specify parameters to the Upload function

type Word

// Word is an element of Line.Words; it embeds PageCoordinates and carries
// font attributes plus the text itself.
type Word struct {
	PageCoordinates
	Fontsize   float64
	FontFamily string
	Bold       bool
	// Text is the word's text
	Text       string
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL