Documentation ¶
Index ¶
- Constants
- func GetEntities(ctx context.Context, client *language.Client, doc *docs.Document) ([]*languagepb.Entity, error)
- func ReadText(doc *docs.Document) (string, error)
- type Client
- type DriveSearch
- type FakeSearch
- type GoogleDocUri
- type HyperLink
- type Indexer
- func (idx *Indexer) Index(driveId string) error
- func (idx *Indexer) IndexDocument(docId string) error
- func (idx *Indexer) ProcessDoc(r *datastore.DocReference)
- func (idx *Indexer) ProcessDocLinks(r *datastore.DocReference, d *docs.Document) error
- func (idx *Indexer) ProcessEntities(r *datastore.DocReference, d *docs.Document) error
- type IndexerOption
- type QueryStats
- type ResultFunc
Constants ¶
const (
// DocumentMimeType is the mime type for Google Documents.
DocumentMimeType = "application/vnd.google-apps.document"
)
const (
GoogleDocsHost = "docs.google.com"
)
Variables ¶
This section is empty.
Functions ¶
func GetEntities ¶
func GetEntities(ctx context.Context, client *language.Client, doc *docs.Document) ([]*languagepb.Entity, error)
GetEntities gets the entities from the document.
N.B. The current implementation doesn't keep track of
func ReadText ¶
ReadText reads all the text from the provided document. It is based on https://developers.google.com/docs/api/samples/extract-text#python.
TODO(https://github.com/jlewi/p22h/issues/1): Linearize text so as to preserve positioning.
Types ¶
type Client ¶
type Client struct {
// contains filtered or unexported fields
}
Client is a high-level client for interacting with Google Drive.
type DriveSearch ¶
type DriveSearch interface {
Search(query string, driveId string, corpora string, resultFunc ResultFunc) error
}
type FakeSearch ¶
type FakeSearch struct {
Docs []*drive.File
}
FakeSearch implements the search interface for an in-memory set of drive documents. FakeSearch is intended for testing.
func (*FakeSearch) Search ¶
func (f *FakeSearch) Search(query string, driveId string, corpora string, resultFunc ResultFunc) error
type GoogleDocUri ¶
func ParseGoogleDocUri ¶
func ParseGoogleDocUri(u string) (*GoogleDocUri, error)
ParseGoogleDocUri parses a Google document URI. It returns nil if the URI is not a Google document.
type HyperLink ¶
func GetAllLinks ¶
GetAllLinks gets all the links from the document.
type Indexer ¶
type Indexer struct {
// contains filtered or unexported fields
}
Indexer indexes Google Drive.
func NewIndexer ¶
func NewIndexer(searcher DriveSearch, docsService *docs.Service, store *datastore.Datastore, nlpClient *language.Client, log logr.Logger, opts ...IndexerOption) (*Indexer, error)
NewIndexer creates a new indexer
func (*Indexer) IndexDocument ¶
IndexDocument indexes a specific document
func (*Indexer) ProcessDoc ¶
func (idx *Indexer) ProcessDoc(r *datastore.DocReference)
ProcessDoc processes the referenced doc.
TODO(jeremy): This function should really return an error. Originally it wasn't returning an error because it was only being called from Index, which just continued, but now it's being called from IndexDocument and we should propagate the error to that.
func (*Indexer) ProcessDocLinks ¶
func (idx *Indexer) ProcessDocLinks(r *datastore.DocReference, d *docs.Document) error
ProcessDocLinks processes all the links for the doc referenced by r and represented by d.
func (*Indexer) ProcessEntities ¶
func (idx *Indexer) ProcessEntities(r *datastore.DocReference, d *docs.Document) error
ProcessEntities gets all the entities in the document
type IndexerOption ¶
type IndexerOption func(*Indexer)
func IndexerWithHTTPClient ¶
func IndexerWithHTTPClient(c *http.Client) IndexerOption
func IndexerWithLogger ¶
func IndexerWithLogger(log logr.Logger) IndexerOption
type QueryStats ¶
QueryStats contains statistics about the results of a search query
type ResultFunc ¶
type ResultFunc func(file *drive.File) error
ResultFunc is invoked by search to process each result. A non-nil error causes result processing to stop.
func NewStatsBuilder ¶
func NewStatsBuilder(s *QueryStats) (ResultFunc, error)
NewStatsBuilder returns a ResultFunc that will aggregate statistics.