document

package
v0.0.0-...-e29e17f Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 30, 2020 License: Apache-2.0, Apache-2.0 Imports: 10 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// TypeMarkdown is the document_type_id of a Markdown document.
	// The block uses iota + 1, so the IDs start at 1:
	// TypeMarkdown = 1, TypePDF = 2, TypeUnsupported = 3.
	TypeMarkdown = iota + 1
	// TypePDF is the document_type_id of a PDF document.
	TypePDF
	// TypeUnsupported is the document_type_id of an unsupported document type.
	TypeUnsupported
)

Variables

View Source
var (
	// ErrDuplicateIndex is returned when an index being inserted already exists.
	ErrDuplicateIndex = errors.New("index already exists")

	// ErrDuplicateDocument is returned when a document being inserted already exists.
	ErrDuplicateDocument = errors.New("document already exists")
)

Functions

This section is empty.

Types

type Document

// Document represents a document that was indexed.
// Each field carries a json tag (serialized key) and a db tag (database column).
type Document struct {
	// ID maps to the document_id column and is serialized as "id".
	ID               uuid.UUID `json:"id" db:"document_id"`
	// FolderID maps to the folder_id column, the same column name Folder.FolderID uses.
	FolderID         uuid.UUID `json:"folderID" db:"folder_id"`
	// TypeID maps to document_type_id; TypeMarkdown, TypePDF and TypeUnsupported
	// are the document_type_id constants declared by this package.
	TypeID           int       `json:"typeID" db:"document_type_id"`
	ObjectStorageURL string    `json:"objectStorageURL" db:"object_storage_url"`
	DownloadURL      string    `json:"downloadURL" db:"download_url"`
	// Path is the value GetDocumentByPath and DeleteDocument take as their argument
	// (presumably the lookup key; their bodies are not shown here).
	Path             string    `json:"path" db:"path"`
	Name             string    `json:"name" db:"name"`
	Body             string    `json:"body" db:"body"`
	Created          time.Time `json:"created" db:"created"`
	Updated          time.Time `json:"updated" db:"updated"`
}

Document represents a document that was indexed

type Folder

// Folder represents a folder.
type Folder struct {
	// FolderID maps to the folder_id column; note it is serialized as "id",
	// not "folderID".
	FolderID uuid.UUID `json:"id" db:"folder_id"`
	Name     string    `json:"name" db:"name"`
	Path     string    `json:"path" db:"path"`
	Created  time.Time `json:"created" db:"created"`
	Updated  time.Time `json:"updated" db:"updated"`
}

Folder represents a folder

type SearchResult

// SearchResult represents a single search result row.
// Several JSON keys and db columns differ from the Go field names; see the
// per-field notes.
type SearchResult struct {
	// DocumentID maps to the document_id column and is serialized as "id".
	DocumentID   uuid.UUID `json:"id" db:"document_id"`
	SentenceID   uuid.UUID `json:"sentenceID" db:"sentence_id"`
	AnnoyID      int       `json:"annoyID" db:"annoy_id"`
	// DocumentName maps to the name column and is serialized as "name".
	DocumentName string    `json:"name" db:"name"`
	Path         string    `json:"path" db:"path"`
	DownloadURL  string    `json:"downloadURL" db:"download_url"`
	// Text maps to the sentence_text column and is serialized as "text".
	Text         string    `json:"text" db:"sentence_text"`
	// Rank maps to the rel column and is serialized as "rel"
	// (presumably a relevance score; confirm against the query that fills it).
	Rank         float32   `json:"rel" db:"rel"`
}

SearchResult represents search results

type Sentence

// Sentence represents an indexed sentence from a document.
//
// Only Created and Updated carry json tags. The remaining fields have db tags
// only, so encoding/json would use their Go field names (e.g. "ID",
// "DocumentID") as keys if a Sentence were marshaled.
type Sentence struct {
	ID         uuid.UUID       `db:"sentence_id"`
	DocumentID uuid.UUID       `db:"document_id"`
	StoreID    uuid.UUID       `db:"store_id"`
	AnnoyID    int             `db:"annoy_id"`
	// Embedding holds the raw JSON from the embedding column;
	// GetEmbeddings exposes it as a []float32.
	Embedding  json.RawMessage `db:"embedding"`
	Body       string          `db:"body"`
	Context    string          `db:"context"`
	Created    time.Time       `json:"created" db:"created"`
	Updated    time.Time       `json:"updated" db:"updated"`
}

Sentence represents an indexed sentence from a document

func (*Sentence) GetEmbeddings

func (s *Sentence) GetEmbeddings() ([]float32, error)

GetEmbeddings decodes the JSON value of Embedding and returns it as a []float32

type Service

// Service contains functionality for managing the document data service.
type Service struct {
	// DB is the sqlx database handle; NewService takes a *sqlx.DB as its only argument.
	DB *sqlx.DB
}

Service contains functionality for managing the document data service

func NewService

func NewService(db *sqlx.DB) *Service

NewService returns a new document service

func (*Service) CreateDocument

func (s *Service) CreateDocument(d *Document) (*Document, error)

CreateDocument creates a new document

func (*Service) CreateFolderPath

func (s *Service) CreateFolderPath(path string, recursing bool) (string, error)

CreateFolderPath creates the folder a document is placed in if it does not exist. Callers should pass false for the 'recursing' parameter.

func (*Service) CreateSentence

func (s *Service) CreateSentence(i *Sentence) (*Sentence, error)

CreateSentence stores a sentence from a parsed document together with its mapping to an annoy index id

func (*Service) DeleteDocument

func (s *Service) DeleteDocument(path string) error

DeleteDocument deletes a document and all referencing data

func (*Service) FullTextSearch

func (s *Service) FullTextSearch(text string) ([]SearchResult, error)

FullTextSearch performs postgres FTS on sentence bodies to get SearchResults

func (*Service) GetDocumentByPath

func (s *Service) GetDocumentByPath(path string) (*Document, error)

GetDocumentByPath gets a document given a specific path

func (*Service) GetFolderByPath

func (s *Service) GetFolderByPath(path string) (*Folder, error)

GetFolderByPath gets a folder given a specific path

func (*Service) GetIndexContent

func (s *Service) GetIndexContent() ([]Sentence, error)

GetIndexContent gets all content previously indexed to rebuild the index

func (*Service) GetSearchResults

func (s *Service) GetSearchResults(sIDs []int) ([]SearchResult, error)

GetSearchResults returns search results given sentence ids. Spotify annoy returns a list of ids which relate to sentences

type Type

// Type represents a document type.
type Type struct {
	// ID maps to the document_type_id column; TypeMarkdown, TypePDF and
	// TypeUnsupported are the document_type_id constants declared by this package.
	ID      int       `json:"id" db:"document_type_id"`
	Name    string    `json:"name" db:"name"`
	Created time.Time `json:"created" db:"created"`
	Updated time.Time `json:"updated" db:"updated"`
}

Type represents a document type

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL