unstructured

package module
v0.1.0-alpha.2
Published: Aug 22, 2025 License: MIT Imports: 14 Imported by: 0

README

Unstructured.io Go SDK

A lightweight Go client for the Unstructured.io Workflow Endpoint API with zero external dependencies.

Features

  • ✅ Zero external dependencies - uses only the Go standard library
  • ✅ Near-complete coverage of the Unstructured.io Workflow API
  • ✅ Type-safe request/response structures
  • ✅ Comprehensive error handling
  • ✅ Connection testing utilities
  • ✅ Helper functions for optional fields
  • ✅ Context support for timeouts and cancellation

Installation

go get github.com/aws-gopher/unstructured-sdk-go

Quick Start

package main

import (
    "context"
    "log"
    "github.com/aws-gopher/unstructured-sdk-go"
)

func main() {
    // Create a client
    client, err := unstructured.New(
        unstructured.WithEndpoint("https://platform.unstructured.io/api/v1"),
        unstructured.WithKey("your-api-key"),
    )
    if err != nil {
        log.Fatal(err)
    }

    ctx := context.Background()

    // Create a source connector (S3)
    source, err := client.CreateSource(ctx, unstructured.CreateSourceRequest{
        Name: "My S3 Source",
        Config: unstructured.S3SourceConnectorConfigInput{
            RemoteURL: "s3://my-bucket/input/",
            Key:       unstructured.String("your-access-key"),
            Secret:    unstructured.String("your-secret-key"),
        },
    })
    if err != nil {
        log.Fatal(err)
    }

    // Create a destination connector (S3)
    destination, err := client.CreateDestination(ctx, unstructured.CreateDestinationRequest{
        Name: "My S3 Destination",
        Config: unstructured.S3DestinationConnectorConfigInput{
            RemoteURL: "s3://my-bucket/output/",
            Key:       unstructured.String("your-access-key"),
            Secret:    unstructured.String("your-secret-key"),
        },
    })
    if err != nil {
        log.Fatal(err)
    }

    // Create a workflow
    workflow, err := client.CreateWorkflow(ctx, &unstructured.CreateWorkflowRequest{
        Name:          "My Processing Workflow",
        SourceID:      &source.ID,
        DestinationID: &destination.ID,
        WorkflowType:  unstructured.WorkflowTypeBasic,
        WorkflowNodes: []unstructured.WorkflowNode{
            {
                Name:    "Partitioner",
                Type:    "partition",
                Subtype: "fast",
            },
            {
                Name:    "Chunker",
                Type:    "chunk",
                Subtype: "by_title",
                Settings: map[string]interface{}{
                    "chunk_size": 1000,
                    "overlap":    200,
                },
            },
        },
    })
    if err != nil {
        log.Fatal(err)
    }

    // Run the workflow
    job, err := client.RunWorkflow(ctx, workflow.ID, &unstructured.RunWorkflowRequest{
        InputFiles: []string{"document1.pdf", "document2.docx"},
    })
    if err != nil {
        log.Fatal(err)
    }

    log.Printf("Job started with ID: %s", job.ID)
}

Key Concepts

Connectors

  • Source Connectors: Ingest files or data into Unstructured from source locations like S3, Google Drive, databases, etc.
  • Destination Connectors: Send processed data from Unstructured to destination locations like S3, databases, vector stores, etc.

Workflows

Workflows define how Unstructured processes your data through a series of nodes:

  • Source Node: Represents where your files or data come from
  • Partitioner Node: Extracts content from unstructured files and outputs structured document elements
  • Chunker Node: Chunks partitioned data into smaller pieces for RAG applications
  • Enrichment Node: Applies enrichments like image summaries, table summaries, NER, etc.
  • Embedder Node: Generates vector embeddings for vector-based searches
  • Destination Node: Represents where processed data goes

Jobs

Jobs run workflows at specific points in time and can be monitored for status and results.
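
Jobs can be polled until they reach a terminal state. A minimal sketch, assuming a client, a context, and a job ID are already in hand; unstructured.JobStatusFailed is an assumed constant (only JobStatusCompleted appears elsewhere in this README):

// A minimal polling sketch, not part of the SDK.
poll:
for {
    job, err := client.GetJob(ctx, jobID)
    if err != nil {
        log.Fatal(err)
    }

    switch job.Status {
    case unstructured.JobStatusCompleted:
        log.Println("Job finished")
        break poll
    case unstructured.JobStatusFailed: // assumed failure constant
        log.Fatalf("Job failed: %s", job.ID)
    }

    time.Sleep(10 * time.Second) // fixed-interval polling; see best practices below
}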

Working with Different Connector Types

Source Connectors

// Azure Blob Storage
azureSource, err := client.CreateSource(ctx, unstructured.CreateSourceRequest{
    Name: "Azure Source",
    Config: unstructured.AzureSourceConnectorConfigInput{
        RemoteURL:        "https://myaccount.blob.core.windows.net/container/",
        ConnectionString: unstructured.String("your-connection-string"),
    },
})

// Google Drive
gdriveSource, err := client.CreateSource(ctx, unstructured.CreateSourceRequest{
    Name: "Google Drive Source",
    Config: unstructured.GoogleDriveSourceConnectorConfigInput{
        DriveID:           "your-drive-id",
        ServiceAccountKey: unstructured.String("your-service-account-key"),
        Extensions:        []string{".pdf", ".docx", ".txt"},
    },
})

// Salesforce
salesforceSource, err := client.CreateSource(ctx, unstructured.CreateSourceRequest{
    Name: "Salesforce Source",
    Config: unstructured.SalesforceSourceConnectorConfigInput{
        Username:    "your-username",
        ConsumerKey: "your-consumer-key",
        PrivateKey:  "your-private-key",
        Categories:  []string{"cases", "opportunities"},
    },
})

Destination Connectors

// S3 Destination
s3Dest, err := client.CreateDestination(ctx, unstructured.CreateDestinationRequest{
    Name: "S3 Destination",
    Config: unstructured.S3DestinationConnectorConfigInput{
        RemoteURL: "s3://my-bucket/processed/",
        Key:       unstructured.String("your-access-key"),
        Secret:    unstructured.String("your-secret-key"),
    },
})

// Postgres Database
postgresDest, err := client.CreateDestination(ctx, unstructured.CreateDestinationRequest{
    Name: "Postgres Destination",
    Config: unstructured.PostgresDestinationConnectorConfigInput{
        Host:      "your-postgres-host",
        Database:  "your-database",
        Port:      5432,
        Username:  "your-username",
        Password:  "your-password",
        TableName: "processed_documents",
    },
})

Managing Workflows

// List workflows with filtering
workflows, _ := client.ListWorkflows(ctx, &unstructured.ListWorkflowsRequest{
    Status:        unstructured.Ptr(unstructured.WorkflowStateActive),
    Page:          unstructured.Int(1),
    PageSize:      unstructured.Int(10),
    SortBy:        unstructured.String("created_at"),
    SortDirection: unstructured.Ptr(unstructured.SortDirectionDesc),
})

// Get workflow details
workflow, _ := client.GetWorkflow(ctx, "workflow-id")

// Update workflow
updatedWorkflow, _ := client.UpdateWorkflow(ctx, "workflow-id", unstructured.UpdateWorkflowRequest{
    Name: unstructured.String("Updated Workflow Name"),
    WorkflowNodes: []unstructured.WorkflowNode{
        {
            Name:    "Partitioner",
            Type:    "partition",
            Subtype: "fast",
        },
        {
            Name:    "Chunker",
            Type:    "chunk",
            Subtype: "by_title",
            Settings: map[string]interface{}{
                "chunk_size": 1500,
                "overlap":    300,
            },
        },
        {
            Name:    "Embedder",
            Type:    "embed",
            Subtype: "openai",
            Settings: map[string]interface{}{
                "model": "text-embedding-ada-002",
            },
        },
    },
})

Monitoring Jobs

// List jobs
jobs, _ := client.ListJobs(ctx, &unstructured.ListJobsRequest{
    WorkflowID: unstructured.String("workflow-id"),
    Status:     unstructured.Ptr(unstructured.JobStatusCompleted),
})

// Get job details
job, _ := client.GetJob(ctx, "job-id")

// Get detailed processing information
jobDetails, _ := client.GetJobDetails(ctx, "job-id")

// Check for failed files
failedFiles, err := client.GetJobFailedFiles(ctx, "job-id")
if err == nil && len(failedFiles.FailedFiles) > 0 {
    for _, failed := range failedFiles.FailedFiles {
        log.Printf("Failed file: %s, Error: %s", failed.Document, failed.Error)
    }
}

// Download job results
reader, err := client.DownloadJob(ctx, "job-id")
if err != nil {
    log.Fatal(err)
}
defer reader.Close()

// Save to file
file, err := os.Create("job-results.json")
if err != nil {
    log.Fatal(err)
}
defer file.Close()

_, err = io.Copy(file, reader)
if err != nil {
    log.Fatal(err)
}

Connection Testing

// Test source connector connection
connectionCheck, err := client.CreateSourceConnectionCheck(ctx, "source-id")
if err != nil {
    log.Fatal(err)
}

// Check connection status
checkResult, err := client.GetSourceConnectionCheck(ctx, "source-id")
if err != nil {
    log.Fatal(err)
}

switch checkResult.Status {
case unstructured.ConnectionCheckStatusSuccess:
    log.Println("Connection successful")

case unstructured.ConnectionCheckStatusFailure:
    log.Printf("Connection failed: %s", *checkResult.Reason)

case unstructured.ConnectionCheckStatusScheduled:
    log.Println("Connection check in progress")
}

Error Handling

The package provides comprehensive error handling with typed errors:

source, err := client.CreateSource(ctx, request)
if err != nil {
    // Check for validation errors
    var ve *unstructured.HTTPValidationError
    if errors.As(err, &ve) {
        log.Printf("Validation failed: %v", ve)
        for _, detail := range ve.Detail {
            log.Printf("  - %s at %v: %s", detail.Type, detail.Location, detail.Message)
        }
        return
    }
    
    // Handle other errors
    log.Printf("Source creation failed: %v", err)
    return
}

Helper Functions

The package provides several helper functions for working with pointers to primitive types. These functions are useful when you need to pass optional values to API requests.

Creating pointers from values:

str := unstructured.String("optional value")
enabled := unstructured.Bool(true)
count := unstructured.Int(42)

Converting pointers back to values with safe defaults:

value := unstructured.ToString(str)    // returns "" if str is nil
flag := unstructured.ToBool(enabled)   // returns false if enabled is nil
number := unstructured.ToInt(count)    // returns 0 if count is nil

These helper functions are particularly useful when working with optional fields in request structures:

workflow, err := client.CreateWorkflow(ctx, &unstructured.CreateWorkflowRequest{
    Name:         "My Workflow",
    WorkflowType: unstructured.WorkflowTypeBasic,
    ReprocessAll: unstructured.Bool(true),  // Optional boolean field
})

Supported File Types

The Unstructured.io platform supports a wide variety of file types including:

  • Documents: PDF, DOCX, PPTX, XLSX, TXT, RTF
  • Images: JPG, PNG, TIFF, BMP
  • Archives: ZIP, TAR, RAR
  • Web: HTML, XML, JSON
  • And many more

Authentication

The package supports API key authentication:

client, err := unstructured.New(
    unstructured.WithEndpoint("https://platform.unstructured.io/api/v1"),
    unstructured.WithKey("your-api-key"),
)

Rate Limiting and Best Practices

  • Use context.Context for timeout and cancellation
  • Implement proper error handling and retry logic
  • Monitor job status before attempting downloads
  • Use connection checks to validate connector configurations
  • Consider implementing exponential backoff for retries (see the sketch below)
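
Combining the last two points, a minimal sketch of a timeout-bounded call with exponential backoff (withRetry is an illustrative helper, not part of the SDK):

// withRetry retries fn with exponential backoff until it succeeds, attempts
// run out, or the context is done. Illustrative only.
func withRetry(ctx context.Context, attempts int, fn func(context.Context) error) error {
    backoff := time.Second

    var err error
    for i := 0; i < attempts; i++ {
        if err = fn(ctx); err == nil {
            return nil
        }

        select {
        case <-ctx.Done():
            return ctx.Err()
        case <-time.After(backoff):
            backoff *= 2 // double the wait between attempts
        }
    }

    return err
}

// Usage: bound the whole operation with a timeout and retry a single call.
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()

err := withRetry(ctx, 5, func(ctx context.Context) error {
    _, err := client.GetSource(ctx, "source-id")
    return err
})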

Dependencies

This package has zero external dependencies and uses only the Go standard library:

  • context - For context support
  • encoding/json - For JSON marshaling/unmarshaling
  • fmt - For string formatting
  • io - For I/O operations
  • net/http - For HTTP client functionality
  • strings - For string operations
  • time - For time-related operations

Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

License

This project is licensed under the MIT License - see the LICENSE file for details.

API Reference

For more information about the Unstructured.io API, see the Unstructured Workflow API docs: https://docs.unstructured.io/api-reference/workflow/overview

Documentation

Overview

Package unstructured provides a Go client for the Unstructured.io Workflow Endpoint API.

The Unstructured.io Workflow Endpoint enables you to work with connectors, workflows, and jobs programmatically. This package provides a complete Go implementation of the API, allowing you to:

  • Create and manage source connectors that ingest files or data from various locations
  • Create and manage destination connectors that send processed data to different destinations
  • Define and manage workflows that specify how Unstructured processes your data
  • Run jobs that execute workflows at specific points in time

Key Concepts

Connectors

  • Source Connectors: Ingest files or data into Unstructured from source locations like S3, Google Drive, databases, etc.
  • Destination Connectors: Send processed data from Unstructured to destination locations like S3, databases, vector stores, etc.

Workflows

Workflows define how Unstructured processes your data through a series of nodes:

  • Source Node: Represents where your files or data come from
  • Partitioner Node: Extracts content from unstructured files and outputs structured document elements
  • Chunker Node: Chunks partitioned data into smaller pieces for RAG applications
  • Enrichment Node: Applies enrichments like image summaries, table summaries, NER, etc.
  • Embedder Node: Generates vector embeddings for vector-based searches
  • Destination Node: Represents where processed data goes

Jobs

Jobs run workflows at specific points in time and can be monitored for status and results.

Quick Start

package main

import (
	"context"
	"log"
	"github.com/your-org/unstructured-sdk-go"
)

func main() {
	// Create a client
	client, err := unstructured.New(
		unstructured.WithEndpoint("https://platform.unstructured.io/api/v1"),
		unstructured.WithKey("your-api-key"),
	)
	if err != nil {
		log.Fatal(err)
	}

	ctx := context.Background()

	// Create a source connector (S3)
	source, err := client.CreateSource(ctx, unstructured.CreateSourceRequest{
		Name: "My S3 Source",
		Config: unstructured.S3SourceConnectorConfigInput{
			RemoteURL: "s3://my-bucket/input/",
			Key:       unstructured.String("your-access-key"),
			Secret:    unstructured.String("your-secret-key"),
		},
	})
	if err != nil {
		log.Fatal(err)
	}

	// Create a destination connector (S3)
	destination, err := client.CreateDestination(ctx, unstructured.CreateDestinationRequest{
		Name: "My S3 Destination",
		Config: unstructured.S3DestinationConnectorConfigInput{
			RemoteURL: "s3://my-bucket/output/",
			Key:       unstructured.String("your-access-key"),
			Secret:    unstructured.String("your-secret-key"),
		},
	})
	if err != nil {
		log.Fatal(err)
	}

	// Create a workflow
	workflow, err := client.CreateWorkflow(ctx, &unstructured.CreateWorkflowRequest{
		Name:          "My Processing Workflow",
		SourceID:      &source.ID,
		DestinationID: &destination.ID,
		WorkflowType:  unstructured.WorkflowTypeBasic,
		WorkflowNodes: []unstructured.WorkflowNode{
			{
				Name:    "Partitioner",
				Type:    "partition",
				Subtype: "fast",
			},
			{
				Name:    "Chunker",
				Type:    "chunk",
				Subtype: "by_title",
				Settings: map[string]interface{}{
					"chunk_size": 1000,
					"overlap":    200,
				},
			},
		},
	})
	if err != nil {
		log.Fatal(err)
	}

	// Run the workflow
	job, err := client.RunWorkflow(ctx, workflow.ID, &unstructured.RunWorkflowRequest{
		InputFiles: []string{"document1.pdf", "document2.docx"},
	})
	if err != nil {
		log.Fatal(err)
	}

	log.Printf("Job started with ID: %s", job.ID)
}

Working with Different Connector Types

Source Connectors

// Azure Blob Storage
azureSource, err := client.CreateSource(ctx, unstructured.CreateSourceRequest{
	Name: "Azure Source",
	Config: unstructured.AzureSourceConnectorConfigInput{
		RemoteURL:        "https://myaccount.blob.core.windows.net/container/",
		ConnectionString: unstructured.String("your-connection-string"),
	},
})

// Google Drive
gdriveSource, err := client.CreateSource(ctx, unstructured.CreateSourceRequest{
	Name: "Google Drive Source",
	Config: unstructured.GoogleDriveSourceConnectorConfigInput{
		DriveID:           "your-drive-id",
		ServiceAccountKey: unstructured.String("your-service-account-key"),
		Extensions:        []string{".pdf", ".docx", ".txt"},
	},
})

// Salesforce
salesforceSource, err := client.CreateSource(ctx, unstructured.CreateSourceRequest{
	Name: "Salesforce Source",
	Config: unstructured.SalesforceSourceConnectorConfigInput{
		Username:    "your-username",
		ConsumerKey: "your-consumer-key",
		PrivateKey:  "your-private-key",
		Categories:  []string{"cases", "opportunities"},
	},
})

Destination Connectors

// S3 Destination
s3Dest, err := client.CreateDestination(ctx, unstructured.CreateDestinationRequest{
	Name: "S3 Destination",
	Config: unstructured.S3DestinationConnectorConfigInput{
		RemoteURL: "s3://my-bucket/processed/",
		Key:       unstructured.String("your-access-key"),
		Secret:    unstructured.String("your-secret-key"),
	},
})

// Postgres Database
postgresDest, err := client.CreateDestination(ctx, unstructured.CreateDestinationRequest{
	Name: "Postgres Destination",
	Config: unstructured.PostgresDestinationConnectorConfigInput{
		Host:     "your-postgres-host",
		Database: "your-database",
		Port:     5432,
		Username: "your-username",
		Password: "your-password",
		TableName: "processed_documents",
	},
})

Managing Workflows

// List workflows with filtering
workflows, err := client.ListWorkflows(ctx, &unstructured.ListWorkflowsRequest{
	Status:        unstructured.Ptr(unstructured.WorkflowStateActive),
	Page:          unstructured.Int(1),
	PageSize:      unstructured.Int(10),
	SortBy:        unstructured.String("created_at"),
	SortDirection: unstructured.Ptr(unstructured.SortDirectionDesc),
})

// Get workflow details
workflow, err := client.GetWorkflow(ctx, "workflow-id")

// Update workflow
updatedWorkflow, err := client.UpdateWorkflow(ctx, "workflow-id", unstructured.UpdateWorkflowRequest{
	Name: unstructured.String("Updated Workflow Name"),
	WorkflowNodes: []unstructured.WorkflowNode{
		{
			Name:    "Partitioner",
			Type:    "partition",
			Subtype: "fast",
		},
		{
			Name:    "Chunker",
			Type:    "chunk",
			Subtype: "by_title",
			Settings: map[string]interface{}{
				"chunk_size": 1500,
				"overlap":    300,
			},
		},
		{
			Name:    "Embedder",
			Type:    "embed",
			Subtype: "openai",
			Settings: map[string]interface{}{
				"model": "text-embedding-ada-002",
			},
		},
	},
})

Monitoring Jobs

// List jobs
jobs, err := client.ListJobs(ctx, &unstructured.ListJobsRequest{
	WorkflowID: unstructured.String("workflow-id"),
	Status:     unstructured.Ptr(unstructured.JobStatusCompleted),
})

// Get job details
job, err := client.GetJob(ctx, "job-id")

// Get detailed processing information
jobDetails, err := client.GetJobDetails(ctx, "job-id")

// Check for failed files
failedFiles, err := client.GetJobFailedFiles(ctx, "job-id")
if err == nil && len(failedFiles.FailedFiles) > 0 {
	for _, failed := range failedFiles.FailedFiles {
		log.Printf("Failed file: %s, Error: %s", failed.Document, failed.Error)
	}
}

// Download job results
reader, err := client.DownloadJob(ctx, "job-id")
if err != nil {
	log.Fatal(err)
}
defer reader.Close()

// Save to file
file, err := os.Create("job-results.json")
if err != nil {
	log.Fatal(err)
}
defer file.Close()

_, err = io.Copy(file, reader)
if err != nil {
	log.Fatal(err)
}

Connection Testing

// Test source connector connection
connectionCheck, err := client.CreateSourceConnectionCheck(ctx, "source-id")
if err != nil {
	log.Fatal(err)
}

// Check connection status
checkResult, err := client.GetSourceConnectionCheck(ctx, "source-id")
if err != nil {
	log.Fatal(err)
}

switch checkResult.Status {
case unstructured.ConnectionCheckStatusSuccess:
	log.Println("Connection successful")

case unstructured.ConnectionCheckStatusFailure:
	log.Printf("Connection failed: %s", *checkResult.Reason)

case unstructured.ConnectionCheckStatusScheduled:
	log.Println("Connection check in progress")
}

Error Handling

The package provides comprehensive error handling:

source, err := client.CreateSource(ctx, request)
if err != nil {
	// Check for validation errors
	var ve *unstructured.HTTPValidationError
	if errors.As(err, &ve) {
		log.Printf("Validation failed: %v", ve)
		for _, detail := range ve.Detail {
		    log.Printf("  - %s at %v: %s", detail.Type, detail.Location, detail.Message)
		}
		return
	}

	// Handle other errors
	log.Printf("Source creation failed: %v", err)
	return
}

Supported File Types

The Unstructured.io platform supports a wide variety of file types including:

  • Documents: PDF, DOCX, PPTX, XLSX, TXT, RTF
  • Images: JPG, PNG, TIFF, BMP
  • Archives: ZIP, TAR, RAR
  • Web: HTML, XML, JSON
  • And many more

Rate Limiting and Best Practices

  • Use context.Context for timeout and cancellation
  • Implement proper error handling and retry logic
  • Monitor job status before attempting downloads
  • Use connection checks to validate connector configurations
  • Consider implementing exponential backoff for retries

Authentication

The package supports API key authentication:

client, err := unstructured.New(
	unstructured.WithEndpoint("https://platform.unstructured.io/api/v1"),
	unstructured.WithKey("your-api-key"),
)

Helper Functions

The package provides several helper functions for working with pointers to primitive types. These functions are useful when you need to pass optional values to API requests.

Creating pointers from values:

str := unstructured.String("optional value")
enabled := unstructured.Bool(true)
count := unstructured.Int(42)

Converting pointers back to values with safe defaults:

value := unstructured.ToString(str) // returns "" if str is nil
flag := unstructured.ToBool(enabled) // returns false if enabled is nil
number := unstructured.ToInt(count)  // returns 0 if count is nil

These helper functions are particularly useful when working with optional fields in request structures:

workflow, err := client.CreateWorkflow(ctx, &unstructured.CreateWorkflowRequest{
	Name:          "My Workflow",
	WorkflowType:  unstructured.WorkflowTypeBasic,
	ReprocessAll:  unstructured.Bool(true),  // Optional boolean field
})

For more information about the Unstructured.io API, visit: https://docs.unstructured.io/api-reference/workflow/overview

Index

Constants

const (
	PartitionerStrategyAuto  = "auto"
	PartitionerStrategyVLM   = "vlm"
	PartitionerStrategyHiRes = "hi_res"
	PartitionerStrategyFast  = "fast"
)

Partitioner strategy constants.

const (
	ConnectorTypeAstraDB                    = "astradb"
	ConnectorTypeAzureAISearch              = "azure_ai_search"
	ConnectorTypeAzure                      = "azure"
	ConnectorTypeBox                        = "box"
	ConnectorTypeConfluence                 = "confluence"
	ConnectorTypeCouchbase                  = "couchbase"
	ConnectorTypeDatabricksVolumes          = "databricks_volumes"
	ConnectorTypeDatabricksVolumeDeltaTable = "databricks_volume_delta_tables"
	ConnectorTypeDeltaTable                 = "delta_table"
	ConnectorTypeDropbox                    = "dropbox"
	ConnectorTypeElasticsearch              = "elasticsearch"
	ConnectorTypeGCS                        = "gcs"
	ConnectorTypeGoogleDrive                = "google_drive"
	ConnectorTypeJira                       = "jira"
	ConnectorTypeKafkaCloud                 = "kafka-cloud"
	ConnectorTypeMilvus                     = "milvus"
	ConnectorTypeMongoDB                    = "mongodb"
	ConnectorTypeMotherDuck                 = "motherduck"
	ConnectorTypeNeo4j                      = "neo4j"
	ConnectorTypeOneDrive                   = "onedrive"
	ConnectorTypeOutlook                    = "outlook"
	ConnectorTypePinecone                   = "pinecone"
	ConnectorTypePostgres                   = "postgres"
	ConnectorTypeQdrantCloud                = "qdrant-cloud"
	ConnectorTypeRedis                      = "redis"
	ConnectorTypeS3                         = "s3"
	ConnectorTypeSalesforce                 = "salesforce"
	ConnectorTypeSharePoint                 = "sharepoint"
	ConnectorTypeSlack                      = "slack"
	ConnectorTypeSnowflake                  = "snowflake"
	ConnectorTypeWeaviateCloud              = "weaviate-cloud"
	ConnectorTypeZendesk                    = "zendesk"
	ConnectorTypeIBMWatsonxS3               = "ibm_watsonx_s3"
)

Connector type constants.

const HeaderKey = "Unstructured-API-Key"

HeaderKey is "Unstructured-API-Key", which is the header where Unstructured expects to find the API key.
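
For example, when building a request by hand instead of going through Client (which sets this header for you), the key is attached like this; the URL is illustrative:

req, err := http.NewRequestWithContext(ctx, http.MethodGet,
	"https://platform.unstructuredapp.io/api/v1/workflows", nil)
if err != nil {
	log.Fatal(err)
}
req.Header.Set(unstructured.HeaderKey, os.Getenv("UNSTRUCTURED_API_KEY"))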

Variables

This section is empty.

Functions

func Bool

func Bool(b bool) *bool

Bool returns a pointer to the given boolean value. This is useful when you need to pass optional boolean values to API requests.

func Int

func Int(i int) *int

Int returns a pointer to the given integer value. This is useful when you need to pass optional integer values to API requests.

func Ptr

func Ptr[T any](v T) *T

Ptr returns a pointer to the given value. This is useful when you need to pass optional values to API requests.

func String

func String(s string) *string

String returns a pointer to the given string value. This is useful when you need to pass optional string values to API requests.

func ToBool

func ToBool(p *bool) bool

ToBool converts a boolean pointer to a boolean value. If the pointer is nil, it returns false.

func ToInt

func ToInt(p *int) int

ToInt converts an integer pointer to an integer value. If the pointer is nil, it returns 0.

func ToString

func ToString(p *string) string

ToString converts a string pointer to a string value. If the pointer is nil, it returns an empty string.

func ToVal

func ToVal[T any](p *T) T

ToVal converts a pointer to a value. If the pointer is nil, it returns the zero value of the type.
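
For example, the generic helpers mirror the typed ones (values are illustrative):

schedule := unstructured.Ptr("0 0 * * *") // *string via the generic helper
cron := unstructured.ToVal(schedule)      // "0 0 * * *"

var missing *int
count := unstructured.ToVal(missing) // 0, the zero value for int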

Types

type APIError

type APIError struct {
	Code int
	Err  error
}

APIError represents an error returned by the API when a non-200 status code is returned.

func (*APIError) Error

func (e *APIError) Error() string

Error returns a string representation of the API error.

func (*APIError) Unwrap

func (e *APIError) Unwrap() error

type AstraDBConnectorConfig

type AstraDBConnectorConfig struct {
	CollectionName  string  `json:"collection_name"`
	Keyspace        *string `json:"keyspace,omitempty"`
	BatchSize       *int    `json:"batch_size,omitempty"`
	APIEndpoint     string  `json:"api_endpoint"`
	Token           string  `json:"token"`
	FlattenMetadata *bool   `json:"flatten_metadata,omitempty"`
	// contains filtered or unexported fields
}

AstraDBConnectorConfig represents the configuration for an AstraDB destination connector. It contains the collection name, keyspace, batch size, API endpoint, and token.

func (AstraDBConnectorConfig) Type

func (c AstraDBConnectorConfig) Type() string

Type always returns the connector type identifier for AstraDB: "astra_db".

type AzureAISearchConnectorConfig

type AzureAISearchConnectorConfig struct {
	Endpoint string `json:"endpoint"`
	Index    string `json:"index"`
	Key      string `json:"key"`
	// contains filtered or unexported fields
}

AzureAISearchConnectorConfig represents the configuration for an Azure AI Search destination connector. It contains the endpoint, index name, and API key.

func (AzureAISearchConnectorConfig) Type

Type always returns the connector type identifier for Azure AI Search: "azure_ai_search".

type AzureSourceConnectorConfig

type AzureSourceConnectorConfig struct {
	RemoteURL        string  `json:"remote_url"`
	AccountName      *string `json:"account_name,omitempty"`
	AccountKey       *string `json:"account_key,omitempty"`
	ConnectionString *string `json:"connection_string,omitempty"`
	SASToken         *string `json:"sas_token,omitempty"`
	Recursive        *bool   `json:"recursive,omitempty"`
	// contains filtered or unexported fields
}

AzureSourceConnectorConfig represents the configuration for an Azure Blob Storage source connector. It supports authentication via connection string, account key, or SAS token.

func (AzureSourceConnectorConfig) Type

Type always returns the connector type identifier for Azure: "azure".

type BlockType

type BlockType string

BlockType is a type that represents a block type.

const (
	BlockTypeImage BlockType = "Image"
	BlockTypeTable BlockType = "Table"
)

BlockType constants.

type BoxSourceConnectorConfig

type BoxSourceConnectorConfig struct {
	BoxAppConfig string `json:"box_app_config"`
	RemoteURL    string `json:"remote_url"`
	Recursive    *bool  `json:"recursive,omitempty"`
	// contains filtered or unexported fields
}

BoxSourceConnectorConfig represents the configuration for a Box source connector. It contains Box app configuration and file access settings.

func (BoxSourceConnectorConfig) Type

Type always returns the connector type identifier for Box: "box".

type ChunkerCharacter

type ChunkerCharacter struct {
	ID                  string `json:"-"`
	Name                string `json:"-"`
	APIURL              string `json:"unstructured_api_url,omitempty"`
	APIKey              string `json:"unstructured_api_key,omitempty"`
	IncludeOrigElements bool   `json:"include_orig_elements,omitempty"`
	NewAfterNChars      int    `json:"new_after_n_chars,omitempty"`
	MaxCharacters       int    `json:"max_characters,omitempty"`
	Overlap             int    `json:"overlap,omitempty"`
	OverlapAll          bool   `json:"overlap_all"`
}

ChunkerCharacter is a node that chunks text by character.

func (ChunkerCharacter) MarshalJSON

func (c ChunkerCharacter) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

type ChunkerPage

type ChunkerPage struct {
	ID                  string `json:"-"`
	Name                string `json:"-"`
	APIURL              string `json:"unstructured_api_url,omitempty"`
	APIKey              string `json:"unstructured_api_key,omitempty"`
	IncludeOrigElements bool   `json:"include_orig_elements,omitempty"`
	NewAfterNChars      int    `json:"new_after_n_chars,omitempty"`
	MaxCharacters       int    `json:"max_characters,omitempty"`
	Overlap             int    `json:"overlap,omitempty"`
	OverlapAll          bool   `json:"overlap_all"`
}

ChunkerPage is a node that chunks text by page.

func (ChunkerPage) MarshalJSON

func (c ChunkerPage) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

type ChunkerSimilarity

type ChunkerSimilarity struct {
	ID                  string `json:"-"`
	Name                string `json:"-"`
	APIURL              string `json:"unstructured_api_url,omitempty"`
	APIKey              string `json:"unstructured_api_key,omitempty"`
	IncludeOrigElements bool   `json:"include_orig_elements,omitempty"`
	NewAfterNChars      int    `json:"new_after_n_chars,omitempty"`
	MaxCharacters       int    `json:"max_characters,omitempty"`
	Overlap             int    `json:"overlap,omitempty"`
	OverlapAll          bool   `json:"overlap_all"`
}

ChunkerSimilarity is a node that chunks text by similarity.

func (ChunkerSimilarity) MarshalJSON

func (c ChunkerSimilarity) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

type ChunkerSubtype

type ChunkerSubtype string

ChunkerSubtype is a type that represents a chunker subtype.

const (
	ChunkerSubtypeCharacter  ChunkerSubtype = "chunk_by_character"
	ChunkerSubtypeTitle      ChunkerSubtype = "chunk_by_title"
	ChunkerSubtypePage       ChunkerSubtype = "chunk_by_page"
	ChunkerSubtypeSimilarity ChunkerSubtype = "chunk_by_similarity"
)

ChunkerSubtype constants.

type ChunkerTitle

type ChunkerTitle struct {
	ID                  string `json:"-"`
	Name                string `json:"-"`
	APIURL              string `json:"unstructured_api_url,omitempty"`
	APIKey              string `json:"unstructured_api_key,omitempty"`
	CombineTextUnderN   int    `json:"combine_text_under_n_chars,omitempty"`
	IncludeOrigElements bool   `json:"include_orig_elements,omitempty"`
	NewAfterNChars      int    `json:"new_after_n_chars,omitempty"`
	MaxCharacters       int    `json:"max_characters,omitempty"`
	Overlap             int    `json:"overlap,omitempty"`
	OverlapAll          bool   `json:"overlap_all"`
}

ChunkerTitle is a node that chunks text by title.

func (ChunkerTitle) MarshalJSON

func (c ChunkerTitle) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

type Client

type Client struct {
	// contains filtered or unexported fields
}

Client represents an HTTP client for interacting with the Unstructured.io API. It handles authentication, request formatting, and response parsing.

func New

func New(opts ...Option) (*Client, error)

New creates a new Client instance with the provided options. If the `UNSTRUCTURED_API_KEY` environment variable is set, it will be used as the API key for authentication. If the `UNSTRUCTURED_API_URL` environment variable is set to a valid URL, it will be used as the base URL for the Unstructured.io API. If no endpoint option is given via options or environment variables, the endpoint will default to the Unstructured.io platform at `https://platform.unstructuredapp.io/api/v1`. In order to configure the client properly, an API key must be provided via WithKey or the `UNSTRUCTURED_API_KEY` environment variable.
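
A minimal sketch of both styles, assuming the UNSTRUCTURED_API_KEY environment variable is set:

// Configured entirely from the environment variables described above.
client, err := unstructured.New()
if err != nil {
	log.Fatal(err)
}

// Or configured explicitly.
client, err = unstructured.New(
	unstructured.WithEndpoint("https://platform.unstructuredapp.io/api/v1"),
	unstructured.WithKey(os.Getenv("UNSTRUCTURED_API_KEY")),
)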

func (*Client) CancelJob

func (c *Client) CancelJob(ctx context.Context, id string) error

CancelJob cancels a running job by its ID

func (*Client) CreateDestination

func (c *Client) CreateDestination(ctx context.Context, in CreateDestinationRequest) (*Destination, error)

CreateDestination creates a new destination connector with the specified configuration. It returns the created destination connector with its assigned ID and metadata.

func (*Client) CreateDestinationConnectionCheck

func (c *Client) CreateDestinationConnectionCheck(ctx context.Context, id string) (*DagNodeConnectionCheck, error)

CreateDestinationConnectionCheck initiates a connection check for a destination connector by its ID. It returns a DagNodeConnectionCheck with the status of the check.

func (*Client) CreateSource

func (c *Client) CreateSource(ctx context.Context, in CreateSourceRequest) (*Source, error)

CreateSource creates a new source connector with the specified configuration. It returns the created source connector with its assigned ID and metadata.

func (*Client) CreateSourceConnectionCheck

func (c *Client) CreateSourceConnectionCheck(ctx context.Context, id string) (*DagNodeConnectionCheck, error)

CreateSourceConnectionCheck initiates a connection check for a source connector by its ID. It returns a DagNodeConnectionCheck with the status of the check.

func (*Client) CreateWorkflow

func (c *Client) CreateWorkflow(ctx context.Context, in *CreateWorkflowRequest) (*Workflow, error)

CreateWorkflow creates a new workflow

func (*Client) DeleteDestination

func (c *Client) DeleteDestination(ctx context.Context, id string) error

DeleteDestination deletes a specific destination connector by its ID

func (*Client) DeleteSource

func (c *Client) DeleteSource(ctx context.Context, id string) error

DeleteSource deletes a specific source connector identified by its ID

func (*Client) DeleteWorkflow

func (c *Client) DeleteWorkflow(ctx context.Context, id string) error

DeleteWorkflow deletes a workflow by its ID

func (*Client) DownloadJob

func (c *Client) DownloadJob(ctx context.Context, in DownloadJobRequest) (io.ReadCloser, error)

DownloadJob downloads the output files from a completed job

func (*Client) GetDestination

func (c *Client) GetDestination(ctx context.Context, id string) (*Destination, error)

GetDestination retrieves detailed information for a specific destination connector by its ID

func (*Client) GetDestinationConnectionCheck

func (c *Client) GetDestinationConnectionCheck(ctx context.Context, id string) (*DagNodeConnectionCheck, error)

GetDestinationConnectionCheck retrieves the status of a connection check for a destination connector by its ID. It returns a DagNodeConnectionCheck with the current status and reason if any.

func (*Client) GetJob

func (c *Client) GetJob(ctx context.Context, id string) (*Job, error)

GetJob retrieves detailed information for a specific job by its ID

func (*Client) GetJobDetails

func (c *Client) GetJobDetails(ctx context.Context, id string) (*JobDetails, error)

GetJobDetails retrieves detailed processing information for a specific job by its ID. It returns a JobDetails struct with node stats and processing status.

func (*Client) GetJobFailedFiles

func (c *Client) GetJobFailedFiles(ctx context.Context, id string) (*JobFailedFiles, error)

GetJobFailedFiles retrieves the list of failed files for a specific job by its ID. It returns a JobFailedFiles struct containing the failed files and error messages.

func (*Client) GetSource

func (c *Client) GetSource(ctx context.Context, id string) (*Source, error)

GetSource retrieves detailed information for a specific source connector by its ID

func (*Client) GetSourceConnectionCheck

func (c *Client) GetSourceConnectionCheck(ctx context.Context, id string) (*DagNodeConnectionCheck, error)

GetSourceConnectionCheck retrieves the status of a connection check for a source connector by its ID. It returns a DagNodeConnectionCheck with the current status and reason if any.

func (*Client) GetWorkflow

func (c *Client) GetWorkflow(ctx context.Context, id string) (*Workflow, error)

GetWorkflow retrieves detailed information for a specific workflow by its ID

func (*Client) ListDestinations

func (c *Client) ListDestinations(ctx context.Context, typ string) ([]Destination, error)

ListDestinations retrieves a list of available destination connectors

func (*Client) ListJobs

func (c *Client) ListJobs(ctx context.Context, in *ListJobsRequest) ([]Job, error)

ListJobs retrieves a list of jobs with optional filtering.

func (*Client) ListSources

func (c *Client) ListSources(ctx context.Context, typ string) ([]Source, error)

ListSources retrieves a list of available source connectors

func (*Client) ListWorkflows

func (c *Client) ListWorkflows(ctx context.Context, in *ListWorkflowsRequest) ([]Workflow, error)

ListWorkflows retrieves a list of workflows with optional filtering and pagination.

func (*Client) RunWorkflow

func (c *Client) RunWorkflow(ctx context.Context, in *RunWorkflowRequest) (*Job, error)

RunWorkflow runs a workflow by triggering a new job

func (*Client) UpdateDestination

func (c *Client) UpdateDestination(ctx context.Context, in UpdateDestinationRequest) (*Destination, error)

UpdateDestination updates the configuration of an existing destination connector. It returns the updated destination connector.

func (*Client) UpdateSource

func (c *Client) UpdateSource(ctx context.Context, in UpdateSourceRequest) (*Source, error)

UpdateSource updates the configuration of an existing source connector. It returns the updated source connector.

func (*Client) UpdateWorkflow

func (c *Client) UpdateWorkflow(ctx context.Context, in UpdateWorkflowRequest) (*Workflow, error)

UpdateWorkflow updates the configuration of an existing workflow. It returns the updated workflow.

type ConfluenceSourceConnectorConfig

type ConfluenceSourceConnectorConfig struct {
	URL                       string   `json:"url"`
	Username                  string   `json:"username"`
	Password                  *string  `json:"password,omitempty"`
	APIToken                  *string  `json:"api_token,omitempty"`
	Token                     *string  `json:"token,omitempty"`
	Cloud                     *bool    `json:"cloud,omitempty"`
	ExtractImages             *bool    `json:"extract_images,omitempty"`
	ExtractFiles              *bool    `json:"extract_files,omitempty"`
	MaxNumOfSpaces            *int     `json:"max_num_of_spaces,omitempty"`
	MaxNumOfDocsFromEachSpace *int     `json:"max_num_of_docs_from_each_space,omitempty"`
	Spaces                    []string `json:"spaces,omitempty"`
	// contains filtered or unexported fields
}

ConfluenceSourceConnectorConfig represents the configuration for a Confluence source connector. It contains authentication details and content extraction settings.

func (ConfluenceSourceConnectorConfig) Type

Type always returns the connector type identifier for Confluence: "confluence".

type ConnectionCheckStatus

type ConnectionCheckStatus string

ConnectionCheckStatus represents the status of a connection check (scheduled, success, or failure).

const (
	// ConnectionCheckStatusScheduled indicates the connection check is scheduled.
	ConnectionCheckStatusScheduled ConnectionCheckStatus = "SCHEDULED"
	// ConnectionCheckStatusSuccess indicates the connection check succeeded.
	ConnectionCheckStatusSuccess ConnectionCheckStatus = "SUCCESS"
	// ConnectionCheckStatusFailure indicates the connection check failed.
	ConnectionCheckStatusFailure ConnectionCheckStatus = "FAILURE"
)

type CouchbaseConnectorConfig

type CouchbaseConnectorConfig struct {
	Bucket           string  `json:"bucket"`
	ConnectionString string  `json:"connection_string"`
	Scope            *string `json:"scope,omitempty"`
	Collection       *string `json:"collection,omitempty"`
	BatchSize        int     `json:"batch_size"`
	Username         string  `json:"username"`
	Password         string  `json:"password"`
	CollectionID     *string `json:"collection_id,omitempty"`
	// contains filtered or unexported fields
}

CouchbaseConnectorConfig represents the configuration for a Couchbase connector. It contains connection details, bucket information, and authentication credentials.

func (CouchbaseConnectorConfig) Type

Type always returns the connector type identifier for Couchbase: "couchbase".

type CreateDestinationRequest

type CreateDestinationRequest struct {
	Name   string
	Config DestinationConfig
}

CreateDestinationRequest represents a request to create a new destination connector. It contains the name, type, and configuration for the destination.

type CreateSourceRequest

type CreateSourceRequest struct {
	Name   string
	Config SourceConfig
}

CreateSourceRequest represents a request to create a new source connector. It contains the name and configuration for the source.

type CreateWorkflowRequest

type CreateWorkflowRequest struct {
	Name          string         `json:"name"`
	SourceID      *string        `json:"source_id,omitempty"`
	DestinationID *string        `json:"destination_id,omitempty"`
	WorkflowNodes []WorkflowNode `json:"workflow_nodes,omitempty"`
	Schedule      *string        `json:"schedule,omitempty"`
	ReprocessAll  *bool          `json:"reprocess_all,omitempty"`
}

CreateWorkflowRequest represents the request to create a workflow

type CronTabEntry

type CronTabEntry struct {
	CronExpression string `json:"cron_expression"`
}

CronTabEntry represents a cron tab entry for scheduling workflows.

type DagNodeConnectionCheck

type DagNodeConnectionCheck struct {
	ID         string                `json:"id"`
	Status     ConnectionCheckStatus `json:"status"`
	Reason     *string               `json:"reason,omitempty"`
	CreatedAt  time.Time             `json:"created_at,omitempty"`
	ReportedAt *string               `json:"reported_at,omitempty"`
}

DagNodeConnectionCheck represents a connection check result for a DAG node (source or destination connector).

type DatabricksVDTDestinationConnectorConfig

type DatabricksVDTDestinationConnectorConfig struct {
	ServerHostname string  `json:"server_hostname"`
	HTTPPath       string  `json:"http_path"`
	Token          *string `json:"token,omitempty"`
	ClientID       *string `json:"client_id,omitempty"`
	ClientSecret   *string `json:"client_secret,omitempty"`
	Catalog        string  `json:"catalog"`
	Database       *string `json:"database,omitempty"`
	TableName      *string `json:"table_name,omitempty"`
	Schema         *string `json:"schema,omitempty"`
	Volume         string  `json:"volume"`
	VolumePath     *string `json:"volume_path,omitempty"`
	// contains filtered or unexported fields
}

DatabricksVDTDestinationConnectorConfig represents the configuration for a Databricks Volume Delta Tables destination connector. It contains server details, authentication, and table configuration.

func (DatabricksVDTDestinationConnectorConfig) Type

Type always returns the connector type identifier for Databricks Volume Delta Table: "databricks_volume_delta_table".

type DatabricksVolumesConnectorConfig

type DatabricksVolumesConnectorConfig struct {
	Host         string  `json:"host"`
	Catalog      string  `json:"catalog"`
	Schema       *string `json:"schema,omitempty"`
	Volume       string  `json:"volume"`
	VolumePath   string  `json:"volume_path"`
	ClientSecret string  `json:"client_secret"`
	ClientID     string  `json:"client_id"`
	// contains filtered or unexported fields
}

DatabricksVolumesConnectorConfig represents the configuration for a Databricks Volumes connector. It contains host details, catalog information, and authentication credentials.

func (DatabricksVolumesConnectorConfig) Type

Type always returns the connector type identifier for Databricks Volumes: "databricks_volumes".

type DatabricksVolumesConnectorConfigInput

type DatabricksVolumesConnectorConfigInput struct {
	Host         string  `json:"host"`
	Catalog      string  `json:"catalog"`
	Schema       *string `json:"schema,omitempty"`
	Volume       string  `json:"volume"`
	VolumePath   string  `json:"volume_path"`
	ClientSecret string  `json:"client_secret"`
	ClientID     string  `json:"client_id"`
	// contains filtered or unexported fields
}

DatabricksVolumesConnectorConfigInput represents the configuration for a Databricks Volumes connector. It contains host details, catalog information, and authentication credentials.

func (DatabricksVolumesConnectorConfigInput) Type

Type always returns the connector type identifier for Databricks Volumes: "databricks_volumes".

type DeltaTableConnectorConfig

type DeltaTableConnectorConfig struct {
	AwsAccessKeyID     string `json:"aws_access_key_id"`
	AwsSecretAccessKey string `json:"aws_secret_access_key"`
	AwsRegion          string `json:"aws_region"`
	TableURI           string `json:"table_uri"`
	// contains filtered or unexported fields
}

DeltaTableConnectorConfig represents the configuration for a Delta Table destination connector. It contains AWS credentials and table URI for Delta Lake storage.

func (DeltaTableConnectorConfig) Type

Type always returns the connector type identifier for Delta Table: "delta_table".

type Destination

type Destination struct {
	ID        string            `json:"id"`
	Name      string            `json:"name"`
	CreatedAt time.Time         `json:"created_at,omitzero"`
	UpdatedAt time.Time         `json:"updated_at,omitzero"`
	Type      string            `json:"type"`
	Config    DestinationConfig `json:"config"`
}

Destination represents a destination connector that sends processed data to various locations. It contains metadata about the connector and its configuration.

func (*Destination) UnmarshalJSON

func (d *Destination) UnmarshalJSON(data []byte) error

UnmarshalJSON implements custom JSON unmarshaling for Destination. It handles the polymorphic Config field by determining the correct type based on the "type" field in the JSON data.
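
A sketch of recovering the concrete config after fetching a destination; whether configs are stored as values or pointers is an assumption here, so adjust the type-switch cases accordingly:

dest, err := client.GetDestination(ctx, "destination-id")
if err != nil {
	log.Fatal(err)
}

switch cfg := dest.Config.(type) {
case unstructured.AstraDBConnectorConfig:
	log.Printf("AstraDB collection: %s", cfg.CollectionName)
case unstructured.ElasticsearchConnectorConfig:
	log.Printf("Elasticsearch index: %s", cfg.IndexName)
default:
	log.Printf("connector type: %s", dest.Config.Type())
}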

type DestinationConfig

type DestinationConfig interface {
	Type() string
	// contains filtered or unexported methods
}

DestinationConfig is an interface that all destination connector configurations implement. It provides a way to identify and work with different destination connector types.

type DownloadJobRequest

type DownloadJobRequest struct {
	JobID  string
	NodeID string
	FileID string
}

DownloadJobRequest represents a request to download a job output file.
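
A sketch of downloading a single output file using the documented fields; nodeID and fileID are assumed to be known (in practice they come from the job's output metadata):

reader, err := client.DownloadJob(ctx, unstructured.DownloadJobRequest{
	JobID:  "job-id",
	NodeID: nodeID,
	FileID: fileID,
})
if err != nil {
	log.Fatal(err)
}
defer reader.Close()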

type DropboxSourceConnectorConfig

type DropboxSourceConnectorConfig struct {
	Token     string `json:"token"`
	RemoteURL string `json:"remote_url"`
	Recursive *bool  `json:"recursive,omitempty"`
	// contains filtered or unexported fields
}

DropboxSourceConnectorConfig represents the configuration for a Dropbox source connector. It contains access token and file path configuration.

func (DropboxSourceConnectorConfig) Type

Type always returns the connector type identifier for Dropbox: "dropbox".

type ElasticsearchConnectorConfig

type ElasticsearchConnectorConfig struct {
	Hosts     []string `json:"hosts"`
	IndexName string   `json:"index_name"`
	ESAPIKey  string   `json:"es_api_key"`
	// contains filtered or unexported fields
}

ElasticsearchConnectorConfig represents the configuration for an Elasticsearch connector. It contains host details, index information, and API key authentication.

func (ElasticsearchConnectorConfig) Type

Type always returns the connector type identifier for Elasticsearch: "elasticsearch".

type ElasticsearchConnectorConfigInput

type ElasticsearchConnectorConfigInput struct {
	Hosts     []string `json:"hosts"`
	IndexName string   `json:"index_name"`
	ESAPIKey  string   `json:"es_api_key"`
	// contains filtered or unexported fields
}

ElasticsearchConnectorConfigInput represents the configuration for an Elasticsearch connector. It contains host details, index information, and API key authentication.

func (ElasticsearchConnectorConfigInput) Type

Type always returns the connector type identifier for Elasticsearch: "elasticsearch".

type Embedder

type Embedder struct {
	ID        string          `json:"-"`
	Name      string          `json:"-"`
	Subtype   EmbedderSubtype `json:"-"`
	ModelName EmbedderModel   `json:"model_name"`
}

Embedder represents an embedding node in a workflow.

func (Embedder) MarshalJSON

func (e Embedder) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

func (*Embedder) ValidateModel

func (e *Embedder) ValidateModel() error

ValidateModel validates that the model is compatible with the subtype.
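
A sketch of building and validating an embedder node; the subtype/model pairing follows the constant names and is assumed to be compatible:

emb := unstructured.Embedder{
	Name:      "Embedder",
	Subtype:   unstructured.EmbedderSubtypeAzureOpenAI,
	ModelName: unstructured.EmbedderModelAzureOpenAITextEmbedding3Small,
}
if err := emb.ValidateModel(); err != nil {
	log.Fatal(err)
}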

type EmbedderModel

type EmbedderModel string

EmbedderModel is a type that represents an embedder model.

const (
	EmbedderModelAzureOpenAITextEmbedding3Small EmbedderModel = "text-embedding-3-small"
	EmbedderModelAzureOpenAITextEmbedding3Large EmbedderModel = "text-embedding-3-large"
	EmbedderModelAzureOpenAITextEmbeddingAda002 EmbedderModel = "text-embedding-ada-002"
)

EmbedderModel constants for Azure OpenAI.

const (
	EmbedderModelBedrockTitanEmbedTextV2        EmbedderModel = "amazon.titan-embed-text-v2:0"
	EmbedderModelBedrockTitanEmbedTextV1        EmbedderModel = "amazon.titan-embed-text-v1"
	EmbedderModelBedrockTitanEmbedImageV1       EmbedderModel = "amazon.titan-embed-image-v1"
	EmbedderModelBedrockCohereEmbedEnglish      EmbedderModel = "cohere.embed-english-v3"
	EmbedderModelBedrockCohereEmbedMultilingual EmbedderModel = "cohere.embed-multilingual-v3"
)

EmbedderModel constants for Bedrock.

const (
	EmbedderModelVoyageAI3           EmbedderModel = "voyage-3"
	EmbedderModelVoyageAI3Large      EmbedderModel = "voyage-3-large"
	EmbedderModelVoyageAI3Lite       EmbedderModel = "voyage-3-lite"
	EmbedderModelVoyageAICode3       EmbedderModel = "voyage-code-3"
	EmbedderModelVoyageAIFinance2    EmbedderModel = "voyage-finance-2"
	EmbedderModelVoyageAILaw2        EmbedderModel = "voyage-law-2"
	EmbedderModelVoyageAICode2       EmbedderModel = "voyage-code-2"
	EmbedderModelVoyageAIMultimodal3 EmbedderModel = "voyage-multimodal-3"
)

EmbedderModel constants for VoyageAI.

const (
	EmbedderModelTogetherAIM2Bert80M32kRetrieval EmbedderModel = "togethercomputer/m2-bert-80M-32k-retrieval"
)

EmbedderModel constants for TogetherAI.

type EmbedderSubtype

type EmbedderSubtype string

EmbedderSubtype is a type that represents an embedder subtype.

const (
	EmbedderSubtypeAzureOpenAI EmbedderSubtype = "azure_openai"
	EmbedderSubtypeBedrock     EmbedderSubtype = "bedrock"
	EmbedderSubtypeTogetherAI  EmbedderSubtype = "togetherai"
	EmbedderSubtypeVoyageAI    EmbedderSubtype = "voyageai"
)

EmbedderSubtype constants.

type Encoding

type Encoding string

Encoding is a type that represents an encoding.

const (
	EncodingUTF8          Encoding = "utf_8"
	EncodingISO88591      Encoding = "iso_8859_1"
	EncodingISO88596      Encoding = "iso_8859_6"
	EncodingISO88598      Encoding = "iso_8859_8"
	EncodingASCII         Encoding = "ascii"
	EncodingBig5          Encoding = "big5"
	EncodingUTF16         Encoding = "utf_16"
	EncodingUTF16Be       Encoding = "utf_16_be"
	EncodingUTF16Le       Encoding = "utf_16_le"
	EncodingUTF32         Encoding = "utf_32"
	EncodingUTF32Be       Encoding = "utf_32_be"
	EncodingUTF32Le       Encoding = "utf_32_le"
	EncodingEUCJIS2004    Encoding = "euc_jis_2004"
	EncodingEUCJISX0213   Encoding = "euc_jisx0213"
	EncodingEUCJP         Encoding = "euc_jp"
	EncodingEUCKR         Encoding = "euc_kr"
	EncodingGb18030       Encoding = "gb18030"
	EncodingSHIFTJIS      Encoding = "shift_jis"
	EncodingSHIFTJIS2004  Encoding = "shift_jis_2004"
	EncodingSHIFTJISX0213 Encoding = "shift_jisx0213"
)

Encoding constants.

func (Encoding) String

func (e Encoding) String() string

String implements the fmt.Stringer interface, canonicalizing the encoding name.

type Enricher

type Enricher struct {
	ID                string         `json:"-"`
	Name              string         `json:"-"`
	Subtype           EnrichmentType `json:"-"`
	NERPromptOverride string         `json:"prompt_interface_overrides,omitempty"`
}

Enricher is a node that enriches text.

func (Enricher) MarshalJSON

func (e Enricher) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

type EnrichmentType

type EnrichmentType string

EnrichmentType is a type that represents an enrichment type.

const (
	EnrichmentTypeImageOpenAI      EnrichmentType = "openai_image_description"
	EnrichmentTypeTableOpenAI      EnrichmentType = "openai_table_description"
	EnrichmentTypeTable2HTMLOpenAI EnrichmentType = "openai_table2html"
	EnrichmentTypeNEROpenAI        EnrichmentType = "openai_ner"

	EnrichmentTypeImageAnthropic EnrichmentType = "anthropic_image_description"
	EnrichmentTypeTableAnthropic EnrichmentType = "anthropic_table_description"
	EnrichmentTypeNERAnthropic   EnrichmentType = "anthropic_ner"

	EnrichmentTypeImageBedrock EnrichmentType = "bedrock_image_description"
	EnrichmentTypeTableBedrock EnrichmentType = "bedrock_table_description"
)

EnrichmentType constants.

type ExcludeableElement

type ExcludeableElement string

ExcludeableElement represents elements that can be excluded during document processing.

const (
	ExcludableElementFigureCaption     ExcludeableElement = "FigureCaption"
	ExcludableElementNarrativeText     ExcludeableElement = "NarrativeText"
	ExcludableElementListItem          ExcludeableElement = "ListItem"
	ExcludableElementTitle             ExcludeableElement = "Title"
	ExcludableElementAddress           ExcludeableElement = "Address"
	ExcludableElementTable             ExcludeableElement = "Table"
	ExcludableElementPageBreak         ExcludeableElement = "PageBreak"
	ExcludableElementHeader            ExcludeableElement = "Header"
	ExcludableElementFooter            ExcludeableElement = "Footer"
	ExcludableElementUncategorizedText ExcludeableElement = "UncategorizedText"
	ExcludableElementImage             ExcludeableElement = "Image"
	ExcludableElementFormula           ExcludeableElement = "Formula"
	ExcludableElementEmailAddress      ExcludeableElement = "EmailAddress"
)

Excludeable element constants for document processing.

type FailedFile

type FailedFile struct {
	Document string `json:"document"`
	Error    string `json:"error"`
}

FailedFile represents a failed file in a job, including the document and error message.

type File

type File interface {
	Name() string
	io.Reader
}

File represents a file to upload to the workflow.

type FileBytes

type FileBytes struct {
	Filename string
	Bytes    io.Reader
}

FileBytes implements the File interface for an io.Reader in memory.

func (*FileBytes) Name

func (f *FileBytes) Name() string

Name returns the name of the file.

func (*FileBytes) Read

func (f *FileBytes) Read(p []byte) (n int, err error)

Read reads the file into the given buffer.
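
A short sketch of constructing an in-memory file; how File values are attached to a workflow run is not shown in this excerpt:

// *FileBytes satisfies the File interface (Name plus io.Reader).
doc := &unstructured.FileBytes{
	Filename: "notes.txt",
	Bytes:    strings.NewReader("plain text to process"),
}

var f unstructured.File = doc
log.Println(f.Name()) // "notes.txt"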

type GCSConnectorConfig

type GCSConnectorConfig struct {
	RemoteURL         string `json:"remote_url"`
	ServiceAccountKey string `json:"service_account_key"`
	Recursive         *bool  `json:"recursive,omitempty"`
	// contains filtered or unexported fields
}

GCSConnectorConfig represents the configuration for a Google Cloud Storage connector. It contains the remote URL and service account key for authentication.

func (GCSConnectorConfig) Type

func (c GCSConnectorConfig) Type() string

Type always returns the connector type identifier for GCS: "gcs".

type GoogleDriveSourceConnectorConfig

type GoogleDriveSourceConnectorConfig struct {
	DriveID           string   `json:"drive_id"`
	ServiceAccountKey *string  `json:"service_account_key,omitempty"`
	Extensions        []string `json:"extensions,omitempty"`
	Recursive         *bool    `json:"recursive,omitempty"`
	// contains filtered or unexported fields
}

GoogleDriveSourceConnectorConfig represents the configuration for a Google Drive source connector. It contains drive ID, service account key, and file filtering settings.

func (GoogleDriveSourceConnectorConfig) Type

Type always returns the connector type identifier for Google Drive: "google_drive".

type HTTPValidationError

type HTTPValidationError struct {
	Detail []*ValidationError `json:"detail"`
}

HTTPValidationError represents the validation error response returned by the API with a 422 status code.

func (*HTTPValidationError) Error

func (e *HTTPValidationError) Error() string

type IBMWatsonxS3DestinationConnectorConfig

type IBMWatsonxS3DestinationConnectorConfig struct {
	IAMApiKey             string  `json:"iam_api_key"`
	AccessKeyID           string  `json:"access_key_id"`
	SecretAccessKey       string  `json:"secret_access_key"`
	IcebergEndpoint       string  `json:"iceberg_endpoint"`
	ObjectStorageEndpoint string  `json:"object_storage_endpoint"`
	ObjectStorageRegion   string  `json:"object_storage_region"`
	Catalog               string  `json:"catalog"`
	MaxRetriesConnection  *int    `json:"max_retries_connection,omitempty"`
	Namespace             string  `json:"namespace"`
	Table                 string  `json:"table"`
	MaxRetries            *int    `json:"max_retries,omitempty"`
	RecordIDKey           *string `json:"record_id_key,omitempty"`
	// contains filtered or unexported fields
}

IBMWatsonxS3DestinationConnectorConfig represents the configuration for an IBM Watsonx S3 destination connector. It contains IBM Cloud authentication, storage endpoints, and table configuration.

func (IBMWatsonxS3DestinationConnectorConfig) Type

Type always returns the connector type identifier for IBM Watsonx S3: "ibm_watsonx_s3".

type JiraSourceConnectorConfig

type JiraSourceConnectorConfig struct {
	URL                 string   `json:"url"`
	Username            string   `json:"username"`
	Password            *string  `json:"password,omitempty"`
	Token               *string  `json:"token,omitempty"`
	Cloud               *bool    `json:"cloud,omitempty"`
	Projects            []string `json:"projects,omitempty"`
	Boards              []string `json:"boards,omitempty"`
	Issues              []string `json:"issues,omitempty"`
	StatusFilters       []string `json:"status_filters,omitempty"`
	DownloadAttachments *bool    `json:"download_attachments,omitempty"`
	// contains filtered or unexported fields
}

JiraSourceConnectorConfig represents the configuration for a Jira source connector. It contains authentication details and project/issue filtering settings.

func (JiraSourceConnectorConfig) Type

Type always returns the connector type identifier for Jira: "jira".

type Job

type Job struct {
	ID              string             `json:"id"`
	WorkflowID      string             `json:"workflow_id"`
	WorkflowName    string             `json:"workflow_name"`
	Status          JobStatus          `json:"status"`
	CreatedAt       time.Time          `json:"created_at,omitzero"`
	Runtime         *string            `json:"runtime,omitempty"`
	InputFileIDs    []string           `json:"input_file_ids,omitempty"`
	OutputNodeFiles []NodeFileMetadata `json:"output_node_files,omitempty"`
	JobType         WorkflowJobType    `json:"job_type"`
}

Job represents a job, which is an execution of a workflow in Unstructured.io.

func (*Job) UnmarshalJSON

func (j *Job) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

type JobDetails

type JobDetails struct {
	ID               string              `json:"id"`
	ProcessingStatus JobProcessingStatus `json:"processing_status"`
	NodeStats        []JobNodeDetails    `json:"node_stats"`
	Message          *string             `json:"message,omitempty"`
}

JobDetails represents detailed information about a job, including processing status and node stats.

type JobFailedFiles

type JobFailedFiles struct {
	FailedFiles []FailedFile `json:"failed_files"`
}

JobFailedFiles represents failed files for a job.

type JobNodeDetails

type JobNodeDetails struct {
	NodeName    *string `json:"node_name,omitempty"`
	NodeType    *string `json:"node_type,omitempty"`
	NodeSubtype *string `json:"node_subtype,omitempty"`
	Ready       int     `json:"ready"`
	InProgress  int     `json:"in_progress"`
	Success     int     `json:"success"`
	Failure     int     `json:"failure"`
}

JobNodeDetails represents details about a job node, including status counts.

type JobProcessingStatus

type JobProcessingStatus string

JobProcessingStatus represents the processing status of a job (e.g., scheduled, in progress, success, etc.).

const (
	// JobProcessingStatusScheduled indicates the job is scheduled for processing.
	JobProcessingStatusScheduled JobProcessingStatus = "SCHEDULED"
	// JobProcessingStatusInProgress indicates the job is currently being processed.
	JobProcessingStatusInProgress JobProcessingStatus = "IN_PROGRESS"
	// JobProcessingStatusSuccess indicates the job was processed successfully.
	JobProcessingStatusSuccess JobProcessingStatus = "SUCCESS"
	// JobProcessingStatusCompletedWithErrors indicates the job completed with errors.
	JobProcessingStatusCompletedWithErrors JobProcessingStatus = "COMPLETED_WITH_ERRORS"
	// JobProcessingStatusStopped indicates the job was stopped.
	JobProcessingStatusStopped JobProcessingStatus = "STOPPED"
	// JobProcessingStatusFailed indicates the job failed.
	JobProcessingStatusFailed JobProcessingStatus = "FAILED"
)

type JobStatus

type JobStatus string

JobStatus represents the status of a job (e.g., scheduled, in progress, completed, stopped, failed).

const (
	// JobStatusScheduled indicates the job is scheduled.
	JobStatusScheduled JobStatus = "SCHEDULED"
	// JobStatusInProgress indicates the job is in progress.
	JobStatusInProgress JobStatus = "IN_PROGRESS"
	// JobStatusCompleted indicates the job is completed.
	JobStatusCompleted JobStatus = "COMPLETED"
	// JobStatusStopped indicates the job is stopped.
	JobStatusStopped JobStatus = "STOPPED"
	// JobStatusFailed indicates the job has failed.
	JobStatusFailed JobStatus = "FAILED"
)
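
For illustration, a small sketch that branches on a Job's status; how the Job was obtained (for example from a job-lookup call on the client) is outside this snippet.

package main

import (
	"fmt"

	"github.com/aws-gopher/unstructured-sdk-go"
)

// describeJob returns a short, human-readable summary of a job's state.
func describeJob(job unstructured.Job) string {
	switch job.Status {
	case unstructured.JobStatusCompleted:
		return fmt.Sprintf("job %s finished", job.ID)
	case unstructured.JobStatusFailed, unstructured.JobStatusStopped:
		return fmt.Sprintf("job %s ended early with status %s", job.ID, job.Status)
	default:
		return fmt.Sprintf("job %s is still %s", job.ID, job.Status)
	}
}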

type KafkaCloudConnectorConfig

type KafkaCloudConnectorConfig struct {
	BootstrapServers     string  `json:"bootstrap_servers"`
	Port                 *int    `json:"port,omitempty"`
	GroupID              *string `json:"group_id,omitempty"`
	Topic                string  `json:"topic"`
	KafkaAPIKey          string  `json:"kafka_api_key"`
	Secret               string  `json:"secret"`
	NumMessagesToConsume *int    `json:"num_messages_to_consume,omitempty"`
	BatchSize            *int    `json:"batch_size,omitempty"`
	// contains filtered or unexported fields
}

KafkaCloudConnectorConfig represents the configuration for a Kafka Cloud connector. It contains broker details, topic information, and authentication credentials.

func (KafkaCloudConnectorConfig) Type

Type always returns the connector type identifier for Kafka Cloud: "kafka-cloud".

type Language

type Language string

Language is an ISO 639-2 three-letter code representing a language supported by Unstructured for OCR.

const (
	LanguageEnglish Language = "eng"

	LanguageAfrikaans                  Language = "afr"
	LanguageAmharic                    Language = "amh"
	LanguageArabic                     Language = "ara"
	LanguageAssamese                   Language = "asm"
	LanguageAzerbaijani                Language = "aze"
	LanguageAzerbaijaniCyrillic        Language = "aze_cyrl"
	LanguageBelarusian                 Language = "bel"
	LanguageBengali                    Language = "ben"
	LanguageTibetan                    Language = "bod"
	LanguageBosnian                    Language = "bos"
	LanguageBreton                     Language = "bre"
	LanguageBulgarian                  Language = "bul"
	LanguageCatalan                    Language = "cat"
	LanguageCebuano                    Language = "ceb"
	LanguageCzech                      Language = "ces"
	LanguageSimplifiedChinese          Language = "chi_sim"
	LanguageSimplifiedChineseVertical  Language = "chi_sim_vert"
	LanguageTraditionalChinese         Language = "chi_tra"
	LanguageTraditionalChineseVertical Language = "chi_tra_vert"
	LanguageCherokee                   Language = "chr"
	LanguageCorsican                   Language = "cos"
	LanguageWelsh                      Language = "cym"
	LanguageDanish                     Language = "dan"
	LanguageGerman                     Language = "deu"
	LanguageDivehi                     Language = "div"
	LanguageDzongkha                   Language = "dzo"
	LanguageGreek                      Language = "ell"
	LanguageMiddleEnglish              Language = "enm"
	LanguageEsperanto                  Language = "epo"
	LanguageEquationDetection          Language = "equ"
	LanguageEstonian                   Language = "est"
	LanguageBasque                     Language = "eus"
	LanguageFaroese                    Language = "fao"
	LanguagePersian                    Language = "fas"
	LanguageFilipino                   Language = "fil"
	LanguageFinnish                    Language = "fin"
	LanguageFrench                     Language = "fra"
	LanguageGermanFraktur              Language = "frk"
	LanguageFrenchMiddle               Language = "frm"
	LanguageFrisian                    Language = "fry"
	LanguageScottishGaelic             Language = "gla"
	LanguageIrish                      Language = "gle"
	LanguageGalician                   Language = "glg"
	LanguageAncientGreek               Language = "grc"
	LanguageGujarati                   Language = "guj"
	LanguageHaitian                    Language = "hat"
	LanguageHebrew                     Language = "heb"
	LanguageHindi                      Language = "hin"
	LanguageCroatian                   Language = "hrv"
	LanguageHungarian                  Language = "hun"
	LanguageArmenian                   Language = "hye"
	LanguageInuktitut                  Language = "iku"
	LanguageIndonesian                 Language = "ind"
	LanguageIcelandic                  Language = "isl"
	LanguageItalian                    Language = "ita"
	LanguageItalianOld                 Language = "ita_old"
	LanguageJavanese                   Language = "jav"
	LanguageJapanese                   Language = "jpn"
	LanguageJapaneseVertical           Language = "jpn_vert"
	LanguageKannada                    Language = "kan"
	LanguageGeorgian                   Language = "kat"
	LanguageGeorgianOld                Language = "kat_old"
	LanguageKazakh                     Language = "kaz"
	LanguageKhmer                      Language = "khm"
	LanguageKyrgyz                     Language = "kir"
	LanguageKurdish                    Language = "kmr"
	LanguageKorean                     Language = "kor"
	LanguageKoreanVertical             Language = "kor_vert"
	LanguageLao                        Language = "lao"
	LanguageLatin                      Language = "lat"
	LanguageLatvian                    Language = "lav"
	LanguageLithuanian                 Language = "lit"
	LanguageLuxembourgish              Language = "ltz"
	LanguageMalayalam                  Language = "mal"
	LanguageMarathi                    Language = "mar"
	LanguageMacedonian                 Language = "mkd"
	LanguageMaltese                    Language = "mlt"
	LanguageMongolian                  Language = "mon"
	LanguageMaori                      Language = "mri"
	LanguageMalay                      Language = "msa"
	LanguageBurmese                    Language = "mya"
	LanguageNepali                     Language = "nep"
	LanguageDutch                      Language = "nld"
	LanguageNorwegian                  Language = "nor"
	LanguageOccitan                    Language = "oci"
	LanguageOriya                      Language = "ori"
	LanguageOrientationDetection       Language = "osd"
	LanguagePanjabi                    Language = "pan"
	LanguagePolish                     Language = "pol"
	LanguagePortuguese                 Language = "por"
	LanguagePunjabi                    Language = "pus"
	LanguageQuechua                    Language = "que"
	LanguageRomanian                   Language = "ron"
	LanguageRussian                    Language = "rus"
	LanguageSanskrit                   Language = "san"
	LanguageSinhala                    Language = "sin"
	LanguageSlovak                     Language = "slk"
	LanguageSlovenian                  Language = "slv"
	LanguageSindhi                     Language = "snd"
	LanguageSNUM                       Language = "snum"
	LanguageSpanish                    Language = "spa"
	LanguageSpanishOld                 Language = "spa_old"
	LanguageAlbanian                   Language = "sqi"
	LanguageSerbian                    Language = "srp"
	LanguageSerbianLatin               Language = "srp_latn"
	LanguageSundanese                  Language = "sun"
	LanguageSwahili                    Language = "swa"
	LanguageSwedish                    Language = "swe"
	LanguageSyriac                     Language = "syr"
	LanguageTamil                      Language = "tam"
	LanguageTatar                      Language = "tat"
	LanguageTelugu                     Language = "tel"
	LanguageTajik                      Language = "tgk"
	LanguageThai                       Language = "tha"
	LanguageTigrinya                   Language = "tir"
	LanguageTonga                      Language = "ton"
	LanguageTurkish                    Language = "tur"
	LanguageUyghur                     Language = "uig"
	LanguageUkrainian                  Language = "ukr"
	LanguageUrdu                       Language = "urd"
	LanguageUzbek                      Language = "uzb"
	LanguageUzbekCyrillic              Language = "uzb_cyrl"
	LanguageVietnamese                 Language = "vie"
	LanguageYiddish                    Language = "yid"
	LanguageYoruba                     Language = "yor"
)

Supported language constants.

type ListJobsRequest

type ListJobsRequest struct {
	WorkflowID *string
	Status     *JobStatus
}

ListJobsRequest represents the request to list jobs with optional filters.

type ListWorkflowsRequest

type ListWorkflowsRequest struct {
	DagNodeConfigurationID   *string
	SourceID                 *string
	DestinationID            *string
	Status                   *WorkflowState
	Page                     *int
	PageSize                 *int
	CreatedSince             *time.Time
	CreatedBefore            *time.Time
	Name                     *string
	SortBy                   *string
	SortDirection            *SortDirection
	ShowOnlySoftDeleted      *bool
	ShowRecommenderWorkflows *bool
}

ListWorkflowsRequest represents the request to list workflows with optional filters.
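
For illustration, the sketch below builds a filter for the first page of active workflows, newest first; the sort key "created_at" is an assumption, String is the package's optional-field helper, and passing the request to the client's workflow-listing method is assumed.

package main

import "github.com/aws-gopher/unstructured-sdk-go"

// activeWorkflowsFirstPage filters for active workflows, 25 per page,
// sorted newest first. The sort key is an assumed field name.
func activeWorkflowsFirstPage() unstructured.ListWorkflowsRequest {
	status := unstructured.WorkflowStateActive
	dir := unstructured.SortDirectionDesc
	page, size := 1, 25

	return unstructured.ListWorkflowsRequest{
		Status:        &status,
		Page:          &page,
		PageSize:      &size,
		SortBy:        unstructured.String("created_at"),
		SortDirection: &dir,
	}
}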

type MilvusDestinationConnectorConfig

type MilvusDestinationConnectorConfig struct {
	URI            string  `json:"uri"`
	User           *string `json:"user,omitempty"`
	Token          *string `json:"token,omitempty"`
	Password       *string `json:"password,omitempty"`
	DBName         *string `json:"db_name,omitempty"`
	CollectionName string  `json:"collection_name"`
	RecordIDKey    string  `json:"record_id_key"`
	// contains filtered or unexported fields
}

MilvusDestinationConnectorConfig represents the configuration for a Milvus destination connector. It contains connection details, collection information, and authentication.

func (MilvusDestinationConnectorConfig) Type

Type always returns the connector type identifier for Milvus: "milvus".

type Model

type Model string

Model represents an AI model identifier.

const (
	ModelGPT4o                 Model = "gpt-4o"
	ModelGPT4oMini             Model = "gpt-4o-mini"
	ModelClaude35Sonnet        Model = "claude-3-5-sonnet-20241022"
	ModelClaude37Sonnet        Model = "claude-3-7-sonnet-20250219"
	ModelBedrockNovaLite       Model = "us.amazon.nova-lite-v1:0"
	ModelBedrockNovaPro        Model = "us.amazon.nova-pro-v1:0"
	ModelBedrockClaude3Opus    Model = "us.anthropic.claude-3-opus-20240229-v1:0"
	ModelBedrockClaude3Haiku   Model = "us.anthropic.claude-3-haiku-20240307-v1:0"
	ModelBedrockClaude3Sonnet  Model = "us.anthropic.claude-3-sonnet-20240229-v1:0"
	ModelBedrockClaude35Sonnet Model = "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
	ModelBedrockLlama3211B     Model = "us.meta.llama3-2-11b-instruct-v1:0"
	ModelBedrockLlama3290B     Model = "us.meta.llama3-2-90b-instruct-v1:0"
)

Model constants.

type MongoDBConnectorConfig

type MongoDBConnectorConfig struct {
	Database   string `json:"database"`
	Collection string `json:"collection"`
	URI        string `json:"uri"`
	// contains filtered or unexported fields
}

MongoDBConnectorConfig represents the configuration for a MongoDB connector. It contains database connection details and collection information.

func (MongoDBConnectorConfig) Type

func (c MongoDBConnectorConfig) Type() string

Type always returns the connector type identifier for MongoDB: "mongodb".

type MongoDBConnectorConfigInput

type MongoDBConnectorConfigInput struct {
	Database   string `json:"database"`
	Collection string `json:"collection"`
	URI        string `json:"uri"`
	// contains filtered or unexported fields
}

MongoDBConnectorConfigInput represents the configuration for a MongoDB connector. It contains database connection details and collection information.

func (MongoDBConnectorConfigInput) Type

Type always returns the connector type identifier for MongoDB: "mongodb".

type MotherduckDestinationConnectorConfig

type MotherduckDestinationConnectorConfig struct {
	Account     string  `json:"account"`
	Role        string  `json:"role"`
	User        string  `json:"user"`
	Password    string  `json:"password"`
	Host        string  `json:"host"`
	Port        *int    `json:"port,omitempty"`
	Database    string  `json:"database"`
	Schema      *string `json:"schema,omitempty"`
	TableName   *string `json:"table_name,omitempty"`
	BatchSize   *int    `json:"batch_size,omitempty"`
	RecordIDKey *string `json:"record_id_key,omitempty"`
	// contains filtered or unexported fields
}

MotherduckDestinationConnectorConfig represents the configuration for a MotherDuck destination connector. It contains database connection details and authentication credentials.

func (MotherduckDestinationConnectorConfig) Type

Type always returns the connector type identifier for MotherDuck: "mother_duck".

type Neo4jDestinationConnectorConfig

type Neo4jDestinationConnectorConfig struct {
	URI       string `json:"uri"`
	Database  string `json:"database"`
	Username  string `json:"username"`
	Password  string `json:"password"`
	BatchSize *int   `json:"batch_size,omitempty"`
	// contains filtered or unexported fields
}

Neo4jDestinationConnectorConfig represents the configuration for a Neo4j destination connector. It contains database connection details and authentication credentials.

func (Neo4jDestinationConnectorConfig) Type

Type always returns the connector type identifier for Neo4j: "neo4j".

type NodeFileMetadata

type NodeFileMetadata struct {
	NodeID string `json:"node_id"`
	FileID string `json:"file_id"`
}

NodeFileMetadata represents metadata for a node file in a job.

type OneDriveConnectorConfig

type OneDriveConnectorConfig struct {
	ClientID     string  `json:"client_id"`
	UserPName    string  `json:"user_pname"`
	Tenant       string  `json:"tenant"`
	AuthorityURL string  `json:"authority_url"`
	ClientCred   string  `json:"client_cred"`
	Recursive    *bool   `json:"recursive,omitempty"`
	Path         *string `json:"path,omitempty"`
	RemoteURL    *string `json:"remote_url,omitempty"`
	// contains filtered or unexported fields
}

OneDriveConnectorConfig represents the configuration for a OneDrive connector. It contains Microsoft Graph API authentication and file access settings.

func (OneDriveConnectorConfig) Type

Type always returns the connector type identifier for OneDrive: "onedrive".

type Option

type Option func(*Client) error

Option is a function that configures a Client instance. Options set values such as the endpoint URL, API key, and HTTP client during client creation.

func WithClient

func WithClient(hc *http.Client) Option

WithClient returns an Option that sets the HTTP client to use for requests. If no client is provided, the client will default to http.DefaultClient.

func WithEndpoint

func WithEndpoint(endpoint string) Option

WithEndpoint returns an Option that sets the API endpoint URL. The endpoint should be the base URL for the Unstructured.io API, including the base path like "/api/v1". Without this option, the client will default to `https://platform.unstructuredapp.io/api/v1`.

func WithKey

func WithKey(key string) Option

WithKey returns an Option that sets the API key for authentication. The API key is used to authenticate all requests to the Unstructured.io API. This is accomplished using a http.RoundTripper that sets the key as the value of the `Unstructured-API-Key` header on all requests.

type OutlookSourceConnectorConfig

type OutlookSourceConnectorConfig struct {
	AuthorityURL   *string  `json:"authority_url,omitempty"`
	Tenant         *string  `json:"tenant,omitempty"`
	ClientID       string   `json:"client_id"`
	ClientCred     string   `json:"client_cred"`
	OutlookFolders []string `json:"outlook_folders,omitempty"`
	Recursive      *bool    `json:"recursive,omitempty"`
	UserEmail      string   `json:"user_email"`
	// contains filtered or unexported fields
}

OutlookSourceConnectorConfig represents the configuration for an Outlook source connector. It contains Microsoft Graph API authentication and email folder settings.

func (OutlookSourceConnectorConfig) Type

Type always returns the connector type identifier for Outlook: "outlook".

type OutputFormat

type OutputFormat string

OutputFormat represents the output format for document processing.

const (
	OutputFormatHTML OutputFormat = "text/html"
	OutputFormatJSON OutputFormat = "application/json"
)

Output format constants.

type PartitionerAuto

type PartitionerAuto struct {
	ID             string       `json:"-"`
	Name           string       `json:"-"`
	Strategy       string       `json:"strategy"`
	Provider       Provider     `json:"provider,omitempty"`
	ProviderAPIKey string       `json:"provider_api_key,omitempty"`
	Model          Model        `json:"model,omitempty"`
	OutputFormat   OutputFormat `json:"output_format,omitempty"`
	Prompt         struct {
		Text string `json:"text,omitempty"`
	} `json:"prompt,omitzero"`
	FormatHTML       *bool `json:"format_html,omitzero"`
	UniqueElementIDs *bool `json:"unique_element_ids,omitzero"`
	IsDynamic        bool  `json:"is_dynamic"`
	AllowFast        bool  `json:"allow_fast"`
}

PartitionerAuto is a partitioner that uses the Auto strategy.

func (PartitionerAuto) MarshalJSON

func (p PartitionerAuto) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

type PartitionerFast

type PartitionerFast struct {
	ID                     string               `json:"-"`
	Name                   string               `json:"-"`
	PageBreaks             bool                 `json:"include_page_breaks,omitzero"`
	PDFInferTableStructure bool                 `json:"pdf_infer_table_structure,omitzero"`
	ExcludeElements        []ExcludeableElement `json:"exclude_elements,omitzero"`
	XMLKeepTags            bool                 `json:"xml_keep_tags,omitzero"`
	Encoding               Encoding             `json:"encoding,omitzero"`
	OCRLanguages           []Language           `json:"ocr_languages,omitzero"`
	ExtractImageBlockTypes []BlockType          `json:"extract_image_block_types,omitzero"`
	InferTableStructure    bool                 `json:"infer_table_structure,omitzero"`
}

PartitionerFast represents a fast partitioner configuration for document processing.

func (PartitionerFast) MarshalJSON

func (p PartitionerFast) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface for PartitionerFast.
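
For illustration, a sketch of a fast partitioner that drops page furniture, runs OCR for English and German, and infers table structure; this is not a canonical configuration, just one built from the constants documented above.

package main

import "github.com/aws-gopher/unstructured-sdk-go"

// fastPartitioner builds a fast-strategy partitioner node that excludes
// headers, footers, and page breaks and enables table inference.
func fastPartitioner() unstructured.PartitionerFast {
	return unstructured.PartitionerFast{
		Name: "Partitioner", // local label only; not serialized (json:"-")
		ExcludeElements: []unstructured.ExcludeableElement{
			unstructured.ExcludableElementHeader,
			unstructured.ExcludableElementFooter,
			unstructured.ExcludableElementPageBreak,
		},
		OCRLanguages: []unstructured.Language{
			unstructured.LanguageEnglish,
			unstructured.LanguageGerman,
		},
		InferTableStructure: true,
	}
}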

type PartitionerHiRes

type PartitionerHiRes struct {
	ID                     string               `json:"-"`
	Name                   string               `json:"-"`
	PageBreaks             bool                 `json:"include_page_breaks,omitzero"`
	PDFInferTableStructure bool                 `json:"pdf_infer_table_structure,omitzero"`
	ExcludeElements        []ExcludeableElement `json:"exclude_elements,omitzero"`
	XMLKeepTags            bool                 `json:"xml_keep_tags,omitzero"`
	Encoding               Encoding             `json:"encoding,omitzero"`
	OCRLanguages           []Language           `json:"ocr_languages,omitzero"`
	ExtractImageBlockTypes []BlockType          `json:"extract_image_block_types,omitzero"`
	InferTableStructure    bool                 `json:"infer_table_structure,omitzero"`
}

PartitionerHiRes represents a high-resolution partitioner configuration for document processing.

func (PartitionerHiRes) MarshalJSON

func (p PartitionerHiRes) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface for PartitionerHiRes.

type PartitionerVLM

type PartitionerVLM struct {
	ID             string       `json:"-"`
	Name           string       `json:"-"`
	Strategy       string       `json:"strategy,omitempty"`
	Provider       Provider     `json:"provider,omitempty"`
	ProviderAPIKey string       `json:"provider_api_key,omitempty"`
	Model          Model        `json:"model,omitempty"`
	OutputFormat   OutputFormat `json:"output_format,omitempty"`
	Prompt         struct {
		Text string `json:"text,omitempty"`
	} `json:"prompt,omitzero"`
	FormatHTML       *bool `json:"format_html,omitzero"`
	UniqueElementIDs *bool `json:"unique_element_ids,omitzero"`
	IsDynamic        *bool `json:"is_dynamic,omitzero"`
	AllowFast        *bool `json:"allow_fast,omitzero"`
}

PartitionerVLM is a partitioner that uses the VLM strategy.

func (PartitionerVLM) MarshalJSON

func (p PartitionerVLM) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.
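
For illustration, a sketch of a VLM partitioner pairing an Anthropic provider with a matching Model constant and HTML output; the API key is supplied by the caller, and the choices here are examples rather than recommendations.

package main

import "github.com/aws-gopher/unstructured-sdk-go"

// vlmPartitioner builds a VLM-strategy partitioner backed by Claude 3.7
// Sonnet that emits HTML. The key is passed in by the caller.
func vlmPartitioner(apiKey string) unstructured.PartitionerVLM {
	return unstructured.PartitionerVLM{
		Provider:       unstructured.ProviderAnthropic,
		ProviderAPIKey: apiKey,
		Model:          unstructured.ModelClaude37Sonnet,
		OutputFormat:   unstructured.OutputFormatHTML,
	}
}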

type PineconeDestinationConnectorConfig

type PineconeDestinationConnectorConfig struct {
	IndexName string `json:"index_name"`
	APIKey    string `json:"api_key"`
	Namespace string `json:"namespace"`
	BatchSize *int   `json:"batch_size,omitempty"`
	// contains filtered or unexported fields
}

PineconeDestinationConnectorConfig represents the configuration for a Pinecone destination connector. It contains index details, API key, and namespace information.

func (PineconeDestinationConnectorConfig) Type

Type always returns the connector type identifier for Pinecone: "pinecone".

type PostgresConnectorConfig

type PostgresConnectorConfig struct {
	Host      string   `json:"host"`
	Database  string   `json:"database"`
	Port      int      `json:"port"`
	Username  string   `json:"username"`
	Password  string   `json:"password"`
	TableName string   `json:"table_name"`
	BatchSize int      `json:"batch_size"`
	IDColumn  *string  `json:"id_column,omitempty"`
	Fields    []string `json:"fields,omitempty"`
	// contains filtered or unexported fields
}

PostgresConnectorConfig represents the configuration for a PostgreSQL connector. It contains database connection details and table configuration.

func (PostgresConnectorConfig) Type

Type always returns the connector type identifier for PostgreSQL: "postgres".

type Provider

type Provider string

Provider represents an AI model provider.

const (
	ProviderAuto      Provider = "auto"
	ProviderAnthropic Provider = "anthropic"
	ProviderOpenAI    Provider = "openai"
	ProviderBedrock   Provider = "bedrock"
)

Provider constants.

type QdrantCloudDestinationConnectorConfig

type QdrantCloudDestinationConnectorConfig struct {
	URL            string `json:"url"`
	APIKey         string `json:"api_key"`
	CollectionName string `json:"collection_name"`
	BatchSize      *int   `json:"batch_size,omitempty"`
	// contains filtered or unexported fields
}

QdrantCloudDestinationConnectorConfig represents the configuration for a Qdrant Cloud destination connector. It contains API endpoint, collection details, and authentication.

func (QdrantCloudDestinationConnectorConfig) Type

Type always returns the connector type identifier for Qdrant Cloud: "qdrant_cloud".

type RedisDestinationConnectorConfig

type RedisDestinationConnectorConfig struct {
	Host      string  `json:"host"`
	Port      *int    `json:"port,omitempty"`
	Username  *string `json:"username,omitempty"`
	Password  *string `json:"password,omitempty"`
	URI       *string `json:"uri,omitempty"`
	Database  *int    `json:"database,omitempty"`
	SSL       *bool   `json:"ssl,omitempty"`
	BatchSize *int    `json:"batch_size,omitempty"`
	// contains filtered or unexported fields
}

RedisDestinationConnectorConfig represents the configuration for a Redis destination connector. It contains connection details, database selection, and authentication.

func (RedisDestinationConnectorConfig) Type

Type always returns the connector type identifier for Redis: "redis".

type RunWorkflowRequest

type RunWorkflowRequest struct {
	ID string

	// InputFiles is a list of files to upload to the workflow.
	// Each file must implement the File interface (a named io.Reader).
	InputFiles []File
}

RunWorkflowRequest represents the request to run a workflow.
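
For illustration, a sketch that assembles a RunWorkflowRequest from in-memory documents (see FileBytes above); the workflow ID and file contents come from the caller, and handing the request to the client's RunWorkflow method is assumed to follow the package's usual call pattern.

package main

import (
	"bytes"

	"github.com/aws-gopher/unstructured-sdk-go"
)

// runRequest pairs a workflow ID with in-memory documents to upload.
func runRequest(workflowID string, docs map[string][]byte) unstructured.RunWorkflowRequest {
	files := make([]unstructured.File, 0, len(docs))
	for name, data := range docs {
		files = append(files, &unstructured.FileBytes{
			Filename: name,
			Bytes:    bytes.NewReader(data),
		})
	}

	return unstructured.RunWorkflowRequest{
		ID:         workflowID,
		InputFiles: files,
	}
}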

type S3ConnectorConfig

type S3ConnectorConfig struct {
	RemoteURL   string  `json:"remote_url"`
	Anonymous   *bool   `json:"anonymous,omitempty"`
	Key         *string `json:"key,omitempty"`
	Secret      *string `json:"secret,omitempty"`
	Token       *string `json:"token,omitempty"`
	EndpointURL *string `json:"endpoint_url,omitempty"`
	Recursive   *bool   `json:"recursive,omitempty"`
	// contains filtered or unexported fields
}

S3ConnectorConfig represents the configuration for an S3 connector. It supports both AWS S3 and S3-compatible storage services.

func (S3ConnectorConfig) Type

func (c S3ConnectorConfig) Type() string

Type always returns the connector type identifier for S3: "s3".
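
For illustration, a sketch of an S3 configuration pointing at an S3-compatible endpoint; the URL, credentials, and endpoint are placeholders, and String is the package's optional-field helper.

package main

import "github.com/aws-gopher/unstructured-sdk-go"

// s3Config targets an S3-compatible object store (for example MinIO).
// All values are placeholders.
func s3Config() unstructured.S3ConnectorConfig {
	recursive := true

	return unstructured.S3ConnectorConfig{
		RemoteURL:   "s3://my-bucket/input/",
		Key:         unstructured.String("your-access-key"),
		Secret:      unstructured.String("your-secret-key"),
		EndpointURL: unstructured.String("https://minio.example.com"),
		Recursive:   &recursive,
	}
}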

type SalesforceSourceConnectorConfig

type SalesforceSourceConnectorConfig struct {
	Username    string   `json:"username"`
	ConsumerKey string   `json:"consumer_key"`
	PrivateKey  string   `json:"private_key"`
	Categories  []string `json:"categories"`
	// contains filtered or unexported fields
}

SalesforceSourceConnectorConfig represents the configuration for a Salesforce source connector. It contains authentication details and data category filtering.

func (SalesforceSourceConnectorConfig) Type

Type always returns the connector type identifier for Salesforce: "salesforce".

type SharePointSourceConnectorConfig

type SharePointSourceConnectorConfig struct {
	Site         string  `json:"site"`
	Tenant       string  `json:"tenant"`
	AuthorityURL *string `json:"authority_url,omitempty"`
	UserPName    string  `json:"user_pname"`
	ClientID     string  `json:"client_id"`
	ClientCred   string  `json:"client_cred"`
	Recursive    *bool   `json:"recursive,omitempty"`
	Path         *string `json:"path,omitempty"`
	// contains filtered or unexported fields
}

SharePointSourceConnectorConfig represents the configuration for a SharePoint source connector. It contains Microsoft Graph API authentication and site access details.

func (SharePointSourceConnectorConfig) Type

Type always returns the connector type identifier for SharePoint: "sharepoint".

type SlackSourceConnectorConfig

type SlackSourceConnectorConfig struct {
	Channels  []string `json:"channels"`
	StartDate *string  `json:"start_date,omitempty"`
	EndDate   *string  `json:"end_date,omitempty"`
	Token     string   `json:"token"`
	// contains filtered or unexported fields
}

SlackSourceConnectorConfig represents the configuration for a Slack source connector. It contains channel selection, date range filtering, and authentication token.

func (SlackSourceConnectorConfig) Type

Type always returns the connector type identifier for Slack: "slack".

type SnowflakeConnectorConfig

type SnowflakeConnectorConfig struct {
	Account     string   `json:"account"`
	Role        string   `json:"role"`
	User        string   `json:"user"`
	Password    string   `json:"password"`
	Host        string   `json:"host"`
	Port        *int     `json:"port,omitempty"`
	Database    string   `json:"database"`
	Schema      *string  `json:"schema,omitempty"`
	TableName   *string  `json:"table_name,omitempty"`
	BatchSize   *int     `json:"batch_size,omitempty"`
	IDColumn    *string  `json:"id_column,omitempty"`
	Fields      []string `json:"fields,omitempty"`
	RecordIDKey *string  `json:"record_id_key,omitempty"`
	// contains filtered or unexported fields
}

SnowflakeConnectorConfig represents the configuration for a Snowflake connector. It contains account details, authentication, and table configuration.

func (SnowflakeConnectorConfig) Type

Type always returns the connector type identifier for Snowflake: "snowflake".

type SortDirection

type SortDirection string

SortDirection represents the sort direction for listing workflows.

const (
	// SortDirectionAsc sorts results in ascending order.
	SortDirectionAsc SortDirection = "asc"
	// SortDirectionDesc sorts results in descending order.
	SortDirectionDesc SortDirection = "desc"
)

type Source

type Source struct {
	ID        string       `json:"id"`
	Name      string       `json:"name"`
	CreatedAt time.Time    `json:"created_at,omitzero"`
	UpdatedAt time.Time    `json:"updated_at,omitzero"`
	Config    SourceConfig `json:"config"`
}

Source represents a source connector that ingests files or data from various locations. It contains metadata about the connector and its configuration.

func (*Source) UnmarshalJSON

func (s *Source) UnmarshalJSON(data []byte) error

UnmarshalJSON implements custom JSON unmarshaling for Source. It handles the polymorphic Config field by determining the correct type based on the "type" field in the JSON data.

type SourceConfig

type SourceConfig interface {
	Type() string
	// contains filtered or unexported methods
}

SourceConfig is an interface that all source connector configurations implement. It provides a way to identify and work with different source connector types.
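
For illustration, a sketch that inspects a Source's polymorphic Config after it has been decoded; whether concrete configs are stored as values or pointers is an assumption here, so the default branch falls back to the interface's Type method.

package main

import (
	"fmt"

	"github.com/aws-gopher/unstructured-sdk-go"
)

// describeSource prints a short summary based on the concrete config type.
func describeSource(src unstructured.Source) {
	switch cfg := src.Config.(type) {
	case unstructured.S3ConnectorConfig:
		fmt.Println("S3 source at", cfg.RemoteURL)
	case unstructured.GCSConnectorConfig:
		fmt.Println("GCS source at", cfg.RemoteURL)
	default:
		fmt.Println("source connector of type", cfg.Type())
	}
}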

type UpdateDestinationRequest

type UpdateDestinationRequest struct {
	ID     string
	Config DestinationConfig
}

UpdateDestinationRequest represents the request to update a destination connector.

type UpdateSourceRequest

type UpdateSourceRequest struct {
	ID     string
	Config SourceConfig
}

UpdateSourceRequest represents the request to update a source connector.

type UpdateWorkflowRequest

type UpdateWorkflowRequest struct {
	ID            string         `json:"-"`
	Name          *string        `json:"name,omitempty"`
	SourceID      *string        `json:"source_id,omitempty"`
	DestinationID *string        `json:"destination_id,omitempty"`
	WorkflowType  *WorkflowType  `json:"workflow_type,omitempty"`
	WorkflowNodes []WorkflowNode `json:"workflow_nodes,omitempty"`
	Schedule      *string        `json:"schedule,omitempty"`
	ReprocessAll  *bool          `json:"reprocess_all,omitempty"`
}

UpdateWorkflowRequest represents the request to update a workflow.

type ValidationError

type ValidationError struct {
	// Location is an array that can contain strings or integers indicating
	// where the validation error occurred (e.g., field names, array indices).
	Location []any `json:"loc"`
	// Message is a string describing the validation error.
	Message string `json:"msg"`
	// Type is a string indicating the type of error.
	Type string `json:"type"`
}

ValidationError represents a single validation error within the HTTPValidationError response.

func (*ValidationError) Error

func (e *ValidationError) Error() string

type WeaviateDestinationConnectorConfig

type WeaviateDestinationConnectorConfig struct {
	ClusterURL string  `json:"cluster_url"`
	APIKey     string  `json:"api_key"`
	Collection *string `json:"collection,omitempty"`
	// contains filtered or unexported fields
}

WeaviateDestinationConnectorConfig represents the configuration for a Weaviate destination connector. It contains cluster URL, API key, and collection information.

func (WeaviateDestinationConnectorConfig) Type

Type always returns the connector type identifier for Weaviate Cloud: "weaviate_cloud".

type Workflow

type Workflow struct {
	ID            string            `json:"id"`
	Name          string            `json:"name"`
	Sources       []string          `json:"sources"`
	Destinations  []string          `json:"destinations"`
	WorkflowType  *WorkflowType     `json:"workflow_type,omitempty"`
	WorkflowNodes WorkflowNodes     `json:"workflow_nodes"`
	Schedule      *WorkflowSchedule `json:"schedule,omitempty"`
	Status        WorkflowState     `json:"status"`
	CreatedAt     time.Time         `json:"created_at,omitzero"`
	UpdatedAt     time.Time         `json:"updated_at,omitzero"`
	ReprocessAll  *bool             `json:"reprocess_all,omitempty"`
}

Workflow represents a workflow, which defines a series of processing steps for data in Unstructured.io. A workflow connects sources, destinations, and processing nodes.

type WorkflowJobType

type WorkflowJobType string

WorkflowJobType represents the type of workflow job (ephemeral, persistent, scheduled).

const (
	// WorkflowJobTypeEphemeral is an ephemeral job type.
	WorkflowJobTypeEphemeral WorkflowJobType = "ephemeral"
	// WorkflowJobTypePersistent is a persistent job type.
	WorkflowJobTypePersistent WorkflowJobType = "persistent"
	// WorkflowJobTypeScheduled is a scheduled job type.
	WorkflowJobTypeScheduled WorkflowJobType = "scheduled"
)

type WorkflowNode

type WorkflowNode interface {
	json.Marshaler
	// contains filtered or unexported methods
}

WorkflowNode is a node in a workflow.

type WorkflowNodes

type WorkflowNodes []WorkflowNode

WorkflowNodes is a slice of WorkflowNode.

func (WorkflowNodes) MarshalJSON

func (w WorkflowNodes) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface.

func (*WorkflowNodes) UnmarshalJSON

func (w *WorkflowNodes) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface.

func (WorkflowNodes) ValidateNodeOrder

func (w WorkflowNodes) ValidateNodeOrder() (err error)

ValidateNodeOrder validates the order of nodes in a workflow.
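
For illustration, a sketch that assembles a partition-then-enrich pipeline and validates its order before it is attached to a workflow; it assumes these node structs satisfy WorkflowNode by value, and the accepted orderings are whatever ValidateNodeOrder enforces.

package main

import (
	"log"

	"github.com/aws-gopher/unstructured-sdk-go"
)

// buildNodes assembles a partition-then-enrich pipeline and checks its order.
func buildNodes() (unstructured.WorkflowNodes, error) {
	nodes := unstructured.WorkflowNodes{
		unstructured.PartitionerFast{},
		unstructured.Enricher{Subtype: unstructured.EnrichmentTypeTableOpenAI},
	}

	if err := nodes.ValidateNodeOrder(); err != nil {
		log.Printf("invalid node order: %v", err)
		return nil, err
	}

	return nodes, nil
}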

type WorkflowSchedule

type WorkflowSchedule struct {
	CronTabEntries []CronTabEntry `json:"crontab_entries"`
}

WorkflowSchedule represents a workflow schedule, which can include cron tab entries.

type WorkflowState

type WorkflowState string

WorkflowState represents the state of a workflow (active or inactive).

const (
	// WorkflowStateActive indicates the workflow is active.
	WorkflowStateActive WorkflowState = "active"
	// WorkflowStateInactive indicates the workflow is inactive.
	WorkflowStateInactive WorkflowState = "inactive"
)

type WorkflowType

type WorkflowType string

WorkflowType represents the type of workflow (e.g., basic, advanced, platinum, custom).

const (
	// WorkflowTypeBasic is a basic workflow type.
	WorkflowTypeBasic WorkflowType = "basic"
	// WorkflowTypeAdvanced is an advanced workflow type.
	WorkflowTypeAdvanced WorkflowType = "advanced"
	// WorkflowTypePlatinum is a platinum workflow type.
	WorkflowTypePlatinum WorkflowType = "platinum"
	// WorkflowTypeCustom is a custom workflow type.
	WorkflowTypeCustom WorkflowType = "custom"
)

type ZendeskSourceConnectorConfig

type ZendeskSourceConnectorConfig struct {
	Subdomain string  `json:"subdomain"`
	Email     string  `json:"email"`
	APIToken  string  `json:"api_token"`
	ItemType  *string `json:"item_type,omitempty"`
	BatchSize *int    `json:"batch_size,omitempty"`
	// contains filtered or unexported fields
}

ZendeskSourceConnectorConfig represents the configuration for a Zendesk source connector. It contains subdomain, authentication, and item type filtering.

func (ZendeskSourceConnectorConfig) Type

Type always returns the connector type identifier for Zendesk: "zendesk".

Directories

Path    Synopsis
test    Package test provides testing utilities and examples for the unstructured SDK.
