metadata

package
v1.2.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 3, 2026 License: Apache-2.0 Imports: 7 Imported by: 0

Documentation

Overview

Package metadata provides metadata enrichment from external sources.

Overview

Package registries provide basic information (name, version, dependencies), but Stacktower's analysis features need additional data from source repositories. This package provides deps.MetadataProvider implementations that fetch supplementary metadata from GitHub and other sources.

GitHub Provider

The GitHub provider enriches packages with repository data:

  • Stars count (for popularity ranking)
  • Owner username (for Nebraska maintainer analysis)
  • Contributors list (for bus factor assessment)
  • Last commit/release dates (for staleness detection)
  • Archived status (for brittle package detection)

Usage:

provider := metadata.NewGitHub(cache.NewNullCache(), token, 24*time.Hour)
opts := deps.Options{MetadataProviders: []deps.MetadataProvider{provider}}
g, err := resolver.Resolve(ctx, "fastapi", opts)

The provider automatically extracts GitHub URLs from package metadata (ProjectURLs, Repository, HomePage) or falls back to GitHub search.

Metadata Keys

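Enriched data is stored in node metadata using these standard keys:

  • repo_url: Repository URL
  • repo_owner: Repository owner
  • repo_description: Repository description
  • repo_stars: Star count
  • repo_archived: Archived status
  • repo_language: Primary language
  • repo_topics: Topic tags
  • repo_maintainers: Maintainer list
  • repo_last_commit: Last commit date
  • repo_last_release: Last release date
  • repo_license: SPDX license identifier
  • homepage: Project home page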

Composite Provider

Composite combines multiple providers, merging their results:

providers := metadata.NewComposite(
    github,
    gitlab,
)

deps.MetadataProvider: github.com/matzehuels/stacktower/pkg/core/deps.MetadataProvider

Example (MetadataKeys)
package main

import (
	"fmt"

	"github.com/matzehuels/stacktower/pkg/core/deps/metadata"
)

func main() {
	// Each row pairs a standard metadata key with a short description of
	// what is stored under it.
	rows := []struct {
		key   string
		label string
	}{
		{metadata.RepoURL, "Repository URL"},
		{metadata.RepoOwner, "Repository owner"},
		{metadata.RepoStars, "Star count"},
		{metadata.RepoArchived, "Archived status"},
		{metadata.RepoMaintainers, "Maintainer list"},
		{metadata.RepoLastCommit, "Last commit date"},
		{metadata.RepoLastRelease, "Last release date"},
		{metadata.RepoLanguage, "Primary language"},
		{metadata.RepoTopics, "Topic tags"},
	}

	fmt.Println("Standard metadata keys:")
	for _, row := range rows {
		fmt.Printf("  %s: %s\n", row.key, row.label)
	}
}
Output:
Standard metadata keys:
  repo_url: Repository URL
  repo_owner: Repository owner
  repo_stars: Star count
  repo_archived: Archived status
  repo_maintainers: Maintainer list
  repo_last_commit: Last commit date
  repo_last_release: Last release date
  repo_language: Primary language
  repo_topics: Topic tags

Index

Examples

Constants

View Source
// Standard keys under which enriched data is stored in node metadata.
const (
	// Repository location, ownership, and description.
	RepoURL         = "repo_url"
	RepoOwner       = "repo_owner"
	RepoDescription = "repo_description"

	// Repository popularity, status, and classification.
	RepoStars    = "repo_stars"
	RepoArchived = "repo_archived"
	RepoLanguage = "repo_language"
	RepoTopics   = "repo_topics"
	RepoLicense  = "repo_license"

	// Repository maintainers and activity dates.
	RepoMaintainers = "repo_maintainers"
	RepoLastCommit  = "repo_last_commit"
	RepoLastRelease = "repo_last_release"

	// NOTE(review): presumably the project's home page URL; confirm against the writers of this key.
	HomePage = "homepage"
)

Variables

This section is empty.

Functions

func ParseTime

func ParseTime(s string) *time.Time

ParseTime attempts to parse a time string in common formats. Returns nil if the string is empty or cannot be parsed.

Supported formats:

  • RFC3339 (ISO 8601): "2006-01-02T15:04:05Z07:00"
  • RFC3339Nano: "2006-01-02T15:04:05.999999999Z07:00"
  • Date only: "2006-01-02"

Types

type Composite

type Composite struct {
	// contains filtered or unexported fields
}

func NewComposite

func NewComposite(providers ...deps.MetadataProvider) *Composite
Example
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/matzehuels/stacktower/pkg/cache"
	"github.com/matzehuels/stacktower/pkg/core/deps"
	"github.com/matzehuels/stacktower/pkg/core/deps/metadata"
	"github.com/matzehuels/stacktower/pkg/core/deps/python"
)

func main() {
	// Build a GitHub provider; the empty token means unauthenticated requests.
	github := metadata.NewGitHub(cache.NewNullCache(), "", 24*time.Hour)

	// Composite merges results from all providers passed to it.
	composite := metadata.NewComposite(github)

	// Hand the composite to the resolver through its options.
	opts := deps.Options{
		MetadataProviders: []deps.MetadataProvider{composite},
		MaxDepth:          3,
		MaxNodes:          50,
	}

	// Check the construction error instead of discarding it: calling Resolve
	// on a resolver that failed to build would hide the real failure.
	resolver, err := python.Language.Resolver(cache.NewNullCache(), opts)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	ctx := context.Background()
	g, err := resolver.Resolve(ctx, "flask", opts)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	fmt.Println("Resolved with composite metadata provider")
	fmt.Printf("Packages: %d\n", g.NodeCount())
	// Output varies based on network and API availability
}

func (*Composite) Enrich

func (c *Composite) Enrich(ctx context.Context, pkg *deps.PackageRef, refresh bool) (map[string]any, error)

func (*Composite) Name

func (c *Composite) Name() string

type GitHub

type GitHub struct {
	// contains filtered or unexported fields
}

func NewGitHub

func NewGitHub(backend cache.Cache, token string, cacheTTL time.Duration, opts ...GitHubOption) *GitHub
Example
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/matzehuels/stacktower/pkg/cache"
	"github.com/matzehuels/stacktower/pkg/core/deps"
	"github.com/matzehuels/stacktower/pkg/core/deps/metadata"
	"github.com/matzehuels/stacktower/pkg/core/deps/python"
)

func main() {
	// Create a GitHub metadata provider with authentication.
	// Use an empty string for unauthenticated requests (lower rate limits).
	token := "" // or os.Getenv("GITHUB_TOKEN")
	provider := metadata.NewGitHub(cache.NewNullCache(), token, 24*time.Hour)

	// Use with resolver options.
	opts := deps.Options{
		MetadataProviders: []deps.MetadataProvider{provider},
		MaxDepth:          5,
		MaxNodes:          100,
	}

	// Check the construction error instead of discarding it: calling Resolve
	// on a resolver that failed to build would hide the real failure.
	resolver, err := python.Language.Resolver(cache.NewNullCache(), opts)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	ctx := context.Background()
	g, err := resolver.Resolve(ctx, "requests", opts)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	fmt.Printf("Resolved %d packages with GitHub metadata\n", g.NodeCount())
	// Output varies based on network and API availability
}

func (*GitHub) Enrich

func (g *GitHub) Enrich(ctx context.Context, pkg *deps.PackageRef, refresh bool) (map[string]any, error)
Example
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/matzehuels/stacktower/pkg/cache"
	"github.com/matzehuels/stacktower/pkg/core/deps"
	"github.com/matzehuels/stacktower/pkg/core/deps/metadata"
)

func main() {
	// An empty token means unauthenticated requests; os.Getenv("GITHUB_TOKEN")
	// is the alternative.
	token := ""
	provider := metadata.NewGitHub(cache.NewNullCache(), token, 24*time.Hour)

	// The package reference carries its GitHub URL in HomePage.
	ref := &deps.PackageRef{
		Name:     "requests",
		HomePage: "https://github.com/psf/requests",
	}

	meta, err := provider.Enrich(context.Background(), ref, false)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	// Nothing to report when the provider returned no metadata.
	if meta == nil {
		return
	}

	fmt.Println("Enriched with GitHub data")

	// Print each value only when it is present with the expected type.
	stars, hasStars := meta[metadata.RepoStars].(int)
	if hasStars {
		fmt.Printf("Stars: %d\n", stars)
	}
	owner, hasOwner := meta[metadata.RepoOwner].(string)
	if hasOwner {
		fmt.Printf("Owner: %s\n", owner)
	}
	// Output varies based on network and API availability
}

func (*GitHub) EnrichBatch added in v1.1.0

func (g *GitHub) EnrichBatch(ctx context.Context, pkgs []*deps.PackageRef, refresh bool) (map[string]map[string]any, error)

EnrichBatch fetches metadata for all packages in one or two GraphQL calls. Only packages with GitHub URLs in their registry metadata (ProjectURLs, HomePage) are enriched. Packages without discoverable URLs are silently skipped - we don't use SearchPackageRepo here because it's too slow/rate-limited for batch operations. If WithContributors() was used, additional REST API calls fetch contributor data.

func (*GitHub) Name

func (g *GitHub) Name() string

type GitHubOption added in v1.1.0

// GitHubOption configures the GitHub metadata provider. It is a function
// that receives the *GitHub to configure; NewGitHub accepts any number of
// them as trailing arguments.
type GitHubOption func(*GitHub)

GitHubOption configures the GitHub metadata provider.

func WithContributors added in v1.1.0

func WithContributors() GitHubOption

WithContributors enables fetching contributor data from GitHub. This requires additional API calls per repository and is slower, but enables accurate Nebraska (maintainer) rankings.

type NodeMetadata

// NodeMetadata is a typed view of the commonly used node metadata fields.
type NodeMetadata struct {
	// Package information.
	Version     string // package version, e.g. "2.31.0"
	Description string // short summary of the package

	// Repository identity and status.
	RepoURL      string // canonical repository URL, e.g. "https://github.com/owner/repo"
	RepoOwner    string // repository owner/organization name
	RepoStars    int    // GitHub/GitLab star count
	RepoArchived bool   // whether the repository is archived

	// Repository classification and people.
	RepoLanguage    string   // primary programming language
	RepoTopics      []string // repository topic tags
	RepoMaintainers []string // top contributors/maintainers

	// Repository activity and licensing.
	RepoLastCommit  string // date of the most recent commit
	RepoLastRelease string // date of the most recent release
	RepoLicense     string // SPDX license identifier, e.g. "MIT"

	// Extra carries any metadata that has no typed field above, so that
	// arbitrary registry data is preserved.
	Extra map[string]any
}

NodeMetadata provides typed access to common node metadata fields. This is an optional helper that provides compile-time safety for frequently accessed metadata. The underlying dag.Metadata remains map[string]any for flexibility with arbitrary registry data.

Usage:

typed := metadata.FromMap(node.Meta)
if typed.RepoURL != "" {
    fmt.Println("Repository:", typed.RepoURL)
}

To convert back to map form:

node.Meta = typed.ToMap()

func FromMap

func FromMap(m map[string]any) NodeMetadata

FromMap converts a dag.Metadata map to typed NodeMetadata. Unknown fields are preserved in the Extra map.

This function is safe to call with nil input - it returns a zero NodeMetadata.

func (NodeMetadata) ToMap

func (n NodeMetadata) ToMap() map[string]any

ToMap converts typed NodeMetadata back to a dag.Metadata map. Only non-zero fields are included in the output. Extra fields are merged into the result.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL