segment

package
v0.5.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 2, 2026 License: MIT Imports: 3 Imported by: 0

Documentation

Overview

Package segment provides abstractions for content units that can be rendered to video. Both Marp slides and browser demos implement the Segment interface, allowing them to share the same pipeline.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AudioResult

// AudioResult contains the result of TTS generation for a segment.
type AudioResult struct {
	// SegmentID matches the value returned by the segment's GetID().
	SegmentID string

	// AudioFiles maps a language code to the path of its audio file.
	AudioFiles map[string]string

	// Durations maps a language code to its duration in milliseconds.
	Durations map[string]int

	// MaxDuration is the maximum duration across all languages.
	// NOTE(review): presumably milliseconds, like Durations — confirm.
	MaxDuration int

	// VoiceoverCount is the number of voiceovers that were generated.
	VoiceoverCount int
}

AudioResult contains the result of TTS generation for a segment

type BrowserSegment

// BrowserSegment represents a browser-driven demo.
type BrowserSegment struct {
	// contains filtered or unexported fields
}

BrowserSegment represents a browser-driven demo

func NewBrowserSegment

func NewBrowserSegment(index int, title, url string, steps []browser.Step) *BrowserSegment

NewBrowserSegment creates a new browser segment

func NewBrowserSegmentFromTranscript

func NewBrowserSegmentFromTranscript(slide transcript.Slide) *BrowserSegment

NewBrowserSegmentFromTranscript creates a browser segment from a transcript.Slide

func (*BrowserSegment) GetID

func (b *BrowserSegment) GetID() string

GetID returns the segment identifier

func (*BrowserSegment) GetIndex

func (b *BrowserSegment) GetIndex() int

GetIndex returns the segment position

func (*BrowserSegment) GetLanguages

func (b *BrowserSegment) GetLanguages() []string

GetLanguages returns all available language codes

func (*BrowserSegment) GetSourceType

func (b *BrowserSegment) GetSourceType() SourceType

GetSourceType returns SourceTypeBrowser

func (*BrowserSegment) GetStepVoiceovers

func (b *BrowserSegment) GetStepVoiceovers() []string

GetStepVoiceovers returns voiceover texts extracted from steps

func (*BrowserSegment) GetSteps

func (b *BrowserSegment) GetSteps() []browser.Step

GetSteps returns the browser automation steps

func (*BrowserSegment) GetTitle

func (b *BrowserSegment) GetTitle() string

GetTitle returns the segment title

func (*BrowserSegment) GetTranscripts

func (b *BrowserSegment) GetTranscripts() map[string]transcript.LanguageContent

GetTranscripts returns the raw transcript data

func (*BrowserSegment) GetURL

func (b *BrowserSegment) GetURL() string

GetURL returns the starting URL for the browser session

func (*BrowserSegment) GetVideoPath

func (b *BrowserSegment) GetVideoPath() string

GetVideoPath returns the path to the recorded video

func (*BrowserSegment) GetVoiceovers

func (b *BrowserSegment) GetVoiceovers(language string) []Voiceover

GetVoiceovers returns voiceovers for a specific language. Browser segments may have multiple voiceovers - one per step that has voiceover text. If transcripts are defined, they override step voiceovers.

func (*BrowserSegment) LimitSteps

func (b *BrowserSegment) LimitSteps(n int)

LimitSteps truncates the step list to its first n steps (intended for testing).

func (*BrowserSegment) SetTranscripts

func (b *BrowserSegment) SetTranscripts(transcripts map[string]transcript.LanguageContent)

SetTranscripts sets multi-language transcripts for the segment

func (*BrowserSegment) SetVideoPath

func (b *BrowserSegment) SetVideoPath(path string)

SetVideoPath sets the path to the recorded video

func (*BrowserSegment) UpdateStepMinDurations

func (b *BrowserSegment) UpdateStepMinDurations(durations map[int]int)

UpdateStepMinDurations updates minDuration for steps based on TTS durations. It handles two cases: (1) step-embedded voiceovers, where each step with a voiceover gets the corresponding duration; and (2) transcript-based voiceovers, where the total duration is distributed across all steps.

type Segment

// Segment represents a unit of content that can be rendered to video.
// Both slides and browser demos implement this interface.
type Segment interface {
	// GetID returns a unique identifier for this segment (e.g., "segment_001").
	GetID() string

	// GetIndex returns the 0-based position of this segment in the sequence.
	GetIndex() int

	// GetSourceType reports whether this is a slide or a browser segment.
	GetSourceType() SourceType

	// GetTitle returns an optional human-readable title.
	GetTitle() string

	// GetVoiceovers returns the voiceover content for the given language.
	// Slides typically have one voiceover; browser segments may have several
	// (one per step that has voiceover text).
	GetVoiceovers(language string) []Voiceover

	// GetLanguages returns all language codes available for this segment.
	GetLanguages() []string

	// GetTranscripts returns the raw transcript data for all languages.
	// NOTE(review): the map key is presumably the language code — confirm.
	GetTranscripts() map[string]transcript.LanguageContent
}

Segment represents a unit of content that can be rendered to video. Both slides and browser demos implement this interface.

type SlideSegment

// SlideSegment represents a static slide from a presentation.
type SlideSegment struct {
	// contains filtered or unexported fields
}

SlideSegment represents a static slide from a presentation

func NewSlideSegment

func NewSlideSegment(index int, title string, transcripts map[string]transcript.LanguageContent) *SlideSegment

NewSlideSegment creates a new slide segment

func NewSlideSegmentFromTranscript

func NewSlideSegmentFromTranscript(slide transcript.Slide) *SlideSegment

NewSlideSegmentFromTranscript creates a slide segment from a transcript.Slide

func (*SlideSegment) GetFullText

func (s *SlideSegment) GetFullText(language string) string

GetFullText returns the complete voiceover text for a language

func (*SlideSegment) GetID

func (s *SlideSegment) GetID() string

GetID returns the segment identifier

func (*SlideSegment) GetImagePath

func (s *SlideSegment) GetImagePath() string

GetImagePath returns the path to the rendered slide image

func (*SlideSegment) GetIndex

func (s *SlideSegment) GetIndex() int

GetIndex returns the segment position

func (*SlideSegment) GetLanguages

func (s *SlideSegment) GetLanguages() []string

GetLanguages returns all available language codes

func (*SlideSegment) GetSourceType

func (s *SlideSegment) GetSourceType() SourceType

GetSourceType returns SourceTypeSlide

func (*SlideSegment) GetTitle

func (s *SlideSegment) GetTitle() string

GetTitle returns the slide title

func (*SlideSegment) GetTotalPauseDuration

func (s *SlideSegment) GetTotalPauseDuration(language string) int

GetTotalPauseDuration returns the total pause duration for a language

func (*SlideSegment) GetTranscripts

func (s *SlideSegment) GetTranscripts() map[string]transcript.LanguageContent

GetTranscripts returns the raw transcript data

func (*SlideSegment) GetVoiceovers

func (s *SlideSegment) GetVoiceovers(language string) []Voiceover

GetVoiceovers returns voiceovers for a specific language. Slides have a single voiceover combining all segments.

func (*SlideSegment) SetImagePath

func (s *SlideSegment) SetImagePath(path string)

SetImagePath sets the path to the rendered slide image

type SourceType

// SourceType identifies the content source type of a segment.
type SourceType string

SourceType identifies the content source type

// Known SourceType values.
const (
	// SourceTypeSlide represents a static slide (e.g., a Marp presentation).
	SourceTypeSlide SourceType = "slide"
	// SourceTypeBrowser represents a browser-driven demo.
	SourceTypeBrowser SourceType = "browser"
)

type TimingInfo

// TimingInfo contains timing data for synchronization.
type TimingInfo struct {
	// StartMs is the start time relative to the full video, in milliseconds.
	StartMs int

	// EndMs is the end time relative to the full video, in milliseconds.
	EndMs int

	// Duration is the segment duration, in milliseconds.
	Duration int

	// VoiceoverTimings contains the timing of each voiceover.
	VoiceoverTimings []VoiceoverTiming
}

TimingInfo contains timing data for synchronization

type VideoResult

// VideoResult contains the result of video generation for a segment.
type VideoResult struct {
	// SegmentID matches the value returned by the segment's GetID().
	SegmentID string

	// VideoPath is the path to the generated video file.
	VideoPath string

	// Duration is the video duration in milliseconds.
	Duration int

	// FrameCount is the number of frames in the video.
	FrameCount int
}

VideoResult contains the result of video generation for a segment

type Voiceover

// Voiceover represents a piece of text to be spoken.
type Voiceover struct {
	// Index is the order within the segment (0 for slides, 0-N for browser steps).
	Index int

	// Text is the content to speak.
	Text string

	// Language is the BCP-47 language code.
	Language string

	// Voice overrides the default voice configuration.
	// NOTE(review): presumably nil means "use the default" — confirm.
	Voice *transcript.VoiceConfig

	// Pause is the duration to pause after speaking, in milliseconds.
	Pause int

	// MinDuration ensures the voiceover takes at least this long, in milliseconds.
	// Useful for browser steps where the action must complete before moving on.
	MinDuration int

	// StepIndex links back to the browser step index (only for browser segments).
	// NOTE(review): as a plain int, the zero value cannot be told apart from
	// step 0; how slide voiceovers populate this field is not visible here.
	StepIndex int
}

Voiceover represents a piece of text to be spoken

type VoiceoverTiming

// VoiceoverTiming contains timing for a single voiceover.
type VoiceoverTiming struct {
	// Index matches Voiceover.Index.
	Index int

	// StartMs is the start time relative to the segment start.
	// NOTE(review): presumably milliseconds, per the Ms suffix — confirm.
	StartMs int

	// EndMs is the end time relative to the segment start.
	// NOTE(review): presumably milliseconds, per the Ms suffix — confirm.
	EndMs int

	// Text is the voiceover text.
	Text string
}

VoiceoverTiming contains timing for a single voiceover

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL