Documentation
¶
Overview ¶
Package segment provides abstractions for content units that can be rendered to video. Both Marp slides and browser demos implement the Segment interface, allowing them to share the same pipeline.
Index ¶
- type AudioResult
- type BrowserSegment
- func (b *BrowserSegment) GetID() string
- func (b *BrowserSegment) GetIndex() int
- func (b *BrowserSegment) GetLanguages() []string
- func (b *BrowserSegment) GetSourceType() SourceType
- func (b *BrowserSegment) GetStepVoiceovers() []string
- func (b *BrowserSegment) GetSteps() []browser.Step
- func (b *BrowserSegment) GetTitle() string
- func (b *BrowserSegment) GetTranscripts() map[string]transcript.LanguageContent
- func (b *BrowserSegment) GetURL() string
- func (b *BrowserSegment) GetVideoPath() string
- func (b *BrowserSegment) GetVoiceovers(language string) []Voiceover
- func (b *BrowserSegment) LimitSteps(n int)
- func (b *BrowserSegment) SetTranscripts(transcripts map[string]transcript.LanguageContent)
- func (b *BrowserSegment) SetVideoPath(path string)
- func (b *BrowserSegment) UpdateStepMinDurations(durations map[int]int)
- type Segment
- type SlideSegment
- func (s *SlideSegment) GetFullText(language string) string
- func (s *SlideSegment) GetID() string
- func (s *SlideSegment) GetImagePath() string
- func (s *SlideSegment) GetIndex() int
- func (s *SlideSegment) GetLanguages() []string
- func (s *SlideSegment) GetSourceType() SourceType
- func (s *SlideSegment) GetTitle() string
- func (s *SlideSegment) GetTotalPauseDuration(language string) int
- func (s *SlideSegment) GetTranscripts() map[string]transcript.LanguageContent
- func (s *SlideSegment) GetVoiceovers(language string) []Voiceover
- func (s *SlideSegment) SetImagePath(path string)
- type SourceType
- type TimingInfo
- type VideoResult
- type Voiceover
- type VoiceoverTiming
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AudioResult ¶
type AudioResult struct {
// SegmentID matches the segment's GetID()
SegmentID string
// AudioFiles maps language code to audio file path
AudioFiles map[string]string
// Durations maps language code to duration in milliseconds
Durations map[string]int
// MaxDuration is the maximum duration across all languages
MaxDuration int
// VoiceoverCount is how many voiceovers were generated
VoiceoverCount int
}
AudioResult contains the result of TTS generation for a segment
type BrowserSegment ¶
type BrowserSegment struct {
// contains filtered or unexported fields
}
BrowserSegment represents a browser-driven demo
func NewBrowserSegment ¶
func NewBrowserSegment(index int, title, url string, steps []browser.Step) *BrowserSegment
NewBrowserSegment creates a new browser segment
func NewBrowserSegmentFromTranscript ¶
func NewBrowserSegmentFromTranscript(slide transcript.Slide) *BrowserSegment
NewBrowserSegmentFromTranscript creates a browser segment from a transcript.Slide
func (*BrowserSegment) GetID ¶
func (b *BrowserSegment) GetID() string
GetID returns the segment identifier
func (*BrowserSegment) GetIndex ¶
func (b *BrowserSegment) GetIndex() int
GetIndex returns the segment position
func (*BrowserSegment) GetLanguages ¶
func (b *BrowserSegment) GetLanguages() []string
GetLanguages returns all available language codes
func (*BrowserSegment) GetSourceType ¶
func (b *BrowserSegment) GetSourceType() SourceType
GetSourceType returns SourceTypeBrowser
func (*BrowserSegment) GetStepVoiceovers ¶
func (b *BrowserSegment) GetStepVoiceovers() []string
GetStepVoiceovers returns voiceover texts extracted from steps
func (*BrowserSegment) GetSteps ¶
func (b *BrowserSegment) GetSteps() []browser.Step
GetSteps returns the browser automation steps
func (*BrowserSegment) GetTitle ¶
func (b *BrowserSegment) GetTitle() string
GetTitle returns the segment title
func (*BrowserSegment) GetTranscripts ¶
func (b *BrowserSegment) GetTranscripts() map[string]transcript.LanguageContent
GetTranscripts returns the raw transcript data
func (*BrowserSegment) GetURL ¶
func (b *BrowserSegment) GetURL() string
GetURL returns the starting URL for the browser session
func (*BrowserSegment) GetVideoPath ¶
func (b *BrowserSegment) GetVideoPath() string
GetVideoPath returns the path to the recorded video
func (*BrowserSegment) GetVoiceovers ¶
func (b *BrowserSegment) GetVoiceovers(language string) []Voiceover
GetVoiceovers returns voiceovers for a specific language. Browser segments may have multiple voiceovers — one per step that has voiceover text. If transcripts are defined, they override step voiceovers.
func (*BrowserSegment) LimitSteps ¶
func (b *BrowserSegment) LimitSteps(n int)
LimitSteps truncates the steps to the first n steps (for testing)
func (*BrowserSegment) SetTranscripts ¶
func (b *BrowserSegment) SetTranscripts(transcripts map[string]transcript.LanguageContent)
SetTranscripts sets multi-language transcripts for the segment
func (*BrowserSegment) SetVideoPath ¶
func (b *BrowserSegment) SetVideoPath(path string)
SetVideoPath sets the path to the recorded video
func (*BrowserSegment) UpdateStepMinDurations ¶
func (b *BrowserSegment) UpdateStepMinDurations(durations map[int]int)
UpdateStepMinDurations updates minDuration for steps based on TTS durations. It handles two cases: (1) step-embedded voiceovers, where each step with a voiceover gets the corresponding duration; and (2) transcript-based voiceovers, where the total duration is distributed across all steps.
type Segment ¶
type Segment interface {
// GetID returns a unique identifier for this segment (e.g., "segment_001")
GetID() string
// GetIndex returns the position of this segment in the sequence (0-based)
GetIndex() int
// GetSourceType returns whether this is a slide or browser segment
GetSourceType() SourceType
// GetTitle returns an optional human-readable title
GetTitle() string
// GetVoiceovers returns the voiceover content for a specific language.
// Slides typically have one voiceover; browser segments may have multiple
// (one per step with voiceover text).
GetVoiceovers(language string) []Voiceover
// GetLanguages returns all available language codes for this segment
GetLanguages() []string
// GetTranscripts returns the raw transcript data for all languages
GetTranscripts() map[string]transcript.LanguageContent
}
Segment represents a unit of content that can be rendered to video. Both slides and browser demos implement this interface.
type SlideSegment ¶
type SlideSegment struct {
// contains filtered or unexported fields
}
SlideSegment represents a static slide from a presentation
func NewSlideSegment ¶
func NewSlideSegment(index int, title string, transcripts map[string]transcript.LanguageContent) *SlideSegment
NewSlideSegment creates a new slide segment
func NewSlideSegmentFromTranscript ¶
func NewSlideSegmentFromTranscript(slide transcript.Slide) *SlideSegment
NewSlideSegmentFromTranscript creates a slide segment from a transcript.Slide
func (*SlideSegment) GetFullText ¶
func (s *SlideSegment) GetFullText(language string) string
GetFullText returns the complete voiceover text for a language
func (*SlideSegment) GetID ¶
func (s *SlideSegment) GetID() string
GetID returns the segment identifier
func (*SlideSegment) GetImagePath ¶
func (s *SlideSegment) GetImagePath() string
GetImagePath returns the path to the rendered slide image
func (*SlideSegment) GetIndex ¶
func (s *SlideSegment) GetIndex() int
GetIndex returns the segment position
func (*SlideSegment) GetLanguages ¶
func (s *SlideSegment) GetLanguages() []string
GetLanguages returns all available language codes
func (*SlideSegment) GetSourceType ¶
func (s *SlideSegment) GetSourceType() SourceType
GetSourceType returns SourceTypeSlide
func (*SlideSegment) GetTitle ¶
func (s *SlideSegment) GetTitle() string
GetTitle returns the slide title
func (*SlideSegment) GetTotalPauseDuration ¶
func (s *SlideSegment) GetTotalPauseDuration(language string) int
GetTotalPauseDuration returns the total pause duration for a language
func (*SlideSegment) GetTranscripts ¶
func (s *SlideSegment) GetTranscripts() map[string]transcript.LanguageContent
GetTranscripts returns the raw transcript data
func (*SlideSegment) GetVoiceovers ¶
func (s *SlideSegment) GetVoiceovers(language string) []Voiceover
GetVoiceovers returns voiceovers for a specific language. Slides have a single voiceover combining all segments.
func (*SlideSegment) SetImagePath ¶
func (s *SlideSegment) SetImagePath(path string)
SetImagePath sets the path to the rendered slide image
type SourceType ¶
type SourceType string
SourceType identifies the content source type
const (
// SourceTypeSlide represents a static slide (e.g., Marp presentation)
SourceTypeSlide SourceType = "slide"
// SourceTypeBrowser represents a browser-driven demo
SourceTypeBrowser SourceType = "browser"
)
type TimingInfo ¶
type TimingInfo struct {
// StartMs is the start time relative to the full video (milliseconds)
StartMs int
// EndMs is the end time relative to the full video (milliseconds)
EndMs int
// Duration is the segment duration (milliseconds)
Duration int
// VoiceoverTimings contains per-voiceover timing
VoiceoverTimings []VoiceoverTiming
}
TimingInfo contains timing data for synchronization
type VideoResult ¶
type VideoResult struct {
// SegmentID matches the segment's GetID()
SegmentID string
// VideoPath is the path to the generated video file
VideoPath string
// Duration is the video duration in milliseconds
Duration int
// FrameCount is the number of frames in the video
FrameCount int
}
VideoResult contains the result of video generation for a segment
type Voiceover ¶
type Voiceover struct {
// Index is the order within the segment (0 for slides, 0-N for browser steps)
Index int
// Text is the content to speak
Text string
// Language is the BCP-47 language code
Language string
// Voice overrides the default voice configuration
Voice *transcript.VoiceConfig
// Pause is the duration to pause after speaking (milliseconds)
Pause int
// MinDuration ensures the voiceover takes at least this long (milliseconds)
// Useful for browser steps where the action must complete before moving on
MinDuration int
// StepIndex links back to the browser step index (only for browser segments)
StepIndex int
}
Voiceover represents a piece of text to be spoken