Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AvatarConfig ¶
type AvatarConfig struct {
	// Provider is the avatar provider: heygen, synthesia, d-id, etc.
	Provider string `json:"provider"`
	// AvatarID is the provider-specific avatar ID.
	AvatarID string `json:"avatarId"`
	// Position is the avatar position: bottom-right, bottom-left, full, pip.
	Position string `json:"position,omitempty"`
	// Size is the avatar size: small, medium, large.
	Size string `json:"size,omitempty"`
	// Style is the avatar style: casual, professional, etc.
	Style string `json:"style,omitempty"`
	// Custom holds provider-specific settings.
	Custom map[string]string `json:"custom,omitempty"`
}
AvatarConfig specifies virtual avatar/speaker settings
type BrowserStep ¶ added in v0.3.0
type BrowserStep struct {
	// Action names the type of action to perform (navigate, click, input, wait, etc.).
	Action string `json:"action"`

	// Selector holds the CSS selector used by element actions.
	Selector string `json:"selector,omitempty"`

	// Value carries the data for input actions.
	Value string `json:"value,omitempty"`

	// URL is the target of navigate actions.
	URL string `json:"url,omitempty"`

	// Duration applies to wait actions and is expressed in milliseconds.
	Duration int `json:"duration,omitempty"`

	// Script holds the JavaScript code for evaluate actions.
	Script string `json:"script,omitempty"`

	// Voiceover is the text to speak during this step.
	Voiceover string `json:"voiceover,omitempty"`

	// Description gives context for the step.
	Description string `json:"description,omitempty"`

	// ScrollX and ScrollY give the pixel amounts used by scroll actions.
	ScrollX int `json:"scrollX,omitempty"`
	ScrollY int `json:"scrollY,omitempty"`

	// ScrollMode selects relative (delta) or absolute (position) scrolling.
	// Valid values: "relative" (default), "absolute".
	ScrollMode string `json:"scrollMode,omitempty"`

	// ScrollBehavior selects instant or animated scrolling.
	// Valid values: "auto" (instant, default), "smooth" (animated).
	ScrollBehavior string `json:"scrollBehavior,omitempty"`

	// Timing holds the timing data available after recording.
	Timing *StepTimingInfo `json:"timing,omitempty"`
}
BrowserStep represents a single browser automation step in the transcript
type LanguageContent ¶
type LanguageContent struct {
	// Voice overrides the default voice for this language.
	Voice *VoiceConfig `json:"voice,omitempty"`
	// Segments are the text segments with timing/effects.
	Segments []Segment `json:"segments"`
	// Timing is populated after TTS generation.
	Timing *TimingInfo `json:"timing,omitempty"`
}
LanguageContent contains the transcript for one language
func (*LanguageContent) GetFullText ¶
func (lc *LanguageContent) GetFullText() string
GetFullText returns the complete text for a language content (for TTS)
func (*LanguageContent) GetTotalPauseDuration ¶
func (lc *LanguageContent) GetTotalPauseDuration() int
GetTotalPauseDuration returns the total pause duration in milliseconds
type Metadata ¶
type Metadata struct {
	Title       string `json:"title"`
	Description string `json:"description,omitempty"`
	// DefaultLanguage is a BCP-47 code (e.g., "en-US", "en-GB", "fr-CA", "zh-Hans").
	DefaultLanguage string `json:"defaultLanguage"`
	// DefaultVoice holds the default voice settings.
	DefaultVoice VoiceConfig `json:"defaultVoice"`
	// DefaultVenue is the default venue: udemy, youtube, coursera, etc.
	DefaultVenue string `json:"defaultVenue,omitempty"`
	// Tags are used for organization/filtering.
	Tags []string `json:"tags,omitempty"`
	// Custom holds user-defined metadata.
	Custom map[string]string `json:"custom,omitempty"`
}
Metadata contains presentation-level settings
type SSMLHints ¶
type SSMLHints struct {
	// Breaks lists break durations, e.g., ["400ms", "1s"].
	Breaks []string `json:"breaks,omitempty"`
	// Emphasis lists the words to emphasize.
	Emphasis []string `json:"emphasis,omitempty"`
	// Prosody holds custom prosody settings.
	Prosody string `json:"prosody,omitempty"`
	// SayAs is the say-as hint: date, time, telephone, etc.
	SayAs string `json:"sayAs,omitempty"`
	// Phoneme is the IPA pronunciation.
	Phoneme string `json:"phoneme,omitempty"`
	// SubAlias is the substitution text.
	SubAlias string `json:"subAlias,omitempty"`
}
SSMLHints provides SSML-compatible markup hints
type Segment ¶
type Segment struct {
	// Text is the text to speak.
	Text string `json:"text"`
	// Pause is the pause after the segment, in milliseconds.
	Pause int `json:"pause,omitempty"`
	// Emphasis is none, moderate, or strong.
	Emphasis string `json:"emphasis,omitempty"`
	// Rate is slow, medium, or fast.
	Rate string `json:"rate,omitempty"`
	// Pitch is low, medium, high, +Xst, or -Xst.
	Pitch string `json:"pitch,omitempty"`
	// Voice overrides the voice for this segment.
	Voice *VoiceConfig `json:"voice,omitempty"`
	// SSML carries additional SSML hints.
	SSML *SSMLHints `json:"ssml,omitempty"`
}
Segment represents a portion of speech with optional effects
type Slide ¶
type Slide struct {
	Index int `json:"index"`
	// Title is an optional slide title for reference.
	Title string `json:"title,omitempty"`
	// SourceType is slide or browser (defaults to slide).
	SourceType SourceType `json:"sourceType,omitempty"`
	// Transcripts is keyed by language code.
	Transcripts map[string]LanguageContent `json:"transcripts"`
	// Avatar is the optional avatar/speaker config.
	Avatar *AvatarConfig `json:"avatar,omitempty"`
	// Notes holds internal notes (not spoken).
	Notes string `json:"notes,omitempty"`

	// The fields below are browser-specific and only used when SourceType
	// is "browser".

	// BrowserURL is the starting URL for the browser segment.
	BrowserURL string `json:"browserUrl,omitempty"`
	// BrowserSteps lists the browser automation steps.
	BrowserSteps []BrowserStep `json:"browserSteps,omitempty"`
}
Slide represents a single slide's transcript data
func (*Slide) GetBrowserVoiceovers ¶ added in v0.3.0
GetBrowserVoiceovers returns all voiceover texts from browser steps
func (*Slide) GetEffectiveSourceType ¶ added in v0.3.0
func (s *Slide) GetEffectiveSourceType() SourceType
GetEffectiveSourceType returns the source type, defaulting to slide for backward compatibility
func (*Slide) IsBrowserSegment ¶ added in v0.3.0
IsBrowserSegment returns true if this is a browser segment
func (*Slide) IsSlideSegment ¶ added in v0.3.0
IsSlideSegment returns true if this is a slide segment
type SourceType ¶ added in v0.3.0
// SourceType identifies the segment content source.
type SourceType string

// Source types a segment can declare.
const (
	// SourceTypeSlide indicates a Marp slide segment.
	SourceTypeSlide SourceType = "slide"
	// SourceTypeBrowser indicates a browser-driven demo segment.
	SourceTypeBrowser SourceType = "browser"
)
type StepTimingInfo ¶ added in v0.3.0
type StepTimingInfo struct {
	// StartMs is the start time relative to segment start.
	StartMs int `json:"startMs"`
	// EndMs is the end time relative to segment start.
	EndMs int `json:"endMs"`
	// DurationMs is the actual step duration.
	DurationMs int `json:"durationMs"`
}
StepTimingInfo contains timing data for a browser step
type TimingInfo ¶
type TimingInfo struct {
	// AudioDuration is the audio duration in milliseconds.
	AudioDuration int `json:"audioDuration"`
	// PauseDuration is the total pause duration in milliseconds.
	PauseDuration int `json:"pauseDuration"`
	// TotalDuration is the total slide duration in milliseconds.
	TotalDuration int `json:"totalDuration"`
}
TimingInfo contains timing data (populated after TTS generation)
type Transcript ¶
type Transcript struct {
	// Version is stored under the "version" JSON key.
	Version string `json:"version"`
	// Metadata is stored under the "metadata" JSON key.
	Metadata Metadata `json:"metadata"`
	// Slides is stored under the "slides" JSON key.
	Slides []Slide `json:"slides"`
}
Transcript represents the complete transcript for a presentation
func LoadFromFile ¶
func LoadFromFile(path string) (*Transcript, error)
LoadFromFile loads a transcript from a JSON file
func (*Transcript) GetBrowserSlides ¶ added in v0.3.0
func (t *Transcript) GetBrowserSlides() []Slide
GetBrowserSlides returns only browser-type slides from the transcript
func (*Transcript) GetSlideSlides ¶ added in v0.3.0
func (t *Transcript) GetSlideSlides() []Slide
GetSlideSlides returns only slide-type slides from the transcript
func (*Transcript) GetSlideTranscript ¶
func (t *Transcript) GetSlideTranscript(slideIndex int, language string) (*LanguageContent, error)
GetSlideTranscript returns the transcript for a slide in the specified language. It falls back to the default language if the requested language is not available.
func (*Transcript) SaveToFile ¶
func (t *Transcript) SaveToFile(path string) error
SaveToFile saves the transcript to a JSON file
type VoiceConfig ¶
type VoiceConfig struct {
	// Provider is the TTS provider: elevenlabs, deepgram, etc.
	Provider string `json:"provider,omitempty"`
	// VoiceID is the provider-specific voice ID.
	VoiceID string `json:"voiceId"`
	// VoiceName is the human-readable name.
	VoiceName string `json:"voiceName,omitempty"`
	// Model is the provider-specific model.
	Model string `json:"model,omitempty"`
	// OutputFormat is mp3, wav, pcm, or opus.
	OutputFormat string `json:"outputFormat,omitempty"`
	// SampleRate is the sample rate: 22050, 44100, etc.
	SampleRate int `json:"sampleRate,omitempty"`
	// Speed is the speech speed multiplier (1.0 = normal).
	Speed float64 `json:"speed,omitempty"`
	// Pitch is the pitch adjustment (-1.0 to 1.0).
	Pitch float64 `json:"pitch,omitempty"`
	// Stability is the voice consistency (0.0 to 1.0).
	Stability float64 `json:"stability,omitempty"`
	// SimilarityBoost is the voice similarity (0.0 to 1.0).
	SimilarityBoost float64 `json:"similarityBoost,omitempty"`
	// Style is the style exaggeration (0.0 to 1.0).
	Style float64 `json:"style,omitempty"`
}
VoiceConfig specifies TTS voice settings (compatible with OmniVoice SynthesisConfig)