transcript

package

v0.3.0 Latest Latest Go to latest Published: Feb 21, 2026 License: MIT Imports: 3 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/grokify/marp2video

Links

Open Source Insights

Documentation ¶

Index ¶

type AvatarConfig
type BrowserStep
type LanguageContent
- func (lc *LanguageContent) GetFullText() string
- func (lc *LanguageContent) GetTotalPauseDuration() int
type Metadata
type SSMLHints
type Segment
type Slide
type SourceType
type StepTimingInfo
type TimingInfo
type Transcript
- func LoadFromFile(path string) (*Transcript, error)
type VoiceConfig

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type AvatarConfig ¶

// AvatarConfig specifies virtual avatar/speaker settings.
//
// Provider and AvatarID are always written to JSON. The remaining fields
// carry omitempty and are left out of the encoded output when empty.
type AvatarConfig struct {
	Provider string            `json:"provider"`           // heygen, synthesia, d-id, etc.
	AvatarID string            `json:"avatarId"`           // Provider-specific avatar ID
	Position string            `json:"position,omitempty"` // bottom-right, bottom-left, full, pip
	Size     string            `json:"size,omitempty"`     // small, medium, large
	Style    string            `json:"style,omitempty"`    // casual, professional, etc.
	Custom   map[string]string `json:"custom,omitempty"`   // Provider-specific settings
}

AvatarConfig specifies virtual avatar/speaker settings

type BrowserStep ¶ added in v0.3.0

// BrowserStep represents a single browser automation step in the transcript.
//
// Only Action is always serialized. Every other field carries omitempty, so
// zero values (empty strings, 0, a nil Timing) are left out of the JSON.
// NOTE(review): which fields are required for a given Action is not expressed
// by this type — confirm against the code that executes the steps.
type BrowserStep struct {
	// Action is the type of action (navigate, click, input, wait, etc.)
	Action string `json:"action"`

	// Selector is the CSS selector for element actions
	Selector string `json:"selector,omitempty"`

	// Value is used for input actions
	Value string `json:"value,omitempty"`

	// URL is used for navigate actions
	URL string `json:"url,omitempty"`

	// Duration is used for wait actions (milliseconds)
	Duration int `json:"duration,omitempty"`

	// Script is JavaScript code for evaluate actions
	Script string `json:"script,omitempty"`

	// Voiceover is the text to speak during this step
	Voiceover string `json:"voiceover,omitempty"`

	// Description provides context for the step
	Description string `json:"description,omitempty"`

	// ScrollX and ScrollY are pixel amounts for scroll actions
	ScrollX int `json:"scrollX,omitempty"`
	ScrollY int `json:"scrollY,omitempty"`

	// ScrollMode determines if scroll is relative (delta) or absolute (position)
	// Valid values: "relative" (default), "absolute"
	ScrollMode string `json:"scrollMode,omitempty"`

	// ScrollBehavior determines if scroll is instant or animated
	// Valid values: "auto" (instant, default), "smooth" (animated)
	ScrollBehavior string `json:"scrollBehavior,omitempty"`

	// Timing contains timing data after recording
	Timing *StepTimingInfo `json:"timing,omitempty"`
}

BrowserStep represents a single browser automation step in the transcript

type LanguageContent ¶

// LanguageContent contains the transcript for one language.
//
// Segments is always serialized (a nil slice encodes as JSON null). Voice and
// Timing are optional pointers that are omitted from the JSON when nil.
type LanguageContent struct {
	Voice    *VoiceConfig `json:"voice,omitempty"`  // Override default voice for this language
	Segments []Segment    `json:"segments"`         // Text segments with timing/effects
	Timing   *TimingInfo  `json:"timing,omitempty"` // Populated after TTS generation
}

LanguageContent contains the transcript for one language

func (*LanguageContent) GetFullText ¶

func (lc *LanguageContent) GetFullText() string

GetFullText returns the complete text for a language content (for TTS)

func (*LanguageContent) GetTotalPauseDuration ¶

func (lc *LanguageContent) GetTotalPauseDuration() int

GetTotalPauseDuration returns the total pause duration in milliseconds

type Metadata ¶

// Metadata contains presentation-level settings.
//
// Title, DefaultLanguage and DefaultVoice are always serialized. Description,
// DefaultVenue, Tags and Custom carry omitempty and are dropped when empty.
type Metadata struct {
	Title           string            `json:"title"`
	Description     string            `json:"description,omitempty"`
	DefaultLanguage string            `json:"defaultLanguage"`        // BCP-47 code (e.g., "en-US", "en-GB", "fr-CA", "zh-Hans")
	DefaultVoice    VoiceConfig       `json:"defaultVoice"`           // Default voice settings
	DefaultVenue    string            `json:"defaultVenue,omitempty"` // udemy, youtube, coursera, etc.
	Tags            []string          `json:"tags,omitempty"`         // For organization/filtering
	Custom          map[string]string `json:"custom,omitempty"`       // User-defined metadata
}

Metadata contains presentation-level settings

type SSMLHints ¶

// SSMLHints provides SSML-compatible markup hints.
//
// Every field is optional and omitted from the JSON when empty.
// NOTE(review): how these hints are turned into SSML markup is not visible
// in this declaration — confirm against the TTS code that consumes them.
type SSMLHints struct {
	Breaks   []string `json:"breaks,omitempty"`   // e.g., ["400ms", "1s"]
	Emphasis []string `json:"emphasis,omitempty"` // Words to emphasize
	Prosody  string   `json:"prosody,omitempty"`  // Custom prosody settings
	SayAs    string   `json:"sayAs,omitempty"`    // date, time, telephone, etc.
	Phoneme  string   `json:"phoneme,omitempty"`  // IPA pronunciation
	SubAlias string   `json:"subAlias,omitempty"` // Substitution text
}

SSMLHints provides SSML-compatible markup hints

type Segment ¶

// Segment represents a portion of speech with optional effects.
//
// Text is the only field that is always serialized. The others carry
// omitempty, so a Pause of 0 and nil Voice/SSML pointers are left out of
// the JSON.
type Segment struct {
	Text     string       `json:"text"`               // Text to speak
	Pause    int          `json:"pause,omitempty"`    // Pause after segment (milliseconds)
	Emphasis string       `json:"emphasis,omitempty"` // none, moderate, strong
	Rate     string       `json:"rate,omitempty"`     // slow, medium, fast
	Pitch    string       `json:"pitch,omitempty"`    // low, medium, high, +Xst, -Xst
	Voice    *VoiceConfig `json:"voice,omitempty"`    // Override voice for this segment
	SSML     *SSMLHints   `json:"ssml,omitempty"`     // Additional SSML hints
}

Segment represents a portion of speech with optional effects

type Slide ¶

// Slide represents a single slide's transcript data.
//
// Index and Transcripts are always serialized; all other fields are omitted
// from the JSON when empty. Transcripts maps a language code to the content
// for that language.
// NOTE(review): whether Index is zero- or one-based is not visible here.
type Slide struct {
	Index       int                        `json:"index"`
	Title       string                     `json:"title,omitempty"`      // Optional slide title for reference
	SourceType  SourceType                 `json:"sourceType,omitempty"` // slide or browser (defaults to slide)
	Transcripts map[string]LanguageContent `json:"transcripts"`          // Keyed by language code
	Avatar      *AvatarConfig              `json:"avatar,omitempty"`     // Optional avatar/speaker config
	Notes       string                     `json:"notes,omitempty"`      // Internal notes (not spoken)

	// Browser-specific fields (only used when SourceType is "browser")
	BrowserURL   string        `json:"browserUrl,omitempty"`   // Starting URL for browser segment
	BrowserSteps []BrowserStep `json:"browserSteps,omitempty"` // Browser automation steps
}

Slide represents a single slide's transcript data

func (*Slide) GetBrowserVoiceovers ¶ added in v0.3.0

func (s *Slide) GetBrowserVoiceovers() []string

GetBrowserVoiceovers returns all voiceover texts from browser steps

func (*Slide) GetEffectiveSourceType ¶ added in v0.3.0

func (s *Slide) GetEffectiveSourceType() SourceType

GetEffectiveSourceType returns the source type, defaulting to slide for backward compatibility

func (*Slide) IsBrowserSegment ¶ added in v0.3.0

func (s *Slide) IsBrowserSegment() bool

IsBrowserSegment returns true if this is a browser segment

func (*Slide) IsSlideSegment ¶ added in v0.3.0

func (s *Slide) IsSlideSegment() bool

IsSlideSegment returns true if this is a slide segment

type SourceType ¶ added in v0.3.0

// SourceType identifies the segment content source. It is a string type, so
// its values appear in JSON as plain strings.
type SourceType string

SourceType identifies the segment content source

// SourceType values declared by this package.
const (
	// SourceTypeSlide indicates a Marp slide segment
	SourceTypeSlide SourceType = "slide"
	// SourceTypeBrowser indicates a browser-driven demo segment
	SourceTypeBrowser SourceType = "browser"
)

type StepTimingInfo ¶ added in v0.3.0

// StepTimingInfo contains timing data for a browser step.
//
// All three fields are always serialized, including when zero.
// NOTE(review): the Ms suffix suggests milliseconds, and DurationMs presumably
// equals EndMs - StartMs — confirm against the recorder that fills this in.
type StepTimingInfo struct {
	StartMs    int `json:"startMs"`    // Start time relative to segment start
	EndMs      int `json:"endMs"`      // End time relative to segment start
	DurationMs int `json:"durationMs"` // Actual step duration
}

StepTimingInfo contains timing data for a browser step

type TimingInfo ¶

// TimingInfo contains timing data (populated after TTS generation).
//
// All durations are in milliseconds and are always serialized, including
// when zero.
// NOTE(review): TotalDuration presumably equals AudioDuration + PauseDuration
// — confirm against the code that populates it.
type TimingInfo struct {
	AudioDuration int `json:"audioDuration"` // Audio duration in milliseconds
	PauseDuration int `json:"pauseDuration"` // Total pause duration in milliseconds
	TotalDuration int `json:"totalDuration"` // Total slide duration in milliseconds
}

TimingInfo contains timing data (populated after TTS generation)

type Transcript ¶

// Transcript represents the complete transcript for a presentation.
//
// It is the top-level JSON document: a version string, presentation-level
// Metadata, and the ordered list of Slides. All three fields are always
// serialized.
// NOTE(review): the accepted Version values are not visible here.
type Transcript struct {
	Version  string   `json:"version"`
	Metadata Metadata `json:"metadata"`
	Slides   []Slide  `json:"slides"`
}

Transcript represents the complete transcript for a presentation

func LoadFromFile ¶

func LoadFromFile(path string) (*Transcript, error)

LoadFromFile loads a transcript from a JSON file

func (*Transcript) GetBrowserSlides ¶ added in v0.3.0

func (t *Transcript) GetBrowserSlides() []Slide

GetBrowserSlides returns only browser-type slides from the transcript

func (*Transcript) GetSlideSlides ¶ added in v0.3.0

func (t *Transcript) GetSlideSlides() []Slide

GetSlideSlides returns only slide-type slides from the transcript

func (*Transcript) GetSlideTranscript ¶

func (t *Transcript) GetSlideTranscript(slideIndex int, language string) (*LanguageContent, error)

GetSlideTranscript returns the transcript for a slide in the specified language. It falls back to the default language if the requested language is not available.

func (*Transcript) SaveToFile ¶

func (t *Transcript) SaveToFile(path string) error

SaveToFile saves the transcript to a JSON file

type VoiceConfig ¶

// VoiceConfig specifies TTS voice settings (compatible with OmniVoice
// SynthesisConfig).
//
// VoiceID is the only field that is always serialized. Every other field
// carries omitempty, so numeric settings equal to 0 (including a Speed,
// Pitch, Stability, SimilarityBoost or Style of 0.0) are dropped from the
// JSON and cannot be told apart from "unset" after a round trip.
// NOTE(review): what consumers use for omitted numeric fields is not
// visible here — confirm against the TTS provider code.
type VoiceConfig struct {
	Provider        string  `json:"provider,omitempty"`        // elevenlabs, deepgram, etc.
	VoiceID         string  `json:"voiceId"`                   // Provider-specific voice ID
	VoiceName       string  `json:"voiceName,omitempty"`       // Human-readable name
	Model           string  `json:"model,omitempty"`           // Provider-specific model
	OutputFormat    string  `json:"outputFormat,omitempty"`    // mp3, wav, pcm, opus
	SampleRate      int     `json:"sampleRate,omitempty"`      // 22050, 44100, etc.
	Speed           float64 `json:"speed,omitempty"`           // Speech speed multiplier (1.0 = normal)
	Pitch           float64 `json:"pitch,omitempty"`           // Pitch adjustment (-1.0 to 1.0)
	Stability       float64 `json:"stability,omitempty"`       // Voice consistency (0.0 to 1.0)
	SimilarityBoost float64 `json:"similarityBoost,omitempty"` // Voice similarity (0.0 to 1.0)
	Style           float64 `json:"style,omitempty"`           // Style exaggeration (0.0 to 1.0)
}

VoiceConfig specifies TTS voice settings (compatible with OmniVoice SynthesisConfig)

Source Files ¶

View all Source files

transcript.go

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL