normalizer

package
v0.0.0-...-69261f7 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 29, 2023 License: Apache-2.0 Imports: 12 Imported by: 0

Documentation

Index

Constants

View Source
// Pattern sources and literal markers used while normalizing text. Unless
// noted otherwise each constant is a regular expression in Go (RE2) syntax;
// most of them are compiled into the matching ...RE package variables.
const (
	// NoteTagPattern matches a `<<note:...>>` or `<<note=...>>` tag, lazily up
	// to the first `>>`.
	NoteTagPattern = `<<note[:=].+?>>`

	// WildcardMatchingPattern matches the literal tag `<<match=.+>>`.
	WildcardMatchingPattern = `<<match=\.\+>>`

	// OptionalWildcardMatchingPattern matches the literal tag `<<match=.*>>`.
	OptionalWildcardMatchingPattern = `<<match=\.\*>>`

	// ReplaceableTextPattern matches a `<<match=...>>` tag, optionally written
	// as `<<var;name=...;original=...;match=...>>`. Capture group 1 is the
	// name, group 2 the original text and group 3 the match expression.
	ReplaceableTextPattern = `<<(?:var;(?:name=(.+?);)?(?:original=(.*?);)?)?match=(.+?)>>`

	// BeginOptionalLinePattern matches a `<<beginoptional>>` tag (with an
	// optional `;name=...` part) that starts a line. The multi-line flag must
	// be spelled `(?m)`; the former `(m)` was a capture group for a literal
	// "m" followed by a start-of-text anchor, which can never match.
	BeginOptionalLinePattern = `(?m)^<<beginoptional(?:;name=.*?)?>>`

	// BeginOptionalPattern matches a `<<beginoptional>>` tag (with an optional
	// `;name=...` part) anywhere in the text.
	BeginOptionalPattern = `<<beginoptional(?:;name=.*?)?>>`

	// OmitableLine is the literal opening omitable marker followed by a
	// newline. It is a plain string, not a regular expression.
	OmitableLine = "<<omitable>>\n"

	// Omitable is the literal opening omitable marker. It is a plain string,
	// not a regular expression.
	Omitable = "<<omitable>>"

	// EndOptionalPattern matches the `<<endoptional>>` tag.
	EndOptionalPattern = `<<endoptional>>`

	// ReplaceEndPattern is the closing `<</omitable>>` marker.
	ReplaceEndPattern = `<</omitable>>`

	// CommentBlockOutsidePattern matches block-comment delimiters: an opener
	// (`/*`, or two or three dashes followed by `[`, any number of `=`, `[`)
	// at the start of a line, or a closer (`*/`, or `]`, any number of `=`,
	// `]`) at the end of a line.
	CommentBlockOutsidePattern = `(?m)^\s*(?:/\*|-{2,3}\[=*\[)|(?:\*/|]=*])\s*$`

	// CommentBlockInsidePattern matches one to six `*` or `#` characters at
	// the start of a line (after optional whitespace), or one to six `*`
	// characters at the end of a line.
	CommentBlockInsidePattern = `(?m)^\s*[*#]{1,6}|\*{1,6}$`

	// CommentLinePattern matches a line-comment marker at the start of a line
	// (after optional whitespace): `//`, `>`, `--`, or one to four `;`.
	CommentLinePattern = `(?m)^\s*(?://|>|--|;{1,4})`

	// HtmlStyleCommentPattern matches `<!--` at the start of a line or `-->`
	// at the end of a line.
	HtmlStyleCommentPattern = `(?m)^\s*<!--|-->\s*$`

	// DashLikePattern matches a single hyphen-minus or one of several Unicode
	// dash-like code points.
	// NOTE(review): U+FE0D is a variation selector rather than a dash; confirm
	// it is intended.
	DashLikePattern = "[\u002D\u2010\u2011\u2013\u2014\u2015\u2212\uFE58\uFE63\uFE0D]"

	// QuoteLikePattern matches a run of one or more ASCII or typographic quote
	// characters (double quote, apostrophe, backtick, acute accent, curly
	// single and double quotes).
	QuoteLikePattern = "[\u0022\u0027\u0060\u00B4\u2018\u2019\u201C\u201D]+"

	// HTTPPattern matches `http` or `https`, ignoring case.
	HTTPPattern = `(?i)https?`

	// BulletsPattern matches a bullet (`*`, `+`, U+2022 or `-`) at the start
	// of a line, together with the whitespace around it.
	BulletsPattern = "(?m)^\\s*[*+\u2022-]\\s+"

	// NumberingPattern matches a list marker: preceded by whitespace or the
	// start of a line, an optional `(`, then a single word character or a run
	// of digits / `i` / `v` / `x` / `#`, then `.` or `)`.
	// NOTE(review): inside the final character class `$` is a literal dollar
	// sign, not an end-of-line anchor; confirm that is intended.
	NumberingPattern = "(?m)(?:\\s|^)\\(?(?:\\w|[\\divx#]+)[.)][\\s$]"

	// SplitWords matches a hyphen at the end of a line that sits between two
	// word boundaries, including the whitespace up to the next word, i.e. a
	// word hyphenated across a line break.
	SplitWords = `(?m)\b-$\s+\b`

	// HorizontalRulePattern matches three or more `*`, `=` or `-` characters
	// at the start of a line (after optional whitespace).
	HorizontalRulePattern = `(?m)^\s*[*=-]{3,}`

	// Copyright matches the copyright sign or its `(c)` / `(C)` spelling.
	Copyright = `©|\([cC]\)`

	// ControlCharacters matches a single code point in U+0000-U+0007 or
	// U+000E-U+001B.
	ControlCharacters = "[\u0000-\u0007\u000E-\u001B]"

	// OddCharactersPattern matches, ignoring case, a line consisting solely of
	// the two characters `^l`, or any one of the listed individual code
	// points.
	// NOTE(review): the code points look like fragments of mis-decoded text;
	// confirm the list against real inputs.
	OddCharactersPattern = "(?im)^\\^l$|\u0080|\u0099|\u009C|\u009D|\u00AC|\u00E2|\u00A7|\u00C2|\u00A4|\u0153|\u20AC|\uFFFD"

	// LeadingWhitespacePattern matches one whitespace character at the very
	// start of the text (no multi-line flag).
	LeadingWhitespacePattern = `^\s`

	// MiddleWhitespacePattern matches a run of whitespace, U+00A0, U+2028 or
	// U+00B7 characters.
	MiddleWhitespacePattern = "(?:\\s|\u00A0|\u2028|\u00B7)+"

	// TrailingWhitespacePattern matches one whitespace character at the very
	// end of the text (no multi-line flag).
	TrailingWhitespacePattern = `\s$`
)

Variables

View Source
// Package-level logger and the compiled forms of the pattern constants,
// grouped by what they recognise. All expressions are compiled once, at
// package initialisation.
var (
	// Logger is the package logger, constructed with log.INFO.
	Logger = log.NewLogger(log.INFO)

	// Template markup tags.
	NoteTagPatternRE                  = regexp.MustCompile(NoteTagPattern)
	WildcardMatchingPatternRE         = regexp.MustCompile(WildcardMatchingPattern)
	OptionalWildcardMatchingPatternRE = regexp.MustCompile(OptionalWildcardMatchingPattern)
	ReplaceableTextPatternRE          = regexp.MustCompile(ReplaceableTextPattern)
	BeginOptionalLinePatternRE        = regexp.MustCompile(BeginOptionalLinePattern)
	BeginOptionalPatternRE            = regexp.MustCompile(BeginOptionalPattern)
	EndOptionalPatternRE              = regexp.MustCompile(EndOptionalPattern)

	// Source-code comment markers.
	CommentBlockOutsideRE = regexp.MustCompile(CommentBlockOutsidePattern)
	CommentBlockInsideRE  = regexp.MustCompile(CommentBlockInsidePattern)
	CommentLineRE         = regexp.MustCompile(CommentLinePattern)
	HtmlStyleCommentRE    = regexp.MustCompile(HtmlStyleCommentPattern)

	// Individual characters and short tokens.
	DashLikeRE             = regexp.MustCompile(DashLikePattern)
	QuoteLikeRE            = regexp.MustCompile(QuoteLikePattern)
	CopyrightRE            = regexp.MustCompile(Copyright)
	ControlCharactersRE    = regexp.MustCompile(ControlCharacters)
	OddCharactersPatternRE = regexp.MustCompile(OddCharactersPattern)
	HTTPPatternRE          = regexp.MustCompile(HTTPPattern)

	// List and layout decoration.
	BulletsPatternRE        = regexp.MustCompile(BulletsPattern)
	NumberingPatternRE      = regexp.MustCompile(NumberingPattern)
	SplitWordsRE            = regexp.MustCompile(SplitWords)
	HorizontalRulePatternRE = regexp.MustCompile(HorizontalRulePattern)

	// Whitespace.
	LeadingWhitespaceRE  = regexp.MustCompile(LeadingWhitespacePattern)
	MiddleWhitespaceRE   = regexp.MustCompile(MiddleWhitespacePattern)
	TrailingWhitespaceRE = regexp.MustCompile(TrailingWhitespacePattern)
)

Functions

This section is empty.

Types

type CaptureGroup

// CaptureGroup records one capture group: its number plus three strings
// (name, original text and match expression).
// NOTE(review): presumably filled from the name/original/match parts of a
// replaceable-text tag; confirm against the normalizer implementation.
type CaptureGroup struct {
	GroupNumber             int
	Name, Original, Matches string
}

type Digest

// Digest provides an option to store a combination of hashes (MD5, SHA-256
// and SHA-512), each kept as a string.
type Digest struct {
	Md5, Sha256, Sha512 string
}

Digest stores a combination of hashes (MD5, SHA-256 and SHA-512) for a given package.

type NormalizationData

// NormalizationData holds the input data and its normalized text.
type NormalizationData struct {
	// original input text
	OriginalText string
	// normalized version of the input text
	NormalizedText string
	// NOTE(review): presumably relates positions in NormalizedText back to
	// OriginalText; confirm against NormalizeText.
	IndexMap       []int
	// capture groups collected for this text
	// NOTE(review): presumably only populated for templates; confirm.
	CaptureGroups  []*CaptureGroup
	// hashes stored for this text (see Digest)
	Hash           Digest
	// NOTE(review): presumably set from the isTemplate argument of
	// NewNormalizationData; confirm.
	IsTemplate     bool
	// contains filtered or unexported fields
}

NormalizationData holds the input data and its normalized text

func NewNormalizationData

func NewNormalizationData(originalText string, isTemplate bool) *NormalizationData

func (*NormalizationData) NormalizeText

func (n *NormalizationData) NormalizeText() error

NormalizeText normalizes the input text

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL