regexer

package module
v1.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 14, 2025 License: MIT Imports: 6 Imported by: 0

README

regexer

[ 📄 docs ] [ 🐙 github ]

A Go package with a more powerful, flexible, and safe API for regular expressions. The main idea is to use Go 1.24+ iterators to make finding and replacing submatches flexible, low-memory, and stoppable.

Features:

  • Type-safe.
  • Lazy iteration.
  • Supports strings, bytes, and runes as input.
  • The same generic API for all inputs.
  • Everything possible with the stdlib regexp package: find matches, find submatches, replace, replace with a template.
  • And much more, like the ability to replace only one or several matches.

Installation

go get github.com/orsinium-labs/regexer

Usage

Find and print all words in the text and their position.

rex := regexer.New(`\w+`)
input := "never gonna give you up"
matches := rex.String(input).Find()
for match := range matches {
    fmt.Println(match.Span.Start, match.Content)
}

The same but for a slice of bytes:

rex := regexer.New(`\w+`)
input := []byte("never gonna give you up")
matches := rex.Bytes(input).Find()
for match := range matches {
    fmt.Println(match.Span.Start, string(match.Content))
}

In both cases, matches is a lazy iterator. It doesn't need to allocate memory for all matches up front, and if you stop iterating, it stops scanning the input.

Replacing has a very similar API:

// Wrap every lowercase word in square brackets.
// The pattern needs a capture group because the template refers to it as $1;
// digits are not matched by [a-z], so "42" is left untouched.
rex := regexer.New(`([a-z]+)`)
input := "number 42 is the answer"
var result string
matches := rex.String(input).Replace(&result)
for match := range matches {
    // $1 in the template expands to the first capture group of this match.
    match.ReplaceTemplate(`[$1]`)
}
fmt.Println(result)
// Output: [number] 42 [is] [the] [answer]

Accessing submatches:

rex := regexer.New(`([a-z.]+)@([a-z.]+)`)
input := "my email is mail@example.com, text me"
matches := rex.String(input).Find()
for match := range matches {
    username := match.Subs.At(1).Content
    domain := match.Subs.At(2).Content
    fmt.Printf("username: %s; domain: %s", username, domain)
}

Documentation

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type BMatch

type BMatch = Match[[]byte]

type BReplacement

type BReplacement struct {
	Match[[]byte]
	// contains filtered or unexported fields
}

func (BReplacement) ReplaceFunc

func (r BReplacement) ReplaceFunc(f func([]byte) []byte)
Example
package main

import (
	"bytes"
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main capitalizes the first letter of every lowercase word in a byte slice
// and prints the rewritten text.
func main() {
	// capitalize upper-cases the first byte of word and keeps the rest as is.
	capitalize := func(word []byte) []byte {
		head := bytes.ToUpper(word[:1])
		return append(head, word[1:]...)
	}

	re := regexer.New(`[a-z]+`)
	src := []byte("number 42 is the answer")
	var out []byte
	for m := range re.Bytes(src).Replace(&out) {
		m.ReplaceFunc(capitalize)
	}
	fmt.Println(string(out))
}
Output:

Number 42 Is The Answer

func (BReplacement) ReplaceLiteral

func (r BReplacement) ReplaceLiteral(val []byte)
Example
package main

import (
	"bytes"
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main upper-cases every "is" and "the" in a byte slice and prints the result.
func main() {
	re := regexer.New(`(is|the)`)
	src := []byte("number 42 is the answer")
	var out []byte
	for m := range re.Bytes(src).Replace(&out) {
		// Substitute the match with an upper-cased copy of its own text.
		m.ReplaceLiteral(bytes.ToUpper(m.Content))
	}
	fmt.Println(string(out))
}
Output:

number 42 IS THE answer

func (BReplacement) ReplaceTemplate

func (r BReplacement) ReplaceTemplate(val []byte)
Example
package main

import (
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main wraps every "is" and "the" in square brackets using a template
// that refers to the first capture group.
func main() {
	re := regexer.New(`(is|the)`)
	src := []byte("number 42 is the answer")
	var out []byte
	for m := range re.Bytes(src).Replace(&out) {
		m.ReplaceTemplate([]byte(`[$1]`))
	}
	fmt.Println(string(out))
}
Output:

number 42 [is] [the] answer

type BSub

type BSub = Sub[[]byte]

type BSubs

type BSubs = Subs[[]byte]

type Bytes

type Bytes struct {
	// contains filtered or unexported fields
}

func (Bytes) Contains

func (b Bytes) Contains() bool
Example
package main

import (
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main reports whether the byte slice contains at least one run of digits.
func main() {
	re := regexer.New(`[0-9]+`)
	src := []byte("number 42 is the answer")
	if re.Bytes(src).Contains() {
		fmt.Println("the byte slice contains a regexp match")
	}
}
Output:

the byte slice contains a regexp match

func (Bytes) Find

func (b Bytes) Find() iter.Seq[BMatch]
Example
package main

import (
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main prints every lowercase word of a byte slice with its start offset.
func main() {
	re := regexer.New(`[a-z]+`)
	src := []byte("never gonna give you up")
	for m := range re.Bytes(src).Find() {
		fmt.Println(m.Span.Start, string(m.Content))
	}
}
Output:

0 never
6 gonna
12 give
17 you
21 up

func (Bytes) Replace

func (b Bytes) Replace(res *[]byte) iter.Seq[BReplacement]
Example
package main

import (
	"bytes"
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main iterates over the replacements for "is" and "the" in a byte slice,
// upper-cases each one, and prints the rewritten text.
func main() {
	re := regexer.New(`(is|the)`)
	src := []byte("number 42 is the answer")
	var out []byte
	for m := range re.Bytes(src).Replace(&out) {
		upper := bytes.ToUpper(m.Content)
		m.ReplaceLiteral(upper)
	}
	fmt.Println(string(out))
}
Output:

number 42 IS THE answer

type Match

type Match[T text] struct {
	// The full match text.
	Content T
	// The range of the match in the original text.
	Span Span
	// Matches for sub-patterns.
	Subs Subs[T]
}

type RMatch

type RMatch struct {
	// The full match text.
	Content []rune
	// The range of the match in the original text.
	Span Span
	// Matches for sub-patterns.
	Subs RSubs
}

The same as Match but for runes.

It exists as a separate type because the compiler can't infer the core type of [text] if that constraint is extended with a slice of runes.

type RReplacement

type RReplacement struct {
	RMatch
	// contains filtered or unexported fields
}

func (RReplacement) ReplaceFunc

func (r RReplacement) ReplaceFunc(f func([]rune) []rune)
Example
package main

import (
	"fmt"
	"unicode"

	"github.com/orsinium-labs/regexer"
)

// main capitalizes the first letter of every lowercase word in a rune slice
// and prints the rewritten text.
func main() {
	// capitalize builds a new slice: the upper-cased first rune followed by
	// the remaining runes of word.
	capitalize := func(word []rune) []rune {
		head := []rune{unicode.ToUpper(word[0])}
		return append(head, word[1:]...)
	}

	re := regexer.New(`[a-z]+`)
	src := []rune("number 42 is the answer")
	var out []rune
	for m := range re.Runes(src).Replace(&out) {
		m.ReplaceFunc(capitalize)
	}
	fmt.Println(string(out))
}
Output:

Number 42 Is The Answer

func (RReplacement) ReplaceLiteral

func (r RReplacement) ReplaceLiteral(val []rune)
Example
package main

import (
	"fmt"
	"unicode"

	"github.com/orsinium-labs/regexer"
)

// main replaces every word in a rune slice with a copy whose first rune
// is upper-cased, then prints the result.
func main() {
	re := regexer.New(`\w+`)
	src := []rune("number 42 is the answer")
	var out []rune
	for m := range re.Runes(src).Replace(&out) {
		capitalized := []rune{unicode.ToUpper(m.Content[0])}
		capitalized = append(capitalized, m.Content[1:]...)
		m.ReplaceLiteral(capitalized)
	}
	fmt.Println(string(out))
}
Output:

Number 42 Is The Answer

func (RReplacement) ReplaceTemplate

func (r RReplacement) ReplaceTemplate(val []rune)
Example
package main

import (
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main wraps every "is" and "the" of a rune slice in square brackets using
// a template that refers to the first capture group.
func main() {
	re := regexer.New(`(is|the)`)
	src := []rune("number 42 is the answer")
	var out []rune
	for m := range re.Runes(src).Replace(&out) {
		m.ReplaceTemplate([]rune(`[$1]`))
	}
	fmt.Println(string(out))
}
Output:

number 42 [is] [the] answer

type RSub

type RSub = Sub[[]rune]

type RSubs

type RSubs struct {
	// contains filtered or unexported fields
}

Matches for sub-patterns.

func (RSubs) At

func (s RSubs) At(i int) RSub

func (RSubs) Iter

func (s RSubs) Iter() iter.Seq[RSub]

func (RSubs) Len

func (s RSubs) Len() int

func (RSubs) Slice

func (s RSubs) Slice() []RSub

type Regex

type Regex struct {
	// contains filtered or unexported fields
}

func New

func New(raw stringLiteral) Regex

func (Regex) Bytes

func (r Regex) Bytes(src []byte) Bytes

func (Regex) Runes

func (r Regex) Runes(src []rune) Runes

func (Regex) String

func (r Regex) String(src string) String

type Runes

type Runes struct {
	// contains filtered or unexported fields
}

func (Runes) Contains

func (b Runes) Contains() bool
Example
package main

import (
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main reports whether the rune slice contains at least one run of digits.
func main() {
	re := regexer.New(`[0-9]+`)
	src := []rune("number 42 is the answer")
	if re.Runes(src).Contains() {
		fmt.Println("the rune slice contains a regexp match")
	}
}
Output:

the rune slice contains a regexp match

func (Runes) Find

func (b Runes) Find() iter.Seq[RMatch]
Example
package main

import (
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main prints every lowercase word of a rune slice with its start offset.
func main() {
	re := regexer.New(`[a-z]+`)
	src := []rune("never gonna give you up")
	for m := range re.Runes(src).Find() {
		fmt.Println(m.Span.Start, string(m.Content))
	}
}
Output:

0 never
6 gonna
12 give
17 you
21 up

func (Runes) Replace

func (b Runes) Replace(res *[]rune) iter.Seq[RReplacement]
Example
package main

import (
	"fmt"
	"unicode"

	"github.com/orsinium-labs/regexer"
)

// main iterates over the replacements for every word in a rune slice,
// capitalizes each one, and prints the rewritten text.
func main() {
	re := regexer.New(`\w+`)
	src := []rune("number 42 is the answer")
	var out []rune
	for m := range re.Runes(src).Replace(&out) {
		initial := unicode.ToUpper(m.Content[0])
		rest := m.Content[1:]
		m.ReplaceLiteral(append([]rune{initial}, rest...))
	}
	fmt.Println(string(out))
}
Output:

Number 42 Is The Answer

type SMatch

type SMatch = Match[string]

type SReplacement

type SReplacement struct {
	Match[string]
	// contains filtered or unexported fields
}

func (SReplacement) ReplaceFunc

func (r SReplacement) ReplaceFunc(f func(string) string)

func (SReplacement) ReplaceLiteral

func (r SReplacement) ReplaceLiteral(val string)
Example
package main

import (
	"fmt"
	"strings"

	"github.com/orsinium-labs/regexer"
)

// main upper-cases every "is" and "the" in a string and prints the result.
func main() {
	rex := regexer.New(`(is|the)`)
	input := "number 42 is the answer"
	var result string
	matches := rex.String(input).Replace(&result)
	for match := range matches {
		// Replace the match with its own text, upper-cased.
		newVal := strings.ToUpper(match.Content)
		match.ReplaceLiteral(newVal)
	}
	// result is already a string, so it is printed without a conversion.
	fmt.Println(result)
}
Output:

number 42 IS THE answer

func (SReplacement) ReplaceTemplate

func (r SReplacement) ReplaceTemplate(val string)
Example
package main

import (
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main wraps every "is" and "the" of a string in square brackets using a
// template that refers to the first capture group.
func main() {
	rex := regexer.New(`(is|the)`)
	input := "number 42 is the answer"
	var result string
	matches := rex.String(input).Replace(&result)
	for match := range matches {
		// The raw string literal is already a string; no conversion needed.
		template := `[$1]`
		match.ReplaceTemplate(template)
	}
	fmt.Println(result)
}
Output:

number 42 [is] [the] answer

type SSub

type SSub = Sub[string]

type SSubs

type SSubs = Subs[string]

type Span

type Span struct {
	Start int
	End   int
}

func (Span) Len

func (s Span) Len() int

type String

type String struct {
	// contains filtered or unexported fields
}

func (String) Contains

func (b String) Contains() bool
Example
package main

import (
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main reports whether the string contains at least one run of digits.
func main() {
	re := regexer.New(`[0-9]+`)
	text := "number 42 is the answer"
	if re.String(text).Contains() {
		fmt.Println("the string contains a regexp match")
	}
}
Output:

the string contains a regexp match

func (String) Find

func (b String) Find() iter.Seq[SMatch]
Example
package main

import (
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main prints every lowercase word of a string with its start offset.
func main() {
	re := regexer.New(`[a-z]+`)
	text := "never gonna give you up"
	for m := range re.String(text).Find() {
		fmt.Println(m.Span.Start, m.Content)
	}
}
Output:

0 never
6 gonna
12 give
17 you
21 up

func (String) Replace

func (s String) Replace(res *string) iter.Seq[SReplacement]
Example
package main

import (
	"fmt"
	"strings"

	"github.com/orsinium-labs/regexer"
)

// main iterates over the replacements for "is" and "the" in a string,
// upper-cases each one, and prints the rewritten text.
func main() {
	re := regexer.New(`(is|the)`)
	text := "number 42 is the answer"
	var out string
	for m := range re.String(text).Replace(&out) {
		m.ReplaceLiteral(strings.ToUpper(m.Content))
	}
	fmt.Println(out)
}
Output:

number 42 IS THE answer

type Sub

type Sub[T rText] struct {
	Content T
	Span    Span
}

type Subs

type Subs[T text] struct {
	// contains filtered or unexported fields
}

Matches for sub-patterns.

func (Subs[T]) At

func (s Subs[T]) At(i int) Sub[T]
Example
package main

import (
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main extracts the user name and the domain of an email address by
// reading capture groups 1 and 2 with Subs.At.
func main() {
	re := regexer.New(`([a-z.]+)@([a-z.]+)`)
	text := "my email is mail@example.com, text me"
	for m := range re.String(text).Find() {
		user, host := m.Subs.At(1).Content, m.Subs.At(2).Content
		fmt.Printf("username: %s; domain: %s", user, host)
	}
}
Output:

username: mail; domain: example.com

func (Subs[T]) Iter

func (s Subs[T]) Iter() iter.Seq[Sub[T]]
Example
package main

import (
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main prints the content of every sub-match of each email address found:
// the full match first, followed by the capture groups.
func main() {
	re := regexer.New(`([a-z.]+)@([a-z.]+)`)
	text := "my email is mail@example.com, text me"
	for m := range re.String(text).Find() {
		for group := range m.Subs.Iter() {
			fmt.Println(group.Content)
		}
	}
}
Output:

mail@example.com
mail
example.com

func (Subs[T]) Len

func (s Subs[T]) Len() int

func (Subs[T]) Slice

func (s Subs[T]) Slice() []Sub[T]
Example
package main

import (
	"fmt"

	"github.com/orsinium-labs/regexer"
)

// main extracts the user name and the domain of an email address by
// indexing into the slice returned by Subs.Slice.
func main() {
	re := regexer.New(`([a-z.]+)@([a-z.]+)`)
	text := "my email is mail@example.com, text me"
	for m := range re.String(text).Find() {
		groups := m.Subs.Slice()
		fmt.Printf("username: %s; domain: %s", groups[1].Content, groups[2].Content)
	}
}
Output:

username: mail; domain: example.com

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL