go_strings_regex/

directory
v0.0.0-...-e6528b1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 1, 2025 License: CC-BY-4.0

README

back to contents

Go: strings, regex

↑ top




string

Code:

package main

import "fmt"

// main shows three ways of looking inside a string: indexing a single
// byte, ranging over the string itself, and ranging over its bytes.
func main() {
	greeting := "Hello World!"

	// Indexing a string yields the byte stored at that offset.
	fmt.Println(greeting[1]) // 101

	fmt.Println()

	// Ranging over a string yields one rune per iteration.
	for _, r := range greeting {
		fmt.Println(r, string(r))
	}
	/*
	   72 H
	   101 e
	   108 l
	   108 l
	   111 o
	   32
	   87 W
	   111 o
	   114 r
	   108 l
	   100 d
	   33 !
	*/

	fmt.Println()

	// Walking the byte slice prints the same values here because every
	// character of the greeting is a single byte.
	raw := []byte(greeting)
	for i := 0; i < len(raw); i++ {
		fmt.Println(raw[i], string(raw[i]))
	}
	/*
	   72 H
	   101 e
	   108 l
	   108 l
	   111 o
	   32
	   87 W
	   111 o
	   114 r
	   108 l
	   100 d
	   33 !
	*/
}

↑ top




clean up

Here are some useful functions to preprocess Go strings:

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// main runs cleanUp, expand, toWords and toSentences against known inputs
// and prints one message for every result that differs from the expected
// value. Nothing is printed when every check passes.
//
// The checks previously built their messages with fmt.Errorf and threw the
// returned error away, so a failing check was never reported.
func main() {
	func() {
		str1 := "   Hello,    World! 124  2 This 23is Go project,		It is amazing  . I am excited!    Are you too?    "
		r1 := cleanUp(str1)
		if r1 != "Hello, World! 124 2 This 23is Go project, It is amazing . I am excited! Are you too?" {
			fmt.Printf("Expected \"Hello, World! 124 2 This 23is Go project, It is amazing . I am excited! Are you too?\": %s\n", r1)
		}
		str2 := "		Hello World!	This is Go 		"
		r2 := cleanUp(str2)
		if r2 != "Hello World! This is Go" {
			fmt.Printf("Expected \"Hello World! This is Go\": %s\n", r2)
		}
		str3 := "	\n\n\n	Hello World! \n\n	This is Go 		"
		r3 := cleanUp(str3)
		if r3 != "Hello World! This is Go" {
			fmt.Printf("Expected \"Hello World! This is Go\": %s\n", r3)
		}
	}()

	func() {
		// "was't" is spelled this way on purpose: expand has a rule for it.
		str := "was't weren't aren't isn't I'd you'd I'll he doesn't Don't I won't I'm You're i've what's up it's I didn't"
		r := expand(strings.ToLower(str))
		rc := "was not were not are not is not i would you would i will he does not do not i will not i am you are i have what is up it is i did not"
		if r != rc {
			fmt.Printf("Expected\n%v\nbut\n%v\n", rc, r)
		}
	}()

	func() {
		slice1 := toWords("sadf \nsdafsdf a  s  \nsadfsdf")
		if len(slice1) != 5 {
			fmt.Printf("Expected 5 but %#v\n", slice1)
		}
		str2 := "Hello World!  It is Good to See You, Them, 10. This is Go. How are you? He said \"I am x1000 good.\""
		slice2 := toWords(strings.ToLower(str2))
		cslice2 := []string{"hello", "world!", "it", "is", "good", "to", "see", "you,", "them,", "10.", "this", "is", "go.", "how", "are", "you?", "he", "said", "i", "am", "x1000", "good."}
		if len(slice2) != len(cslice2) {
			fmt.Printf("%#v\n%#v\n", slice2, cslice2)
			// Comparing element by element would index past the shorter slice.
			return
		}
		for k, v := range slice2 {
			if cslice2[k] != v {
				fmt.Printf("%#v != %#v\n", cslice2[k], v)
			}
		}
	}()

	func() {
		want := []string{"Hello World!", "It is Good to See You, Them, 10.", "This is Go.", "How are you?", "He said I am x1000 good."}
		// The same input is checked twice, as in the original program.
		inputs := []string{
			"Hello World!  It is Good to See You, Them, 10. This is Go. How are you? He said \"I am x1000 good.\"",
			"Hello World!  It is Good to See You, Them, 10. This is Go. How are you? He said \"I am x1000 good.\"",
		}
		for _, input := range inputs {
			got := toSentences(input)
			if len(got) != len(want) {
				fmt.Printf("%#v\n%#v\n", got, want)
				continue
			}
			for k, v := range got {
				if want[k] != v {
					fmt.Printf("%#v != %#v\n", want[k], v)
				}
			}
		}
	}()
}

// cleanUp normalizes the white space in str. Leading and trailing white
// space is removed and every interior run of white space (spaces, tabs,
// newlines, ...) is collapsed into one single space character.
// The result is a new string; the argument itself is not modified.
func cleanUp(str string) string {
	// strings.Fields splits around runs of white space and never returns
	// empty fields, so leading and trailing white space vanishes as well.
	words := strings.Fields(str)
	return strings.Join(words, " ")
}

// expand rewrites common English contractions in str into their long form,
// for example "isn't" -> "is not" and "i'll" -> "i will".
//
// Matching is case-sensitive and every pattern is lower case, so callers
// are expected to lower-case str first. The rules are applied one after
// another in the order listed below.
func expand(str string) string {
	rules := [...][2]string{
		{"'d", " would"},
		{"isn't", "is not"},
		{"aren't", "are not"},
		// "wasn't" used to be missing: only the misspelling "was't" was
		// handled. The misspelling is kept so existing inputs still work.
		{"wasn't", "was not"},
		{"was't", "was not"},
		{"weren't", "were not"},

		{"'ve", " have"},
		{"'re", " are"},
		{"'m", " am"},
		{"it's", "it is"},
		{"what's", "what is"},
		{"'ll", " will"},

		{"won't", "will not"},
		{"can't", "can not"},
		{"mustn't", "must not"},

		{"haven't", "have not"},
		{"hasn't", "has not"},

		// Generic rule for words ending in "dn't" (didn't, hadn't,
		// couldn't, ...). It already covers "didn't", so no separate
		// rule for that word is needed.
		{"dn't", "d not"},
		{"don't", "do not"},
		{"doesn't", "does not"},
	}
	for _, rule := range rules {
		str = strings.ReplaceAll(str, rule[0], rule[1])
	}
	return str
}

// toWords expands the contractions in str (see expand), removes every
// double-quote character and returns the remaining text split into words
// on runs of white space. Punctuation stays attached to its word.
//
// Empty or blank input yields an empty slice. The previous implementation
// split an already-empty string on " " and therefore returned a slice
// holding one empty word.
func toWords(str string) []string {
	str = strings.ReplaceAll(expand(str), `"`, "")
	return strings.Fields(str)
}

// sentenceEndRe matches a single sentence terminator: '.', '?' or '!'.
var sentenceEndRe = regexp.MustCompile(`[.?!]`)

// toSentences removes every double-quote character from str and splits the
// rest into sentences. A sentence ends right after a '.', '?' or '!', and
// the terminator stays with its sentence. Each sentence is normalized with
// cleanUp, and sentences that are empty after normalization are dropped.
//
// The text is cut at the terminator positions directly. The previous
// version inserted a "___" marker after each terminator and split on it,
// which also split input that already contained "___".
func toSentences(str string) []string {
	str = strings.ReplaceAll(str, `"`, "")

	result := []string{}
	start := 0
	for _, loc := range sentenceEndRe.FindAllStringIndex(str, -1) {
		// loc[1] is the offset just past the terminator.
		if sentence := cleanUp(str[start:loc[1]]); sentence != "" {
			result = append(result, sentence)
		}
		start = loc[1]
	}
	// Text after the last terminator forms a final, unterminated sentence.
	if tail := cleanUp(str[start:]); tail != "" {
		result = append(result, tail)
	}
	return result
}

↑ top




regex

In Python, you can use a regex to truncate a string like this:

import re

# "(.{0,5})" captures at most five characters, and re.match only matches
# at the beginning of the string, so group 0 is the truncated prefix.
# print is called as a function so the snippet also runs on Python 3.
r = re.match("(.{0,5})", "12312312321")
print(r.groups()[0])
# 12312

r = re.match("(.{0,5})", "abcdadfasfsddf")
print(r.groups()[0])
# abcda

In Go, you can do this:

package main
 
import (
	"fmt"
	"regexp"
)
 
// main prints the prefix of each sample string matched by the
// pattern "(.{0,5})", i.e. at most its first five characters.
func main() {
	firstFive := regexp.MustCompile("(.{0,5})")
	samples := []string{
		"12312312321",    // 12312
		"abcdadfasfsddf", // abcda
	}
	for _, sample := range samples {
		fmt.Println(firstFive.FindString(sample))
	}
}

↑ top




comma

Try this code:

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// main checks insertCommaInt64 and insertCommaFloat64 against known values
// and prints a message for every mismatch. Nothing is printed on success.
//
// The messages used to be built with fmt.Errorf and then discarded, so a
// failing check was never reported. They also named functions that do not
// exist in this program.
func main() {
	var num1 int64 = 21391239213
	r1 := insertCommaInt64(num1)

	if r1 != "21,391,239,213" {
		fmt.Printf("insertCommaInt64(num1) should return \"21,391,239,213\": %#v\n", r1)
	}

	// var num2 float64 = 21391239213.5122123
	num2 := 21391239213.5122123
	r2 := insertCommaFloat64(num2)

	// The expected fraction is shorter than the literal above because the
	// value is formatted from its float64 representation.
	if r2 != "21,391,239,213.51221" {
		fmt.Printf("insertCommaFloat64(num2) should return \"21,391,239,213.51221\": %#v\n", r2)
	}
}

// insertCommaInt64 formats num in base 10 with a comma between every group
// of three digits, counted from the right, e.g. 1234567 -> "1,234,567".
//
// Negative numbers keep their leading minus sign in front of the first
// group. Previously the sign was counted as a digit, which produced
// results such as "-,123".
func insertCommaInt64(num int64) string {
	digits := strconv.FormatInt(num, 10)
	sign := ""
	if num < 0 {
		// Strip the sign from the text rather than negating num, so the
		// smallest int64 is handled without overflow.
		sign, digits = "-", digits[1:]
	}

	// The first group holds the one to three digits left over on the left.
	head := len(digits) % 3
	if head == 0 {
		head = 3
	}

	var b strings.Builder
	b.Grow(len(sign) + len(digits) + (len(digits)-1)/3)
	b.WriteString(sign)
	b.WriteString(digits[:head])
	for i := head; i < len(digits); i += 3 {
		b.WriteByte(',')
		b.WriteString(digits[i : i+3])
	}
	return b.String()
}

// insertCommaFloat64 formats num without an exponent, using the shortest
// fraction that represents the value, and inserts a comma between every
// group of three digits of the integer part, e.g. 1234.5 -> "1,234.5".
//
// Differences from the earlier version:
//   - a leading minus sign is no longer counted as a digit ("-,123.5");
//   - a value without a fractional part no longer ends in "." ("100.");
//   - NaN and the infinities are returned as formatted by strconv,
//     without any commas.
func insertCommaFloat64(num float64) string {
	// 'f' with precision -1 uses the fewest digits that identify num.
	str := strconv.FormatFloat(num, 'f', -1, 64)

	sign := ""
	if strings.HasPrefix(str, "-") {
		sign, str = "-", str[1:]
	}
	// Anything that does not start with a digit ("NaN", "+Inf", "Inf") has
	// no digit groups to separate.
	if str == "" || str[0] < '0' || str[0] > '9' {
		return sign + str
	}

	// fraction keeps its leading "." so it can be appended unchanged.
	intpart, fraction := str, ""
	if dot := strings.IndexByte(str, '.'); dot >= 0 {
		intpart, fraction = str[:dot], str[dot:]
	}

	// The first group holds the one to three digits left over on the left.
	head := len(intpart) % 3
	if head == 0 {
		head = 3
	}

	var b strings.Builder
	b.Grow(len(sign) + len(str) + (len(intpart)-1)/3)
	b.WriteString(sign)
	b.WriteString(intpart[:head])
	for i := head; i < len(intpart); i += 3 {
		b.WriteByte(',')
		b.WriteString(intpart[i : i+3])
	}
	b.WriteString(fraction)
	return b.String()
}

↑ top




replace

Here's how to replace strings with regexp:

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// main checks replaceNonAlpha, replaceNonAlnum and replaceNonAlnumDash
// against known inputs and prints a message for every mismatch. Nothing is
// printed when every check passes.
//
// The messages used to be built with fmt.Errorf and then discarded, so a
// failing check was never reported.
func main() {
	func() {
		str1 := "Hello 1231thi21s123 is 12G33o25!!!!"
		want1 := "hello     thi  s    is   g  o      "
		if r1 := replaceNonAlpha(strings.ToLower(str1), " "); r1 != want1 {
			fmt.Printf("Expected %q: %s\n", want1, r1)
		}
		str2 := "Hello 1231thi21s123 is 12G33o25!!!!"
		want2 := "hello this is go"
		if r2 := replaceNonAlpha(strings.ToLower(str2), ""); r2 != want2 {
			fmt.Printf("Expected %q: %s\n", want2, r2)
		}
	}()

	func() {
		str1 := "It's me! Hello !@##%W@!#orl!@#!@#!@#d!"
		want1 := "it is me  hello      w   orl         d "
		if r1 := replaceNonAlnum(expand(strings.ToLower(str1)), " "); r1 != want1 {
			fmt.Printf("Expected %q: %s\n", want1, r1)
		}
		str2 := "Hello !@##%W@!#orl!@#!@#!@#d!"
		want2 := "hello world"
		if r2 := replaceNonAlnum(expand(strings.ToLower(str2)), ""); r2 != want2 {
			fmt.Printf("Expected %q: %s\n", want2, r2)
		}
	}()

	func() {
		// Each title is turned into a lower-case, dash-separated slug.
		titles := []string{
			"List(Linked List) vs. Slice(Array)",
			"What is Graph? (YouTube Clips)",
			"Adjacency List vs. Adjacency Matrix",
			"C++ Version",
			"what is go-learn",
		}
		want := []string{
			"listlinked-list-vs-slicearray",
			"what-is-graph-youtube-clips",
			"adjacency-list-vs-adjacency-matrix",
			"c-version",
			"what-is-go-learn",
		}
		for idx, title := range titles {
			slug := strings.ToLower(strings.Replace(replaceNonAlnumDash(title, ""), " ", "-", -1))
			if slug != want[idx] {
				fmt.Printf("Should be same:\n%s\n%s\n", slug, want[idx])
			}
		}
	}()
}

// nonAlphaRe matches one character that is neither an ASCII letter
// ([:alpha:] == [A-Za-z]) nor white space (\s). It is compiled once
// instead of on every call.
var nonAlphaRe = regexp.MustCompile(`[^[:alpha:]\s]`)

// replaceNonAlpha returns str with every character that is neither an
// ASCII letter nor white space replaced by replace. Each offending
// character is replaced on its own, so a run of n such characters yields n
// copies of replace. White space is left untouched.
func replaceNonAlpha(str, replace string) string {
	return nonAlphaRe.ReplaceAllString(str, replace)
}

// nonAlnumRe matches one character that is neither ASCII alphanumeric
// ([:alnum:] == [0-9A-Za-z]) nor white space (\s). It is compiled once
// instead of on every call.
var nonAlnumRe = regexp.MustCompile(`[^[:alnum:]\s]`)

// replaceNonAlnum returns str with every character that is neither ASCII
// alphanumeric nor white space replaced by replace. Letters, digits and
// white space are kept as they are.
func replaceNonAlnum(str, replace string) string {
	return nonAlnumRe.ReplaceAllString(str, replace)
}

// nonAlnumDashRe matches one character that is not an ASCII letter, a
// digit, a dash or white space. The '-' after "0-9" is a literal dash, not
// the start of another range. It is compiled once instead of on every call.
var nonAlnumDashRe = regexp.MustCompile(`[^A-Za-z0-9-\s]`)

// replaceNonAlnumDash returns str with every character that is not an
// ASCII letter, a digit, a dash (-) or white space replaced by replace.
// Letters, digits, dashes and white space are kept as they are.
func replaceNonAlnumDash(str, replace string) string {
	return nonAlnumDashRe.ReplaceAllString(str, replace)
}

// expand rewrites common English contractions in str into their long form,
// for example "isn't" -> "is not" and "i'll" -> "i will".
//
// Matching is case-sensitive and every pattern is lower case, so callers
// are expected to lower-case str first. The rules are applied one after
// another in the order listed below.
func expand(str string) string {
	rules := [...][2]string{
		{"'d", " would"},
		{"isn't", "is not"},
		{"aren't", "are not"},
		// "wasn't" used to be missing: only the misspelling "was't" was
		// handled. The misspelling is kept so existing inputs still work.
		{"wasn't", "was not"},
		{"was't", "was not"},
		{"weren't", "were not"},

		{"'ve", " have"},
		{"'re", " are"},
		{"'m", " am"},
		{"it's", "it is"},
		{"what's", "what is"},
		{"'ll", " will"},

		{"won't", "will not"},
		{"can't", "can not"},
		{"mustn't", "must not"},

		{"haven't", "have not"},
		{"hasn't", "has not"},

		// Generic rule for words ending in "dn't" (didn't, hadn't,
		// couldn't, ...). It already covers "didn't", so no separate
		// rule for that word is needed.
		{"dn't", "d not"},
		{"don't", "do not"},
		{"doesn't", "does not"},
	}
	for _, rule := range rules {
		str = strings.ReplaceAll(str, rule[0], rule[1])
	}
	return str
}

↑ top




extract numbers

Here's how to extract numbers from a string:

package main

import (
	"fmt"
	"regexp"
)

// main runs extractNumber on a sample sentence and prints both slices when
// the result differs from the expected list. Nothing is printed on success.
//
// The message used to be built with fmt.Errorf and then discarded, and a
// result shorter than the expected list caused an index-out-of-range panic.
func main() {
	str := "I have $1,500,000. The car is $79,000. But I pay only 20 dollars, and the tax was $10.57. The final price is $80,000.35. BRC Burrito is $0.50 right now. I've got change of $.20."
	r := extractNumber(str)
	s := []string{"1500000.", "79000.", "10.57", "80000.35", "0.50", ".20"}
	if len(r) != len(s) {
		fmt.Printf("Not same: \n%#v\n%#v\n", r, s)
		return
	}
	for idx, elem := range s {
		if r[idx] != elem {
			// The message shows both slices in full, so one report is enough.
			fmt.Printf("Not same: \n%#v\n%#v\n", r, s)
			return
		}
	}
}

// extractNumber returns the decimal-looking tokens found in str.
// Commas are deleted first, so "1,500,000." is seen as "1500000.". A token
// is a '.' with any number of digits directly before and after it, which
// means numbers without a '.' are not reported and a sentence-ending period
// right after a number is kept as part of the token. A lone "." is skipped.
// The returned slice is never nil.
func extractNumber(str string) []string {
	commaFree := regexp.MustCompile(`,`).ReplaceAllString(str, "")
	candidates := regexp.MustCompile(`\d{0,}[.]\d{0,}`).FindAllString(commaFree, -1)

	numbers := []string{}
	for i := 0; i < len(candidates); i++ {
		if candidates[i] == "." {
			continue
		}
		numbers = append(numbers, candidates[i])
	}
	return numbers
}

↑ top




space, tab

Try this code:

package main

import (
	"fmt"
	"regexp"
)

// main checks the four tab/space conversion helpers against known inputs
// and prints a message for every mismatch. Nothing is printed on success.
//
// The messages used to be built with fmt.Errorf and then discarded, so a
// failing check was never reported. They also named functions that do not
// exist in this program. Expected values are printed with %q so that tab
// characters are visible in the output.
func main() {
	func() {
		str := "Hello	World	!	Hello"
		want := "Hello World ! Hello"
		if r := allTabIntoSingleSpace(str); r != want {
			fmt.Printf("allTabIntoSingleSpace(str) should return %q: %#v\n", want, r)
		}
	}()

	func() {
		str := "Hello World! Hello"
		want := "Hello	World!	Hello"
		if r := allSpaceIntoSingleTab(str); r != want {
			fmt.Printf("allSpaceIntoSingleTab(str) should return %q: %#v\n", want, r)
		}
	}()

	func() {
		str := "Hello	World	Hello"
		want := "Hello World Hello"
		if r := tabToSpace(str); r != want {
			fmt.Printf("tabToSpace(str) should return %q: %#v\n", want, r)
		}
	}()

	func() {
		str := "Hello World Hello"
		want := "Hello	World	Hello"
		if r := spaceToTab(str); r != want {
			fmt.Printf("spaceToTab(str) should return %q: %#v\n", want, r)
		}
	}()
}

// allTabIntoSingleSpace collapses every run of one or more consecutive tab
// characters in str into one single space character.
func allTabIntoSingleSpace(str string) string {
	// `\t{1,}` matches a whole run of tabs: one tab, two tabs, ...
	tabRun := regexp.MustCompile(`\t{1,}`)
	collapsed := tabRun.ReplaceAllString(str, " ")
	return collapsed
}

// allSpaceIntoSingleTab collapses every run of one or more consecutive
// whitespace characters in str into one single tab character.
func allSpaceIntoSingleTab(str string) string {
	// `\s{1,}` matches a whole run of whitespace: one character, two, ...
	whitespaceRun := regexp.MustCompile(`\s{1,}`)
	collapsed := whitespaceRun.ReplaceAllString(str, "	")
	return collapsed
}

// tabToSpace replaces each tab character in str with one space character.
// Runs are not collapsed: n consecutive tabs become n spaces.
func tabToSpace(str string) string {
	return regexp.MustCompile(`\t`).ReplaceAllString(str, " ")
}

// spaceToTab replaces each whitespace character in str with one tab
// character. Runs are not collapsed: n whitespace characters become n tabs.
func spaceToTab(str string) string {
	return regexp.MustCompile(`\s`).ReplaceAllString(str, "	")
}

↑ top




Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL