从单词中删除特殊字符

huangapple go评论85阅读模式
英文:

Removing special chars from words

问题

我正在编写一个用于清理单词的 Go 函数,目的是删除每个单词开头和结尾的特殊字符。

例如:

  • .-hello, -> hello
  • "back-to-back" -> back-to-back

通过逐个检查每个端点的字母是否属于unicode.Letter集合,我得到了以下结果,这个方法运行良好。但我想知道是否有更好或更高效的方法?我尝试过使用strings.TrimLeft/Right,但那样我就必须定义自己要删除的字符集。使用预定义的字符集会更好。

// TrimWord returns word with every leading and trailing rune that is not a
// Unicode letter removed. Runes between the first and the last letter are
// kept unchanged, and an input that contains no letter yields "".
func TrimWord(word []rune) string {
	start, end := 0, len(word)

	// Skip the run of non-letters at the front.
	for start < end && !unicode.IsLetter(word[start]) {
		start++
	}
	// Back up over the run of non-letters at the end without crossing start.
	for end > start && !unicode.IsLetter(word[end-1]) {
		end--
	}
	return string(word[start:end])
}
英文:

I was writing a function in Go for cleaning up individual words, such that special characters at the beginning and end of each word are removed.

e.g.:

  • .-hello, -> hello
  • "back-to-back" -> back-to-back

Ended up with the following, by checking letter by letter from each end if they belong to the unicode.Letter set, which works fine, but I was wondering if there are better or more efficient ways of doing so? I experimented with strings.TrimLeft/Right, but then I have to define my own set of chars to remove. It would have been nice to use a predefined set.

// TrimWord returns word with the leading and trailing runes that are not
// Unicode letters removed. Runes between the first and the last letter are
// left as they are; an input without any letter yields "".
func TrimWord(word []rune) string {
	prefix := 0
	suffix := len(word)

	// Count the non-letters at the front.
	for prefix < len(word) && !unicode.IsLetter(word[prefix]) {
		prefix++
	}
	// Shrink from the back, stopping at prefix so that an input made only of
	// non-letters produces an empty slice instead of crossed indices.
	for suffix > prefix && !unicode.IsLetter(word[suffix-1]) {
		suffix--
	}
	return string(word[prefix:suffix])
}

答案1

得分: 4

package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	samples := []string{
		`.-hello,`,       // -> hello
		`"back-to-back"`, // -> back-to-back
	}
	for _, sample := range samples {
		fmt.Println(trimWord(sample))
	}
}
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	samples := []string{
		`.-hello,`,       // -> hello
		`"back-to-back"`, // -> back-to-back
	}
	for _, sample := range samples {
		fmt.Println(trimWord(sample))
	}
}
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	samples := []string{
		`.-hello,`,       // -> hello
		`"back-to-back"`, // -> back-to-back
	}
	for _, sample := range samples {
		fmt.Println(trimWord(sample))
	}
}
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	samples := []string{
		`.-hello,`,       // -> hello
		`"back-to-back"`, // -> back-to-back
	}
	for _, sample := range samples {
		fmt.Println(trimWord(sample))
	}
}
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	samples := []string{
		`.-hello,`,       // -> hello
		`"back-to-back"`, // -> back-to-back
	}
	for _, sample := range samples {
		fmt.Println(trimWord(sample))
	}
}
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	samples := []string{
		`.-hello,`,       // -> hello
		`"back-to-back"`, // -> back-to-back
	}
	for _, sample := range samples {
		fmt.Println(trimWord(sample))
	}
}
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	samples := []string{
		`.-hello,`,       // -> hello
		`"back-to-back"`, // -> back-to-back
	}
	for _, sample := range samples {
		fmt.Println(trimWord(sample))
	}
}
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	samples := []string{
		`.-hello,`,       // -> hello
		`"back-to-back"`, // -> back-to-back
	}
	for _, sample := range samples {
		fmt.Println(trimWord(sample))
	}
}
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	samples := []string{
		`.-hello,`,       // -> hello
		`"back-to-back"`, // -> back-to-back
	}
	for _, sample := range samples {
		fmt.Println(trimWord(sample))
	}
}
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	samples := []string{
		`.-hello,`,       // -> hello
		`"back-to-back"`, // -> back-to-back
	}
	for _, sample := range samples {
		fmt.Println(trimWord(sample))
	}
}
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	samples := []string{
		`.-hello,`,       // -> hello
		`"back-to-back"`, // -> back-to-back
	}
	for _, sample := range samples {
		fmt.Println(trimWord(sample))
	}
}
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	samples := []string{
		`.-hello,`,       // -> hello
		`"back-to-back"`, // -> back-to-back
	}
	for _, sample := range samples {
		fmt.Println(trimWord(sample))
	}
}
英文:
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// trimWord returns s without its leading and trailing runs of code points
// that are not Unicode letters. Everything between the first and the last
// letter is left untouched.
func trimWord(s string) string {
	notLetter := func(r rune) bool { return !unicode.IsLetter(r) }
	return strings.TrimFunc(s, notLetter)
}

// main runs trimWord on two sample words and prints each result on its own
// line.
func main() {
	fmt.Println(trimWord(`.-hello,`)) // -> hello
	// The sample is wrapped in literal double quotes; the HTML-escaped form
	// (&quot;) would be trimmed differently and print the wrong text.
	fmt.Println(trimWord(`"back-to-back"`)) // -> back-to-back
}

https://go.dev/play/p/l1A4hBDvFfr

hello
back-to-back

> Package strings
>
> func TrimFunc(s string, f func(rune) bool) string
>
> TrimFunc returns a slice of the string s with all leading and trailing Unicode code points c satisfying f(c) removed.

答案2

得分: 1

换句话说

你想要从第一个 unicode.letter 到最后一个 unicode.letter

如果你不介意仍然使用相同的方法来获得结果,你可以稍微改进一下

// TrimWord returns the part of word that spans from its first Unicode letter
// to its last Unicode letter, inclusive. It returns "" when word contains no
// letter at all.
func TrimWord(word []rune) string {
	// Locate the first letter; without one there is nothing to keep.
	first := -1
	for i, r := range word {
		if unicode.IsLetter(r) {
			first = i
			break
		}
	}
	if first < 0 {
		return ""
	}

	// A letter is known to exist at index first, so this backwards scan
	// always stops at or after it.
	last := len(word) - 1
	for !unicode.IsLetter(word[last]) {
		last--
	}
	return string(word[first : last+1])
}
英文:

In other words:

You want everything from the first unicode.Letter to the last unicode.Letter.

If you don't mind keeping the same approach, you can improve it slightly:

// TrimWord returns the part of word that spans from its first Unicode letter
// to its last Unicode letter, inclusive. It returns "" when word is empty or
// contains no letter.
func TrimWord(word []rune) string {
	// prefix and suffix are the inclusive bounds of the kept range; for an
	// empty word suffix starts at -1 and neither loop runs.
	prefix, suffix := 0, len(word)-1

	// Move prefix forward to the first letter (or past the end if none).
	for prefix <= suffix && !unicode.IsLetter(word[prefix]) {
		prefix++
	}
	// Move suffix back to the last letter, never crossing prefix.
	for suffix > prefix && !unicode.IsLetter(word[suffix]) {
		suffix--
	}
	return string(word[prefix : suffix+1])
}

答案3

得分: -2

正则表达式非常简洁。

\pL 匹配任何 Unicode 字母;\PL 则是相同的匹配取反(匹配除了 Unicode 字母之外的任何字符)。

https://goplay.tools/snippet/SrezNxxawW7

package main

import (
	"fmt"
	"regexp"
)

// main strips the non-letter characters from both ends of a sample string
// with a regular expression and prints the result.
func main() {
	// \PL matches any code point that is not a Unicode letter; the two
	// alternatives anchor a run of them to the start or the end of the input.
	edges := regexp.MustCompile(`^\PL+|\PL+$`)
	fmt.Println(edges.ReplaceAllString("<'foobar'>", ""))
}
英文:

Regular expressions are nice and concise.

\pL matches any Unicode letter; \PL is the same match negated (matches anything except a Unicode letter).

https://goplay.tools/snippet/SrezNxxawW7

package main

import (
	"fmt"
	"regexp"
)

// main strips the non-letter characters from both ends of a sample string
// with a regular expression and prints the result.
func main() {
	// \PL matches any code point that is not a Unicode letter; the two
	// alternatives anchor a run of them to the start or the end of the input.
	rx := regexp.MustCompile(`^\PL+|\PL+$`)
	input := "<'foobar'>"
	trimmed := rx.ReplaceAllString(input, "")

	fmt.Println(trimmed)
}

huangapple
  • 本文由 发表于 2023年4月11日 01:02:22
  • 转载请务必保留本文链接:https://go.coder-hub.com/75979066.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定