问题

我正在将我的Python项目迁移到Go语言，并且有一个使用情况，需要将UTF-8编码转换为对应的GSM编码（如果可能的话）。我对Go语言非常陌生，如果能提供一些相关的文档或示例代码将非常有帮助。

例如：Python代码片段

ằ作为Unicode -> 在GSM编码后变为a

# Build transliterated_text one character at a time. text, transliterated_text,
# gsm, rc, is_gsm and is_nonascii_utf8 are defined outside this snippet.
for character in text:
    # presumably is_gsm() reports whether the character is in the GSM alphabet -- verify
    if is_gsm(character):
       transliterated_text += character.encode('utf-8')
       continue
    if is_nonascii_utf8(character):
       # Ask unidecode for an ASCII approximation of the character.
       transliterated_char = unidecode.unidecode(character)
       # '?' or an empty result is treated as "no usable transliteration":
       # clear the gsm flag and stop processing the rest of the text.
       if transliterated_char == '?' or transliterated_char == '':
          gsm = False
          break
       # NOTE(review): rc is not defined in this snippet -- confirm what it holds.
       # When the transliteration equals rc, nothing is appended for this character.
       if transliterated_char != rc:
          character = transliterated_char
          transliterated_text += character
    else:
          transliterated_text += character.encode('utf-8')

# Replace text only if no character failed above and the result passes is_gsm().
if gsm and is_gsm(transliterated_text.decode('utf-8')):
   text = transliterated_text.decode('utf-8')

谢谢。

英文:

I am migrating my project from Python to Go, and I have a use case for converting UTF-8 text to the corresponding GSM encoding where possible. I am very new to Go, so any documentation or examples would be really helpful.

For example: Python snippet

ằ as unicode -> a after gsm encoding

# Build transliterated_text one character at a time. text, transliterated_text,
# gsm, rc, is_gsm and is_nonascii_utf8 are defined outside this snippet.
for character in text:
    # presumably is_gsm() reports whether the character is in the GSM alphabet -- verify
    if is_gsm(character):
       transliterated_text += character.encode('utf-8')
       continue
    if is_nonascii_utf8(character):
       # Ask unidecode for an ASCII approximation of the character.
       transliterated_char = unidecode.unidecode(character)
       # '?' or an empty result is treated as "no usable transliteration":
       # clear the gsm flag and stop processing the rest of the text.
       if transliterated_char == '?' or transliterated_char == '':
          gsm = False
          break
       # NOTE(review): rc is not defined in this snippet -- confirm what it holds.
       # When the transliteration equals rc, nothing is appended for this character.
       if transliterated_char != rc:
          character = transliterated_char
          transliterated_text += character
    else:
          transliterated_text += character.encode('utf-8')

# Replace text only if no character failed above and the result passes is_gsm().
if gsm and is_gsm(transliterated_text.decode('utf-8')):
   text = transliterated_text.decode('utf-8')

Thanks

答案1

得分: 2

你可以按照以下方式进行操作：

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// utf8GsmChars maps a single character (as UTF-8) to its byte sequence in the
// GSM 03.38 default alphabet. Only characters whose GSM code differs from
// their ASCII code are listed. Extension-table characters map to the escape
// byte 0x1B followed by the character code.
var utf8GsmChars = map[string]string{
	// Basic character set.
	"@": "\x00",
	"£": "\x01",
	"$": "\x02",
	"¥": "\x03",
	"è": "\x04",
	"é": "\x05",
	"ù": "\x06",
	"ì": "\x07",
	"ò": "\x08",
	"Ç": "\x09",
	"Ø": "\x0B",
	"ø": "\x0C",
	"Å": "\x0E",
	"å": "\x0F",
	"Δ": "\x10",
	"_": "\x11",
	"Φ": "\x12",
	"Γ": "\x13",
	"Λ": "\x14",
	"Ω": "\x15",
	"Π": "\x16",
	"Ψ": "\x17",
	"Σ": "\x18",
	"Θ": "\x19",
	"Ξ": "\x1A",
	"Æ": "\x1C",
	"æ": "\x1D",
	"ß": "\x1E",
	"É": "\x1F",
	"¤": "\x24",
	"¡": "\x40",
	"Ä": "\x5B",
	"Ö": "\x5C",
	"Ñ": "\x5D",
	"Ü": "\x5E",
	"§": "\x5F",
	"¿": "\x60",
	"ä": "\x7B",
	"ö": "\x7C",
	"ñ": "\x7D",
	"ü": "\x7E",
	"à": "\x7F",

	// Extension table: escape byte 0x1B plus the character code.
	"^": "\x1B\x14",
	"{": "\x1B\x28",
	"}": "\x1B\x29",
	`\`: "\x1B\x2F",
	"[": "\x1B\x3C",
	"~": "\x1B\x3D",
	"]": "\x1B\x3E",
	"|": "\x1B\x40",
	"€": "\x1B\x65",
}

// gsmUtf8Chars maps a single byte of the GSM 03.38 default alphabet to the
// UTF-8 encoding of the character it represents. Only bytes whose meaning
// differs from ASCII are listed (plus the space, which maps to itself).
var gsmUtf8Chars = map[string]string{
	"\x00": "\x40",     // @
	"\x01": "\xC2\xA3", // £
	"\x02": "\x24",     // $
	"\x03": "\xC2\xA5", // ¥
	"\x04": "\xC3\xA8", // è
	"\x05": "\xC3\xA9", // é
	"\x06": "\xC3\xB9", // ù
	"\x07": "\xC3\xAC", // ì
	"\x08": "\xC3\xB2", // ò
	"\x09": "\xC3\x87", // Ç
	"\x0B": "\xC3\x98", // Ø
	"\x0C": "\xC3\xB8", // ø
	"\x0E": "\xC3\x85", // Å
	"\x0F": "\xC3\xA5", // å
	"\x10": "\xCE\x94", // Δ
	"\x11": "\x5F",     // _
	"\x12": "\xCE\xA6", // Φ
	"\x13": "\xCE\x93", // Γ
	"\x14": "\xCE\x9B", // Λ
	"\x15": "\xCE\xA9", // Ω
	"\x16": "\xCE\xA0", // Π
	"\x17": "\xCE\xA8", // Ψ
	"\x18": "\xCE\xA3", // Σ
	"\x19": "\xCE\x98", // Θ
	"\x1A": "\xCE\x9E", // Ξ
	"\x1C": "\xC3\x86", // Æ
	"\x1D": "\xC3\xA6", // æ
	"\x1E": "\xC3\x9F", // ß
	"\x1F": "\xC3\x89", // É
	"\x20": "\x20",     // space
	"\x24": "\xC2\xA4", // ¤
	"\x40": "\xC2\xA1", // ¡
	"\x5B": "\xC3\x84", // Ä
	"\x5C": "\xC3\x96", // Ö
	"\x5D": "\xC3\x91", // Ñ
	"\x5E": "\xC3\x9C", // Ü
	"\x5F": "\xC2\xA7", // §
	"\x60": "\xC2\xBF", // ¿
	"\x7B": "\xC3\xA4", // ä
	"\x7C": "\xC3\xB6", // ö
	"\x7D": "\xC3\xB1", // ñ
	"\x7E": "\xC3\xBC", // ü
	"\x7F": "\xC3\xA0", // à
}

// gsmUnmappable matches every code point outside the 7-bit ASCII range.
// It is compiled once instead of on every call.
var gsmUnmappable = regexp.MustCompile(`[\x{0080}-\x{10FFFF}]`)

// UTF8ToGsm0338 converts UTF-8 text to GSM 03.38 bytes.
//
// Every character listed in utf8GsmChars is replaced by its GSM byte
// sequence. Any non-ASCII character left over has no GSM representation and
// becomes "?". All other ASCII characters pass through unchanged.
func UTF8ToGsm0338(text string) string {
	// A Replacer scans the input once and never re-examines its own output.
	// Applying strings.Replace per map entry would, depending on the random
	// map iteration order, re-encode bytes produced by an earlier entry
	// (for example "ä" -> 0x7B, which is also the ASCII code of "{").
	pairs := make([]string, 0, 2*len(utf8GsmChars))
	for utf8Char, gsmBytes := range utf8GsmChars {
		pairs = append(pairs, utf8Char, gsmBytes)
	}
	encoded := strings.NewReplacer(pairs...).Replace(text)

	return gsmUnmappable.ReplaceAllString(encoded, "?")
}

// GSM0338ToUTF8 converts GSM 03.38 bytes to UTF-8 text.
//
// Two-byte extension sequences (escape byte 0x1B plus a code) are decoded to
// the character they stand for, and every byte listed in gsmUtf8Chars is
// replaced by its UTF-8 character. All other bytes pass through unchanged.
func GSM0338ToUTF8(text string) string {
	// Extension sequences are listed first so that the escape pair is
	// consumed as a unit before the second byte can be decoded on its own.
	pairs := []string{
		"\x1B\x14", "^",
		"\x1B\x28", "{",
		"\x1B\x29", "}",
		"\x1B\x2F", `\`,
		"\x1B\x3C", "[",
		"\x1B\x3D", "~",
		"\x1B\x3E", "]",
		"\x1B\x40", "|",
		"\x1B\x65", "€",
	}
	for gsmByte, utf8Char := range gsmUtf8Chars {
		pairs = append(pairs, gsmByte, utf8Char)
	}

	// A Replacer scans the input once and never re-examines its own output.
	// Applying strings.Replace per map entry would, depending on the random
	// map iteration order, decode a result twice (0x00 -> "@" = 0x40 -> "¡").
	return strings.NewReplacer(pairs...).Replace(text)
}

// main encodes a sample string to GSM 03.38, decodes it back to UTF-8, and
// prints all three forms.
func main() {
	const original = "Hello World"

	encoded := UTF8ToGsm0338(original)
	decoded := GSM0338ToUTF8(encoded)

	fmt.Printf("word before: %s\nword after gsm: %s\nword after utf8: %s\n", original, encoded, decoded)
}

英文:

You can do it in this way:

package main

import (
        "fmt"
        "regexp"
        "strings"
)

// utf8GsmChars maps a single character (as UTF-8) to its byte sequence in the
// GSM 03.38 default alphabet. Only characters whose GSM code differs from
// their ASCII code are listed. Extension-table characters map to the escape
// byte 0x1B followed by the character code.
var utf8GsmChars = map[string]string{
	// Basic character set.
	"@": "\x00",
	"£": "\x01",
	"$": "\x02",
	"¥": "\x03",
	"è": "\x04",
	"é": "\x05",
	"ù": "\x06",
	"ì": "\x07",
	"ò": "\x08",
	"Ç": "\x09",
	"Ø": "\x0B",
	"ø": "\x0C",
	"Å": "\x0E",
	"å": "\x0F",
	"Δ": "\x10",
	"_": "\x11",
	"Φ": "\x12",
	"Γ": "\x13",
	"Λ": "\x14",
	"Ω": "\x15",
	"Π": "\x16",
	"Ψ": "\x17",
	"Σ": "\x18",
	"Θ": "\x19",
	"Ξ": "\x1A",
	"Æ": "\x1C",
	"æ": "\x1D",
	"ß": "\x1E",
	"É": "\x1F",
	"¤": "\x24",
	"¡": "\x40",
	"Ä": "\x5B",
	"Ö": "\x5C",
	"Ñ": "\x5D",
	"Ü": "\x5E",
	"§": "\x5F",
	"¿": "\x60",
	"ä": "\x7B",
	"ö": "\x7C",
	"ñ": "\x7D",
	"ü": "\x7E",
	"à": "\x7F",

	// Extension table: escape byte 0x1B plus the character code.
	"^": "\x1B\x14",
	"{": "\x1B\x28",
	"}": "\x1B\x29",
	`\`: "\x1B\x2F",
	"[": "\x1B\x3C",
	"~": "\x1B\x3D",
	"]": "\x1B\x3E",
	"|": "\x1B\x40",
	"€": "\x1B\x65",
}

// gsmUtf8Chars maps a single byte of the GSM 03.38 default alphabet to the
// UTF-8 encoding of the character it represents. Only bytes whose meaning
// differs from ASCII are listed (plus the space, which maps to itself).
var gsmUtf8Chars = map[string]string{
	"\x00": "\x40",     // @
	"\x01": "\xC2\xA3", // £
	"\x02": "\x24",     // $
	"\x03": "\xC2\xA5", // ¥
	"\x04": "\xC3\xA8", // è
	"\x05": "\xC3\xA9", // é
	"\x06": "\xC3\xB9", // ù
	"\x07": "\xC3\xAC", // ì
	"\x08": "\xC3\xB2", // ò
	"\x09": "\xC3\x87", // Ç
	"\x0B": "\xC3\x98", // Ø
	"\x0C": "\xC3\xB8", // ø
	"\x0E": "\xC3\x85", // Å
	"\x0F": "\xC3\xA5", // å
	"\x10": "\xCE\x94", // Δ
	"\x11": "\x5F",     // _
	"\x12": "\xCE\xA6", // Φ
	"\x13": "\xCE\x93", // Γ
	"\x14": "\xCE\x9B", // Λ
	"\x15": "\xCE\xA9", // Ω
	"\x16": "\xCE\xA0", // Π
	"\x17": "\xCE\xA8", // Ψ
	"\x18": "\xCE\xA3", // Σ
	"\x19": "\xCE\x98", // Θ
	"\x1A": "\xCE\x9E", // Ξ
	"\x1C": "\xC3\x86", // Æ
	"\x1D": "\xC3\xA6", // æ
	"\x1E": "\xC3\x9F", // ß
	"\x1F": "\xC3\x89", // É
	"\x20": "\x20",     // space
	"\x24": "\xC2\xA4", // ¤
	"\x40": "\xC2\xA1", // ¡
	"\x5B": "\xC3\x84", // Ä
	"\x5C": "\xC3\x96", // Ö
	"\x5D": "\xC3\x91", // Ñ
	"\x5E": "\xC3\x9C", // Ü
	"\x5F": "\xC2\xA7", // §
	"\x60": "\xC2\xBF", // ¿
	"\x7B": "\xC3\xA4", // ä
	"\x7C": "\xC3\xB6", // ö
	"\x7D": "\xC3\xB1", // ñ
	"\x7E": "\xC3\xBC", // ü
	"\x7F": "\xC3\xA0", // à
}

func UTF8ToGsm0338(text string) string {
        var s string = text

        for k, v := range utf8GsmChars {
                s = strings.Replace(s, k, v, -1)
        }

        re := regexp.MustCompile(&quot;[\\x{0080}-\\x{10FFFF}]&quot;)
        s = re.ReplaceAllString(s, &quot;?&quot;)

        return s
}

// GSM0338ToUTF8 converts GSM 03.38 bytes to UTF-8 text.
//
// Two-byte extension sequences (escape byte 0x1B plus a code) are decoded to
// the character they stand for, and every byte listed in gsmUtf8Chars is
// replaced by its UTF-8 character. All other bytes pass through unchanged.
func GSM0338ToUTF8(text string) string {
	// Extension sequences are listed first so that the escape pair is
	// consumed as a unit before the second byte can be decoded on its own.
	pairs := []string{
		"\x1B\x14", "^",
		"\x1B\x28", "{",
		"\x1B\x29", "}",
		"\x1B\x2F", `\`,
		"\x1B\x3C", "[",
		"\x1B\x3D", "~",
		"\x1B\x3E", "]",
		"\x1B\x40", "|",
		"\x1B\x65", "€",
	}
	for gsmByte, utf8Char := range gsmUtf8Chars {
		pairs = append(pairs, gsmByte, utf8Char)
	}

	// A Replacer scans the input once and never re-examines its own output.
	// Applying strings.Replace per map entry would, depending on the random
	// map iteration order, decode a result twice (0x00 -> "@" = 0x40 -> "¡").
	return strings.NewReplacer(pairs...).Replace(text)
}

func main() {
        s := &quot;Hello World&quot;
        gsm := UTF8ToGsm0338(s)
        utf8 := GSM0338ToUTF8(gsm)
        fmt.Printf(&quot;word before: %s\nword after gsm: %s\nword after utf8: %s\n&quot;, s, gsm, utf8)

}

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

将Unicode转换为GSM编码在Golang中的实现

问题

答案1

在浏览器中显示Go应用程序

无法完成使用Golang进行GitHub的OAuth Web工作流程。

在Cloud Run中找不到默认凭据。

InDelta和InEpsilon之间的区别是什么？

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

发表评论