读取一个文本文件,替换其中的单词,并将结果输出到另一个文本文件中。

huangapple go评论185阅读模式
英文:

Read a text file, replace its words, output to another text file

问题

所以我正在尝试用GO编写一个程序,用于将一个充满代码的文本文件转换为GO代码,然后将该文件保存为GO文件或文本文件。我一直在努力弄清楚如何保存我对文本文件所做的更改,但是我唯一能看到更改的方法是通过println语句,因为我使用strings.replace来搜索存储在字符串数组中的文本文件并更改需要更改的每个单词的出现(例如,BEGIN -> {和END -> })。那么,是否有其他我不知道的在GO中搜索和替换的方法,或者有没有一种我不知道的编辑文本文件的方法,或者这是不可能的?

谢谢

以下是我目前的代码。

package main

import (
    "os"
    "bufio"
    "bytes"
    "io"
    "fmt"
    "strings"
)


// readLines loads the file at path and returns its contents as a slice of
// lines with the line terminators removed.
//
// bufio.Reader.ReadLine hands back a line in several fragments when it is
// longer than the reader's buffer; the fragments are collected in a scratch
// buffer and stored as one line once the final fragment arrives. Reaching the
// end of the file is not treated as an error. Any other read error is returned
// together with the lines gathered so far.
func readLines(path string) (lines []string, errr error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return lines, openErr
	}
	defer file.Close()

	var (
		reader  = bufio.NewReader(file)
		pending bytes.Buffer
	)
	for {
		chunk, more, readErr := reader.ReadLine()
		if readErr == io.EOF {
			return lines, nil
		}
		if readErr != nil {
			return lines, readErr
		}

		pending.Write(chunk)
		if more {
			// The line continues in the next fragment.
			continue
		}
		lines = append(lines, pending.String())
		pending.Reset()
	}
}

// writeLines creates (or truncates) the file at path and writes every element
// of lines to it, one element per output line.
//
// Each element is passed through strings.TrimSpace before it is written, so
// leading indentation and trailing whitespace are dropped, and a single "\n"
// is appended. Writing stops at the first failure. The first error from
// creating, writing or closing the file is returned.
func writeLines(lines []string, path string) (errr error) {
	file, createErr := os.Create(path)
	if createErr != nil {
		return createErr
	}
	// Report a failed Close as well, but never let it mask an earlier write
	// error that is already being returned.
	defer func() {
		if closeErr := file.Close(); closeErr != nil && errr == nil {
			errr = closeErr
		}
	}()

	for _, item := range lines {
		// Plain assignment on purpose: declaring a new errr with := here would
		// shadow the named result and the failure would be lost to the caller.
		if _, errr = file.WriteString(strings.TrimSpace(item) + "\n"); errr != nil {
			return errr
		}
	}
	return nil
}

// FixBegin replaces every occurrence of "BEGIN" with "{" in each element of
// lines. The slice is updated in place, so the caller sees the rewritten text
// through the slice it passed in.
//
// Matching is plain substring matching: a longer word that merely contains
// "BEGIN" is rewritten too. The returned error is always nil; the result is
// kept so that callers which assign it continue to compile.
func FixBegin(lines []string) (errr error) {
	// Ranging over the slice bounds the loop to its length; an index loop with
	// no end condition runs past the last element and panics.
	for i, line := range lines {
		lines[i] = strings.Replace(line, "BEGIN", "{", -1)
	}
	return nil
}

// FixEnd replaces every occurrence of "END" with "}" in each element of
// lines. The slice is updated in place, so the caller sees the rewritten text
// through the slice it passed in.
//
// Matching is plain substring matching: a longer word that merely contains
// "END" is rewritten too. The returned error is always nil; the result is
// kept so that callers which assign it continue to compile.
func FixEnd(lines []string) (errr error) {
	// Ranging over the slice bounds the loop to its length; an index loop with
	// no end condition runs past the last element and panics.
	for i, line := range lines {
		lines[i] = strings.Replace(line, "END", "}", -1)
	}
	return nil
}

// main reads foo.txt, echoes its lines to standard output, runs the BEGIN and
// END substitutions over them, and saves the result to beer2.txt. It stops at
// the first error and prints it.
func main() {
	lines, errr := readLines("foo.txt")
	if errr != nil {
		// Printf rather than Println: the message contains a %s verb.
		fmt.Printf("Error: %s\n", errr)
		return
	}
	for _, line := range lines {
		fmt.Println(line)
	}

	// Both passes receive the same slice that is written out afterwards, and
	// the file is written only once, after the last pass. Writing between the
	// passes and then re-reading the file would leave END untouched on disk.
	if errr = FixBegin(lines); errr != nil {
		fmt.Printf("Error: %s\n", errr)
		return
	}
	if errr = FixEnd(lines); errr != nil {
		fmt.Printf("Error: %s\n", errr)
		return
	}
	if errr = writeLines(lines, "beer2.txt"); errr != nil {
		fmt.Printf("Error: %s\n", errr)
	}
}
英文:

So I am trying to write a program in Go that takes a text file full of code, converts it into Go code, and then saves the result as a Go file or a text file. I have been trying to figure out how to save the changes I make to the text, but the only way I can see the changes is through a println statement, because I am using strings.Replace to search the string slice that the text file is stored in and change each occurrence of a word that needs to be changed (e.g. BEGIN -> { and END -> }). So is there another way of searching and replacing in Go that I don't know about, is there a way to edit a text file that I don't know about, or is this impossible?

Thanks

Here is the code I have so far.

package main

import (
    "os"
    "bufio"
    "bytes"
    "io"
    "fmt"
    "strings"
)


// readLines loads the file at path and returns its contents as a slice of
// lines with the line terminators removed.
//
// bufio.Reader.ReadLine hands back a line in several fragments when it is
// longer than the reader's buffer; the fragments are collected in a scratch
// buffer and stored as one line once the final fragment arrives. Reaching the
// end of the file is not treated as an error. Any other read error is returned
// together with the lines gathered so far.
func readLines(path string) (lines []string, errr error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return lines, openErr
	}
	defer file.Close()

	var (
		reader  = bufio.NewReader(file)
		pending bytes.Buffer
	)
	for {
		chunk, more, readErr := reader.ReadLine()
		if readErr == io.EOF {
			return lines, nil
		}
		if readErr != nil {
			return lines, readErr
		}

		pending.Write(chunk)
		if more {
			// The line continues in the next fragment.
			continue
		}
		lines = append(lines, pending.String())
		pending.Reset()
	}
}

// writeLines creates (or truncates) the file at path and writes every element
// of lines to it, one element per output line.
//
// Each element is passed through strings.TrimSpace before it is written, so
// leading indentation and trailing whitespace are dropped, and a single "\n"
// is appended. Writing stops at the first failure. The first error from
// creating, writing or closing the file is returned.
func writeLines(lines []string, path string) (errr error) {
	file, createErr := os.Create(path)
	if createErr != nil {
		return createErr
	}
	// Report a failed Close as well, but never let it mask an earlier write
	// error that is already being returned.
	defer func() {
		if closeErr := file.Close(); closeErr != nil && errr == nil {
			errr = closeErr
		}
	}()

	for _, item := range lines {
		// Plain assignment on purpose: declaring a new errr with := here would
		// shadow the named result and the failure would be lost to the caller.
		if _, errr = file.WriteString(strings.TrimSpace(item) + "\n"); errr != nil {
			return errr
		}
	}
	return nil
}

// FixBegin replaces every occurrence of "BEGIN" with "{" in each element of
// lines. The slice is updated in place, so the caller sees the rewritten text
// through the slice it passed in.
//
// Matching is plain substring matching: a longer word that merely contains
// "BEGIN" is rewritten too. The returned error is always nil; the result is
// kept so that callers which assign it continue to compile.
func FixBegin(lines []string) (errr error) {
	// Ranging over the slice bounds the loop to its length; an index loop with
	// no end condition runs past the last element and panics.
	for i, line := range lines {
		lines[i] = strings.Replace(line, "BEGIN", "{", -1)
	}
	return nil
}

// FixEnd replaces every occurrence of "END" with "}" in each element of
// lines. The slice is updated in place, so the caller sees the rewritten text
// through the slice it passed in.
//
// Matching is plain substring matching: a longer word that merely contains
// "END" is rewritten too. The returned error is always nil; the result is
// kept so that callers which assign it continue to compile.
func FixEnd(lines []string) (errr error) {
	// Ranging over the slice bounds the loop to its length; an index loop with
	// no end condition runs past the last element and panics.
	for i, line := range lines {
		lines[i] = strings.Replace(line, "END", "}", -1)
	}
	return nil
}

// main reads foo.txt, echoes its lines to standard output, runs the BEGIN and
// END substitutions over them, and saves the result to beer2.txt. It stops at
// the first error and prints it.
func main() {
	lines, errr := readLines("foo.txt")
	if errr != nil {
		// Printf rather than Println: the message contains a %s verb.
		fmt.Printf("Error: %s\n", errr)
		return
	}
	for _, line := range lines {
		fmt.Println(line)
	}

	// Both passes receive the same slice that is written out afterwards, and
	// the file is written only once, after the last pass. Writing between the
	// passes and then re-reading the file would leave END untouched on disk.
	if errr = FixBegin(lines); errr != nil {
		fmt.Printf("Error: %s\n", errr)
		return
	}
	if errr = FixEnd(lines); errr != nil {
		fmt.Printf("Error: %s\n", errr)
		return
	}
	if errr = writeLines(lines, "beer2.txt"); errr != nil {
		fmt.Printf("Error: %s\n", errr)
	}
}

答案1

得分: 3

jnml@fsc-r630:~/src/tmp/SO/13789882$ ls
foo.txt main.go
jnml@fsc-r630:~/src/tmp/SO/13789882$ cat main.go
package main

import (
"bytes"
"io/ioutil"
"log"
)

// main copies foo.txt to beer2.txt, turning every "BEGIN" into "{" and then
// every "END" into "}". Any I/O failure ends the program through log.Fatal.
func main() {
	const (
		inPath  = "foo.txt"
		outPath = "beer2.txt"
	)

	text, err := ioutil.ReadFile(inPath)
	if err != nil {
		log.Fatal(err)
	}

	// The substitutions run one after another over the whole buffer.
	for _, sub := range [][2]string{{"BEGIN", "{"}, {"END", "}"}} {
		text = bytes.Replace(text, []byte(sub[0]), []byte(sub[1]), -1)
	}

	err = ioutil.WriteFile(outPath, text, 0666)
	if err != nil {
		log.Fatal(err)
	}
}
jnml@fsc-r630:~/src/tmp/SO/13789882$ cat foo.txt
BEGIN
FILE F(KIND=REMOTE);
EBCDIC ARRAY E[0:11];
REPLACE E BY "HELLO WORLD!";
WRITE(F, *, E);
END.
jnml@fsc-r630:~/src/tmp/SO/13789882$ go run main.go
jnml@fsc-r630:~/src/tmp/SO/13789882$ cat beer2.txt
{
FILE F(KIND=REMOTE);
EBCDIC ARRAY E[0:11];
REPLACE E BY "HELLO WORLD!";
WRITE(F, *, E);
}.

英文:
jnml@fsc-r630:~/src/tmp/SO/13789882$ ls
foo.txt  main.go
jnml@fsc-r630:~/src/tmp/SO/13789882$ cat main.go 
package main

import (
        "bytes"
        "io/ioutil"
        "log"
)

// main copies foo.txt to beer2.txt, turning every "BEGIN" into "{" and then
// every "END" into "}". Any I/O failure ends the program through log.Fatal.
func main() {
	const (
		inPath  = "foo.txt"
		outPath = "beer2.txt"
	)

	text, err := ioutil.ReadFile(inPath)
	if err != nil {
		log.Fatal(err)
	}

	// The substitutions run one after another over the whole buffer.
	for _, sub := range [][2]string{{"BEGIN", "{"}, {"END", "}"}} {
		text = bytes.Replace(text, []byte(sub[0]), []byte(sub[1]), -1)
	}

	err = ioutil.WriteFile(outPath, text, 0666)
	if err != nil {
		log.Fatal(err)
	}
}
jnml@fsc-r630:~/src/tmp/SO/13789882$ cat foo.txt 
BEGIN
  FILE F(KIND=REMOTE);
  EBCDIC ARRAY E[0:11];
  REPLACE E BY "HELLO WORLD!";
  WRITE(F, *, E);
END.
jnml@fsc-r630:~/src/tmp/SO/13789882$ go run main.go 
jnml@fsc-r630:~/src/tmp/SO/13789882$ cat beer2.txt 
{
  FILE F(KIND=REMOTE);
  EBCDIC ARRAY E[0:11];
  REPLACE E BY "HELLO WORLD!";
  WRITE(F, *, E);
}.
jnml@fsc-r630:~/src/tmp/SO/13789882$ 

答案2

得分: 3

我同意@jnml关于使用ioutil来读取文件和写回文件的建议。但是我认为替换不应该通过多次对[]byte的操作来完成。代码和数据都是字符串/文本,应该以此方式处理(即使处理非ASCII/UTF8编码需要额外的工作);一次性替换(同时替换所有占位符)可以避免替换之前更改的结果的风险(即使我的正则表达式提案必须改进以处理非平凡的任务)。

package main

import(
    "fmt"
    "io/ioutil"
    "log"
    "regexp"
    "strings"
)

func main() {
    // (1) 读取文件
    data, err := ioutil.ReadFile("../tmpl/xpl.go")
    if err != nil {
        log.Fatal("ioutil.ReadFile: ", err)
    }
    s := string(data)
    fmt.Printf("----\n%s----\n", s)
    // => 适用于已知其他编码(非ASCII或UTF8)的文件的函数

    // (2) 创建一个将要替换的占位符到替换值的映射
    x := map[string]string {
        "BEGIN" : "{",
        "END" : "}"}
    ks := make([]string, 0, len(x))
    for k := range x {
        ks = append(ks, k)
    }
    // => 从映射中获取键的函数

    // (3) 创建一个正则表达式,用于查找将要替换的占位符
    p := strings.Join(ks, "|")
    fmt.Printf("/%s/\n", p)
    r := regexp.MustCompile(p)
    // => 需要更多考虑的有趣字符和顺序

    // (4) 创建一个回调函数,用于..ReplaceAllStringFunc,该函数了解映射x
    f := func(s string) string {
        fmt.Printf("*** '%s'\n", s)
        return x
展开收缩
} // => 以可重用的方式执行步骤(2)..(4)的函数? // (5) 进行替换(s将被覆盖为结果) s = r.ReplaceAllStringFunc(s, f) fmt.Printf("----\n%s----\n", s) // (6) 写回文件 err = ioutil.WriteFile("result.go", []byte(s), 0644) if err != nil { log.Fatal("ioutil.WriteFile: ", err) } // => 适用于已知其他编码(非ASCII或UTF8)的文件的函数 }

输出:

go run 13789882.go
----
func main() BEGIN
END
----
/BEGIN|END/
*** 'BEGIN'
*** 'END'
----
func main() {
}
----
英文:

I agree with @jnml with regard to using ioutil to slurp the file and to write it back. But I think that the replacing shouldn't be done by multiple passes over []byte. Code and data are strings/text and should be treated as such (even if dealing with non-ASCII/UTF-8 encodings requires extra work); a one-pass replacement (of all placeholders 'at once') avoids the risk of replacing the results of previous changes (even if my regexp proposal must be improved to handle non-trivial tasks).

package main

import(
    "fmt"
    "io/ioutil"
    "log"
    "regexp"
    "strings"
)

func main() {
    // (1) slurp the file
    data, err := ioutil.ReadFile("../tmpl/xpl.go")
    if err != nil {
        log.Fatal("ioutil.ReadFile: ", err)
    }
    s := string(data)
    fmt.Printf("----\n%s----\n", s)
    // => function that works for files of (known) other encodings that ascii or utf8

    // (2) create a map that maps placeholder to be replaced to the replacements
    x := map[string]string {
        "BEGIN" : "{",
        "END" : "}"}
    ks := make([]string, 0, len(x))
    for k := range x {
        ks = append(ks, k)
    }
    // => function(s) that gets the keys from maps

    // (3) create a regexp that finds the placeholder to be replaced
    p := strings.Join(ks, "|")
    fmt.Printf("/%s/\n", p)
    r := regexp.MustCompile(p)
    // => funny letters & order need more consideration

    // (4) create a callback function for ..ReplaceAllStringFunc that knows
    // about the map x
    f := func(s string) string {
        fmt.Printf("*** '%s'\n", s)
        return x
展开收缩
} // => function (?) to do Step (2) .. (4) in a reusable way // (5) do the replacing (s will be overwritten with the result) s = r.ReplaceAllStringFunc(s, f) fmt.Printf("----\n%s----\n", s) // (6) write back err = ioutil.WriteFile("result.go", []byte(s), 0644) if err != nil { log.Fatal("ioutil.WriteFile: ", err) } // => function that works for files of (known) other encodings that ascii or utf8 }

output:

go run 13789882.go
----
func main() BEGIN
END
----
/BEGIN|END/
*** 'BEGIN'
*** 'END'
----
func main() {
}
----

答案3

得分: 1

如果您的文件很大,将所有内容读入内存可能无法做到,也不建议这样做。可以尝试使用BytesReplacingReader,它以流式方式进行替换,而且性能还不错。如果您想替换两个字符串(例如BEGIN -> {和END -> }),只需在原始读取器上包装两个BytesReplacingReader,一个用于BEGIN,一个用于END:

// Chain two replacing readers: the inner one rewrites "BEGIN" and the outer
// one rewrites "END" in whatever the inner reader produces.
r := NewBytesReplacingReader(
	NewBytesReplacingReader(inputReader, []byte("BEGIN"), []byte("{")),
	[]byte("END"), []byte("}"))
// use r normally and all non-overlapping occurrences of
// "BEGIN" and "END" will be replaced with "{" and "}"
英文:

If your file is huge, reading everything into memory might not be possible, nor is it advised. Give BytesReplacingReader a try, as it does the replacement in a streaming fashion, and it's reasonably performant. If you want to replace two strings (such as BEGIN -> { and END -> }), you just need to wrap two BytesReplacingReaders over the original reader, one for BEGIN and one for END:

// Chain two replacing readers: the inner one rewrites "BEGIN" and the outer
// one rewrites "END" in whatever the inner reader produces.
r := NewBytesReplacingReader(
	NewBytesReplacingReader(inputReader, []byte("BEGIN"), []byte("{")),
	[]byte("END"), []byte("}"))
// use r normally and all non-overlapping occurrences of
// "BEGIN" and "END" will be replaced with "{" and "}"

huangapple
  • 本文由 发表于 2012年12月10日 01:36:40
  • 转载请务必保留本文链接:https://go.coder-hub.com/13789882.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定