英文:
extracting data from file using golang
问题
我正在尝试从文件中提取行,如果满足条件的话。
文件中的数据如下:
Sat 08 Aug 2015
Norwich City
A
League
W 3-1
Zaha 38; Delaney 48; Cabaye 90
27,036
如果日期的模式匹配成功,我想打印接下来的五行。
我的代码如下:
// main opens test.txt, scans it line by line, and prints every line that
// contains a match for the date-like pattern compiled below
// (for example "Sat 08 Aug 2015"). Printing the lines that follow a match is
// the open question of this snippet.
func main() {
// Pattern: three class characters, whitespace, two digits, whitespace, three
// class characters, whitespace, four digits. The compile error is discarded.
// NOTE(review): the class [aA-zZ] contains the range A-z, which also covers
// the punctuation between 'Z' and 'a'; [A-Za-z] was presumably intended.
r, _ := regexp.Compile("[aA-zZ]{3}\\s[0-9]{2}\\s[aA-zZ]{3}\\s[0-9]{4}")
file, err := os.Open("test.txt")
if err != nil {
log.Fatal(err)
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
if r.MatchString(scanner.Text()) {
fmt.Println(scanner.Text())
// Open question: how do I capture the following five lines here?
}
// NOTE(review): this check runs once per scanned line; the usual
// bufio.Scanner pattern checks Err once, after the loop has ended.
if err := scanner.Err(); err != nil {
log.Fatal(err)
}
}
}
英文:
I am trying to extract lines from a file if a condition is met.
The data in the file looks like this:
Sat 08 Aug 2015
Norwich City
A
League
W 3-1
Zaha 38; Delaney 48; Cabaye 90
27,036
If the pattern of the date is matched, I want to print the following five lines.
My code is:
// main opens test.txt, scans it line by line, and prints every line that
// contains a match for the date-like pattern compiled below
// (for example "Sat 08 Aug 2015"). Printing the lines that follow a match is
// the open question of this snippet.
func main() {
// Pattern: three class characters, whitespace, two digits, whitespace, three
// class characters, whitespace, four digits. The compile error is discarded.
// NOTE(review): the class [aA-zZ] contains the range A-z, which also covers
// the punctuation between 'Z' and 'a'; [A-Za-z] was presumably intended.
r, _ := regexp.Compile("[aA-zZ]{3}\\s[0-9]{2}\\s[aA-zZ]{3}\\s[0-9]{4}")
file, err := os.Open("test.txt")
if err != nil {
log.Fatal(err)
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
if r.MatchString(scanner.Text()) {
fmt.Println(scanner.Text())
// Open question: how do I capture the following five lines here?
}
// NOTE(review): this check runs once per scanned line; the usual
// bufio.Scanner pattern checks Err once, after the loop has ended.
if err := scanner.Err(); err != nil {
log.Fatal(err)
}
}
}
答案1
得分: 2
不确定是否有遗漏,但是像这样的代码是否足够:
package main
import (
"regexp"
"os"
"log"
"bufio"
"fmt"
)
// main scans /tmp/test.txt and, for every line containing a date such as
// "Sat 08 Aug 2015", prints that line followed by up to five subsequent lines.
// Fewer than five are printed when the file ends first. Any failure to open or
// read the file terminates the program via log.Fatal.
func main() {
	// The pattern is a constant, so a compile failure is a programming error;
	// MustCompile surfaces it instead of silently leaving a nil *Regexp.
	// [A-Za-z] is used because the range A-z would also admit the punctuation
	// that sits between 'Z' and 'a'.
	r := regexp.MustCompile(`[A-Za-z]{3}\s[0-9]{2}\s[A-Za-z]{3}\s[0-9]{4}`)

	file, err := os.Open("/tmp/test.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		if !r.MatchString(scanner.Text()) {
			continue
		}
		fmt.Println(scanner.Text())
		// Advance only while Scan succeeds so that a record truncated by the
		// end of the file does not produce blank output lines.
		for i := 0; i < 5 && scanner.Scan(); i++ {
			fmt.Println(scanner.Text())
		}
	}
	// Scan returns false on both end of input and error; Err tells them apart
	// and only needs to be consulted once scanning has stopped.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}
英文:
Not sure if I have missed something, but would something like this suffice?
package main
import (
"regexp"
"os"
"log"
"bufio"
"fmt"
)
// main scans /tmp/test.txt and, for every line containing a date such as
// "Sat 08 Aug 2015", prints that line followed by up to five subsequent lines.
// Fewer than five are printed when the file ends first. Any failure to open or
// read the file terminates the program via log.Fatal.
func main() {
	// The pattern is a constant, so a compile failure is a programming error;
	// MustCompile surfaces it instead of silently leaving a nil *Regexp.
	// [A-Za-z] is used because the range A-z would also admit the punctuation
	// that sits between 'Z' and 'a'.
	r := regexp.MustCompile(`[A-Za-z]{3}\s[0-9]{2}\s[A-Za-z]{3}\s[0-9]{4}`)

	file, err := os.Open("/tmp/test.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		if !r.MatchString(scanner.Text()) {
			continue
		}
		fmt.Println(scanner.Text())
		// Advance only while Scan succeeds so that a record truncated by the
		// end of the file does not produce blank output lines.
		for i := 0; i < 5 && scanner.Scan(); i++ {
			fmt.Println(scanner.Text())
		}
	}
	// Scan returns false on both end of input and error; Err tells them apart
	// and only needs to be consulted once scanning has stopped.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}
答案2
得分: 1
也许是这样的吗?
package main
import (
"bufio"
"fmt"
"io"
"os"
"strings"
"time"
)
// Match is one fixture record: the date line followed by the text lines that
// describe the game.
type Match struct {
	Date       time.Time
	Opponents  string
	Venue      string
	Type       string
	Result     string
	Scorers    string
	Attendance string
}

// fmtMatchDate is the time layout of a record's date line, e.g. "Sat 08 Aug 2015".
var fmtMatchDate = "Mon 02 Jan 2006"

// String renders the match one field per line, each terminated by a newline.
// The date, opponents, venue, type and result lines are always written; the
// scorers and attendance lines are written only when non-empty.
func (m Match) String() string {
	out := fmt.Sprintf(
		"%s\n%s\n%s\n%s\n%s\n",
		m.Date.Format(fmtMatchDate),
		m.Opponents,
		m.Venue,
		m.Type,
		m.Result,
	)
	if m.Scorers != "" {
		out += m.Scorers + "\n"
	}
	if m.Attendance != "" {
		out += m.Attendance + "\n"
	}
	return out
}
// ParseMatch builds a Match from the lines of one record. The first line must
// parse as a date in fmtMatchDate layout; the next six lines, when present,
// fill Opponents, Venue, Type, Result, Scorers and Attendance in that order.
// Every line is trimmed of surrounding whitespace and any line past the
// seventh is ignored. If the date line does not parse, the zero Match and the
// parse error are returned.
func ParseMatch(lines []string) (Match, error) {
	// TODO: Implement a better parser.
	var parsed Match
	// Destination of each text line, indexed by its offset after the date line.
	targets := []*string{
		&parsed.Opponents,
		&parsed.Venue,
		&parsed.Type,
		&parsed.Result,
		&parsed.Scorers,
		&parsed.Attendance,
	}
	for idx, raw := range lines {
		text := strings.TrimSpace(raw)
		if idx == 0 {
			when, err := time.Parse(fmtMatchDate, text)
			if err != nil {
				return Match{}, err
			}
			parsed.Date = when
			continue
		}
		if slot := idx - 1; slot < len(targets) {
			*targets[slot] = text
		}
	}
	return parsed, nil
}
func main() {
f, err := os.Open("match.txt")
if err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
var lines []string
snr := bufio.NewScanner(f)
for snr.Scan() {
line := snr.Text()
if _, err = time.Parse(fmtMatchDate, strings.TrimSpace(line)); err == nil {
if len(lines) > 0 {
m, err := ParseMatch(lines)
if err != nil {
fmt.Fprintln(os.Stderr, err)
} else {
fmt.Print(m)
}
}
lines = lines[:0]
}
lines = append(lines, line)
}
if len(lines) > 0 {
m, err := ParseMatch(lines)
if err != nil {
fmt.Fprintln(os.Stderr, err)
} else {
fmt.Print(m)
}
}
if err := snr.Err(); err != nil {
if err != io.EOF {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}
}
输入:
$ cat match.txt
Sat 08 Aug 2015
Norwich City
A
League
W 3-1
Zaha 38; Delaney 48; Cabaye 90
27,036
Sun 16 Aug 2015
Arsenal
H
League
L 1-2
Sat 29 Aug 2015
Chelsea
A
League
W 2-1
Sako 64; Ward 80
41,581
输出:
$ go run match.go
Sat 08 Aug 2015
Norwich City
A
League
W 3-1
Zaha 38; Delaney 48; Cabaye 90
27,036
Sun 16 Aug 2015
Arsenal
H
League
L 1-2
Sat 29 Aug 2015
Chelsea
A
League
W 2-1
Sako 64; Ward 80
41,581
$
英文:
Perhaps, something like this?
package main
import (
"bufio"
"fmt"
"io"
"os"
"strings"
"time"
)
// Match is one fixture record: the date line followed by the text lines that
// describe the game.
type Match struct {
	Date       time.Time
	Opponents  string
	Venue      string
	Type       string
	Result     string
	Scorers    string
	Attendance string
}

// fmtMatchDate is the time layout of a record's date line, e.g. "Sat 08 Aug 2015".
var fmtMatchDate = "Mon 02 Jan 2006"

// String renders the match one field per line, each terminated by a newline.
// The date, opponents, venue, type and result lines are always written; the
// scorers and attendance lines are written only when non-empty.
func (m Match) String() string {
	out := fmt.Sprintf(
		"%s\n%s\n%s\n%s\n%s\n",
		m.Date.Format(fmtMatchDate),
		m.Opponents,
		m.Venue,
		m.Type,
		m.Result,
	)
	if m.Scorers != "" {
		out += m.Scorers + "\n"
	}
	if m.Attendance != "" {
		out += m.Attendance + "\n"
	}
	return out
}
// ParseMatch builds a Match from the lines of one record. The first line must
// parse as a date in fmtMatchDate layout; the next six lines, when present,
// fill Opponents, Venue, Type, Result, Scorers and Attendance in that order.
// Every line is trimmed of surrounding whitespace and any line past the
// seventh is ignored. If the date line does not parse, the zero Match and the
// parse error are returned.
func ParseMatch(lines []string) (Match, error) {
	// TODO: Implement a better parser.
	var parsed Match
	// Destination of each text line, indexed by its offset after the date line.
	targets := []*string{
		&parsed.Opponents,
		&parsed.Venue,
		&parsed.Type,
		&parsed.Result,
		&parsed.Scorers,
		&parsed.Attendance,
	}
	for idx, raw := range lines {
		text := strings.TrimSpace(raw)
		if idx == 0 {
			when, err := time.Parse(fmtMatchDate, text)
			if err != nil {
				return Match{}, err
			}
			parsed.Date = when
			continue
		}
		if slot := idx - 1; slot < len(targets) {
			*targets[slot] = text
		}
	}
	return parsed, nil
}
func main() {
f, err := os.Open("match.txt")
if err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
var lines []string
snr := bufio.NewScanner(f)
for snr.Scan() {
line := snr.Text()
if _, err = time.Parse(fmtMatchDate, strings.TrimSpace(line)); err == nil {
if len(lines) > 0 {
m, err := ParseMatch(lines)
if err != nil {
fmt.Fprintln(os.Stderr, err)
} else {
fmt.Print(m)
}
}
lines = lines[:0]
}
lines = append(lines, line)
}
if len(lines) > 0 {
m, err := ParseMatch(lines)
if err != nil {
fmt.Fprintln(os.Stderr, err)
} else {
fmt.Print(m)
}
}
if err := snr.Err(); err != nil {
if err != io.EOF {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}
}
Input:
$ cat match.txt
Sat 08 Aug 2015
Norwich City
A
League
W 3-1
Zaha 38; Delaney 48; Cabaye 90
27,036
Sun 16 Aug 2015
Arsenal
H
League
L 1-2
Sat 29 Aug 2015
Chelsea
A
League
W 2-1
Sako 64; Ward 80
41,581
Output:
$ go run match.go
Sat 08 Aug 2015
Norwich City
A
League
W 3-1
Zaha 38; Delaney 48; Cabaye 90
27,036
Sun 16 Aug 2015
Arsenal
H
League
L 1-2
Sat 29 Aug 2015
Chelsea
A
League
W 2-1
Sako 64; Ward 80
41,581
$
答案3
得分: 0
我不是正则表达式的忠实粉丝,因为当你或其他人在6个月后回头看它时,它往往会使事情变得复杂。我会将文件读入行的切片,并使用偏移量来获取要测试的行。
// main reads the whole file named by fileName (not declared in this snippet),
// splits it into lines, and walks the lines looking for a date line: exactly
// four space-separated parts whose second and fourth parts parse as integers.
// After such a line, the following six lines are copied into name, A, league,
// score, scorers and attendance. Nothing is printed or returned.
func main() {
var (
dayName string
month string
name string
A string
league string
score string
scorers string
attendance string
day int
year int
err error
)
data, errRead := ioutil.ReadFile(fileName)
if errRead != nil {
// A read failure ends the program without reporting the error.
return
}
// get the file as a block of text
theText := string(data)
// make the line endings consistent: both "\r\n" and "\n" become "\r"
theText = strings.Replace(theText, "\r\n", "\r", -1)
theText = strings.Replace(theText, "\n", "\r", -1)
// split it into a set of lines
lines := strings.Split(theText, "\r")
numLines := len(lines)
i := 0
for i < numLines {
// at this point we should have the candidate date line
theLine := lines[i]
i++
// give each line a consistent spacing, you never know what state it is in
// NOTE(review): as written this replaces a single space with a single
// space; presumably it was meant to collapse runs of spaces. Verify.
theLine = strings.Replace(theLine, " ", " ", -1)
parts := strings.Split(theLine, " ")
if len(parts) == 4 {
// at least the line has the four date parts
// NOTE(review): := declares a new dayName that shadows the outer one.
dayName := parts[0]
day, err = strconv.Atoi(parts[1])
if err == nil {
// we have a number for the day
// NOTE(review): := declares a new month that shadows the outer one.
month := parts[2]
year, err = strconv.Atoi(parts[3])
if err == nil {
// we have a number for the year
// the following lines are the record's data (six lines are read here)
// NOTE(review): lines[i] through lines[i+5] are indexed without being
// checked against numLines.
name = lines[i]
A = lines[i+1]
league = lines[i+2]
score = lines[i+3]
scorers = lines[i+4]
attendance = lines[i+5]
i += 6
}
}
}
}
}
对于得分等数据,你需要自己解析,但这将相当简单。你还需要记住,当从他人那里获取数据时,他们的一致性可能不如你所希望的那样。
英文:
I am not a great fan of regex, as it tends to complicate things when you, or someone else, go back to it in 6 months. I would read the file into a slice of lines, and use an offset as the way of getting the lines to test.
// main reads the whole file named by fileName (not declared in this snippet),
// splits it into lines, and walks the lines looking for a date line: exactly
// four space-separated parts whose second and fourth parts parse as integers.
// After such a line, the following six lines are copied into name, A, league,
// score, scorers and attendance. Nothing is printed or returned.
func main() {
var (
dayName string
month string
name string
A string
league string
score string
scorers string
attendance string
day int
year int
err error
)
data, errRead := ioutil.ReadFile(fileName)
if errRead != nil {
// A read failure ends the program without reporting the error.
return
}
// get the file as a block of text
theText := string(data)
// make the line endings consistent: both "\r\n" and "\n" become "\r"
theText = strings.Replace(theText, "\r\n", "\r", -1)
theText = strings.Replace(theText, "\n", "\r", -1)
// split it into a set of lines
lines := strings.Split(theText, "\r")
numLines := len(lines)
i := 0
for i < numLines {
// at this point we should have the candidate date line
theLine := lines[i]
i++
// give each line a consistent spacing, you never know what state it is in
// NOTE(review): as written this replaces a single space with a single
// space; presumably it was meant to collapse runs of spaces. Verify.
theLine = strings.Replace(theLine, " ", " ", -1)
parts := strings.Split(theLine, " ")
if len(parts) == 4 {
// at least the line has the four date parts
// NOTE(review): := declares a new dayName that shadows the outer one.
dayName := parts[0]
day, err = strconv.Atoi(parts[1])
if err == nil {
// we have a number for the day
// NOTE(review): := declares a new month that shadows the outer one.
month := parts[2]
year, err = strconv.Atoi(parts[3])
if err == nil {
// we have a number for the year
// the following lines are the record's data (six lines are read here)
// NOTE(review): lines[i] through lines[i+5] are indexed without being
// checked against numLines.
name = lines[i]
A = lines[i+1]
league = lines[i+2]
score = lines[i+3]
scorers = lines[i+4]
attendance = lines[i+5]
i += 6
}
}
}
}
}
For the score and the other fields, you will have to parse them yourself, but this will be fairly trivial. You also need to remember that data you get from someone else may not always be as consistent as you would wish.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论