英文:
Parsing string to time with unknown layout
问题
我有一个csv
文件,想要读取:
- 表头名称
- 字段类型
所以,我写了以下代码:
package main
import (
"encoding/csv"
"fmt"
"os"
"log"
"reflect"
"strconv"
)
func main() {
filePath := "./file.csv"
headerNames := make(map[int]string)
headerTypes := make(map[int]string)
// 加载csv文件
f, _ := os.Open(filePath)
// 创建一个新的读取器
r := csv.NewReader(f)
// 只读取第一行
header, err := r.Read()
checkError("发生其他错误", err)
// 添加映射:列/属性名称 --> 记录索引
for i, v := range header {
headerNames[i] = v
}
// 读取第二行
record, err := r.Read()
checkError("发生其他错误", err)
// 检查记录字段类型
for i, v := range record {
var value interface{}
if value, err = strconv.Atoi(v); err != nil {
if value, err = strconv.ParseFloat(v, 64); err != nil {
if value, err = strconv.ParseBool(v); err != nil {
if value, err = strconv.ParseBool(v); err != nil { // <== 如何处理未知布局的情况
// 值是字符串
headerTypes[i] = "string"
value = v
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
} else {
// 值是时间戳
headerTypes[i] = "time"
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
}
} else {
// 值是布尔型
headerTypes[i] = "bool"
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
}
} else {
// 值是浮点型
headerTypes[i] = "float"
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
}
} else {
// 值是整型
headerTypes[i] = "int"
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
}
}
for i, _ := range header {
fmt.Printf("表头: %v \t类型: %v\n", headerNames[i], headerTypes[i])
}
}
func checkError(message string, err error) {
// 错误日志记录
if err != nil {
log.Fatal(message, err)
}
}
使用以下csv文件:
name,age,developer
"Hasan","46.4","true"
我得到的输出结果是:
表头: name 类型: string
表头: age 类型: float
表头: developer 类型: bool
输出结果是正确的。
我无法做到的是检查字段是否为字符串,因为我不知道字段的布局。
我知道可以根据https://go.dev/src/time/format.go中所述的格式将字符串解析为时间,并且可以构建一个自定义解析器,例如:
test, err := fmtdate.Parse("MM/DD/YYYY", "10/15/1983")
if err != nil {
panic(err)
}
但是,这只适用于我知道布局的情况下。
所以,我再次的问题是,如果我不知道布局,我应该如何解析时间,或者我应该做些什么才能解析时间?
英文:
I'm having a csv
file, and want to read:
- Header names
- Fields types
So, I wrote the below:
package main
import (
"encoding/csv"
"fmt"
"os"
"log"
"reflect"
"strconv"
)
func main() {
filePath := "./file.csv"
headerNames := make(map[int]string)
headerTypes := make(map[int]string)
// Load a csv file.
f, _ := os.Open(filePath)
// Create a new reader.
r := csv.NewReader(f)
// Read first row only
header, err := r.Read()
checkError("Some other error occurred", err)
// Add mapping: Column/property name --> record index
for i, v := range header {
headerNames[i] = v
}
// Read second row
record, err := r.Read()
checkError("Some other error occurred", err)
// Check record fields types
for i, v := range record {
var value interface{}
if value, err = strconv.Atoi(v); err != nil {
if value, err = strconv.ParseFloat(v, 64); err != nil {
if value, err = strconv.ParseBool(v); err != nil {
if value, err = strconv.ParseBool(v); err != nil { // <== How to do this with unknown layout
// Value is a string
headerTypes[i] = "string"
value = v
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
} else {
// Value is a timestamp
headerTypes[i] = "time"
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
}
} else {
// Value is a bool
headerTypes[i] = "bool"
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
}
} else {
// Value is a float
headerTypes[i] = "float"
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
}
} else {
// Value is an int
headerTypes[i] = "int"
fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
}
}
for i, _ := range header {
fmt.Printf("Header: %v \tis\t %v\n", headerNames[i], headerTypes[i])
}
}
func checkError(message string, err error) {
// Error Logging
if err != nil {
log.Fatal(message, err)
}
}
And with csv file as:
name,age,developer
"Hasan","46.4","true"
I got an output as:
Header: name is string
Header: age is float
Header: developer is bool
The output is correct.
The thing that I could not do is the one is checking if the field is string as I do not know what layout the field could be.
I aware I can pasre string to time as per the format stated at https://go.dev/src/time/format.go, and can build a custom parser, something like:
test, err := fmtdate.Parse("MM/DD/YYYY", "10/15/1983")
if err != nil {
panic(err)
}
But this will work only (as per my knowledge) if I know the layout?
So, again my question is, how can I parse time, or what shall I do to be able to parse it, if I do not know the layout?
答案1
得分: 0
感谢Burak的评论,我通过使用这个包找到了解决方案:github.com/araddon/dateparse
// 普通解析。与time.Parse()相同的时区规则
t, err := dateparse.ParseAny("3/1/2014")
// 严格解析,对于模糊的mm/dd和dd/mm日期会返回错误
t, err := dateparse.ParseStrict("3/1/2014")
> 返回错误
// 返回表示解析给定日期时间的布局的字符串
layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM")
> "Jan 2, 2006 3:04:05 PM"
英文:
Thanks to the comment by Burak, I found the solution by using this package: github.com/araddon/dateparse
// Normal parse. Equivalent Timezone rules as time.Parse()
t, err := dateparse.ParseAny("3/1/2014")
// Parse Strict, error on ambigous mm/dd vs dd/mm dates
t, err := dateparse.ParseStrict("3/1/2014")
> returns error
// Return a string that represents the layout to parse the given date-time.
layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM")
> "Jan 2, 2006 3:04:05 PM"
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论