将字符串解析为时间,但未知布局。

huangapple go评论125阅读模式
英文:

Parsing string to time with unknown layout

问题

我有一个csv文件,想要读取:

  1. 表头名称
  2. 字段类型

所以,我写了以下代码:

package main

import (
	"encoding/csv"
	"fmt"
	"os"
	"log"
	"reflect"
	"strconv"
)

// main reads the header row and the first data row of ./file.csv and prints
// a guessed type for every column.
//
// Each field of the data row is tried, in order, as an int, a float and a
// bool; anything that fails all of them is reported as "string". The "time"
// branch is a placeholder: it repeats ParseBool, which has just failed on the
// same input, so that branch can never be taken until a real time parser is
// substituted.
func main() {
	filePath := "./file.csv"
	headerNames := make(map[int]string)
	headerTypes := make(map[int]string)

	// Load the csv file. The open error is checked here so that a missing
	// file is reported directly instead of surfacing later as a read error.
	f, err := os.Open(filePath)
	checkError("发生其他错误", err)
	// checkError ends the program through log.Fatal, which skips deferred
	// calls; that is acceptable because the file is only read from.
	defer f.Close()

	// Create a new reader.
	r := csv.NewReader(f)

	// Read the first row only: the column names.
	header, err := r.Read()
	checkError("发生其他错误", err)

	// Add mapping: record index --> column/property name.
	for i, v := range header {
		headerNames[i] = v
	}

	// Read the second row; its values drive the type guess.
	record, err := r.Read()
	checkError("发生其他错误", err)

	// Check the type of every field in the record.
	for i, v := range record {
		var value interface{}
		if value, err = strconv.Atoi(v); err == nil {
			// Value is an int.
			headerTypes[i] = "int"
		} else if value, err = strconv.ParseFloat(v, 64); err == nil {
			// Value is a float.
			headerTypes[i] = "float"
		} else if value, err = strconv.ParseBool(v); err == nil {
			// Value is a bool.
			headerTypes[i] = "bool"
		} else if value, err = strconv.ParseBool(v); err == nil { // <== how to do this when the layout is unknown
			// Value is a timestamp (placeholder check, currently unreachable).
			headerTypes[i] = "time"
		} else {
			// Value is a string.
			headerTypes[i] = "string"
			value = v
		}
		fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
	}

	for i := range header {
		fmt.Printf("表头: %v \t类型: %v\n", headerNames[i], headerTypes[i])
	}
}

// checkError does nothing when err is nil. Otherwise it passes message and
// err to log.Fatal, which logs them and terminates the program.
func checkError(message string, err error) {
	if err == nil {
		return
	}
	log.Fatal(message, err)
}

使用以下csv文件:

name,age,developer
"Hasan","46.4","true"

我得到的输出结果是:

表头: name       类型: string
表头: age        类型: float
表头: developer  类型: bool

输出结果是正确的。

我无法做到的是检查字段是否为时间,因为我不知道该字段可能采用哪种布局。

我知道可以根据https://go.dev/src/time/format.go中所述的格式将字符串解析为时间,并且可以构建一个自定义解析器,例如:

    test, err := fmtdate.Parse("MM/DD/YYYY", "10/15/1983")
    if err != nil {
        panic(err)
    }

但是,这只适用于我知道布局的情况下。

所以,我的问题还是:如果我不知道布局,应该如何解析时间?或者说,我需要做些什么才能解析它?

英文:

I have a CSV file and want to read:

  1. Header names
  2. Fields types

So, I wrote the below:

package main

import (
	"encoding/csv"
	"fmt"
	"os"
    "log"
	"reflect"
	"strconv"
)

// main reads the header row and the first data row of ./file.csv and prints
// a guessed type for every column.
//
// Each field of the data row is tried, in order, as an int, a float and a
// bool; anything that fails all of them is reported as "string". The "time"
// branch is a placeholder: it repeats ParseBool, which has just failed on the
// same input, so that branch can never be taken until a real time parser is
// substituted.
func main() {
	filePath := "./file.csv"
	headerNames := make(map[int]string)
	headerTypes := make(map[int]string)

	// Load the csv file. The open error is checked here so that a missing
	// file is reported directly instead of surfacing later as a read error.
	f, err := os.Open(filePath)
	checkError("Some other error occurred", err)
	// checkError ends the program through log.Fatal, which skips deferred
	// calls; that is acceptable because the file is only read from.
	defer f.Close()

	// Create a new reader.
	r := csv.NewReader(f)

	// Read the first row only: the column names.
	header, err := r.Read()
	checkError("Some other error occurred", err)

	// Add mapping: record index --> column/property name.
	for i, v := range header {
		headerNames[i] = v
	}

	// Read the second row; its values drive the type guess.
	record, err := r.Read()
	checkError("Some other error occurred", err)

	// Check the type of every field in the record.
	for i, v := range record {
		var value interface{}
		if value, err = strconv.Atoi(v); err == nil {
			// Value is an int.
			headerTypes[i] = "int"
		} else if value, err = strconv.ParseFloat(v, 64); err == nil {
			// Value is a float.
			headerTypes[i] = "float"
		} else if value, err = strconv.ParseBool(v); err == nil {
			// Value is a bool.
			headerTypes[i] = "bool"
		} else if value, err = strconv.ParseBool(v); err == nil { // <== How to do this with unknown layout
			// Value is a timestamp (placeholder check, currently unreachable).
			headerTypes[i] = "time"
		} else {
			// Value is a string.
			headerTypes[i] = "string"
			value = v
		}
		fmt.Println(reflect.TypeOf(value), reflect.ValueOf(value))
	}

	for i := range header {
		fmt.Printf("Header: %v \tis\t %v\n", headerNames[i], headerTypes[i])
	}
}

// checkError does nothing when err is nil. Otherwise it passes message and
// err to log.Fatal, which logs them and terminates the program.
func checkError(message string, err error) {
	if err == nil {
		return
	}
	log.Fatal(message, err)
}

And with csv file as:

name,age,developer
"Hasan","46.4","true"

I got an output as:

Header: name       is  string
Header: age        is  float
Header: developer  is  bool

The output is correct.

What I could not do is check whether a field is a time value (rather than a plain string), because I do not know what layout the field could have.

I am aware that I can parse a string to a time using the layouts described at https://go.dev/src/time/format.go, and that I can build a custom parser, something like:

    test, err := fmtdate.Parse("MM/DD/YYYY", "10/15/1983")
    if err != nil {
        panic(err)
    }

But, as far as I know, this only works if I know the layout.

So, again my question is, how can I parse time, or what shall I do to be able to parse it, if I do not know the layout?

答案1

得分: 0

感谢Burak的评论,我通过使用这个包找到了解决方案:github.com/araddon/dateparse


// 普通解析。与time.Parse()相同的时区规则
t, err := dateparse.ParseAny("3/1/2014")

// 严格解析,对于模糊的mm/dd和dd/mm日期会返回错误
t, err := dateparse.ParseStrict("3/1/2014")
> 返回错误

// 返回表示解析给定日期时间的布局的字符串
layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM")
> "Jan 2, 2006 3:04:05 PM"
英文:

Thanks to the comment by Burak, I found the solution by using this package: github.com/araddon/dateparse


// Normal parse.  Equivalent Timezone rules as time.Parse()
t, err := dateparse.ParseAny("3/1/2014")

// Parse Strict, error on ambiguous mm/dd vs dd/mm dates
t, err := dateparse.ParseStrict("3/1/2014")
> returns error 

// Return a string that represents the layout to parse the given date-time.
layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM")
> "Jan 2, 2006 3:04:05 PM"

huangapple
  • 本文由 发表于 2022年7月25日 21:18:32
  • 转载请务必保留本文链接:https://go.coder-hub.com/73109905.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定