英文:
Creating a large csv file for testing file access
问题
我想创建一个大小为10GB的文件,格式如下:
前缀:用户名:时间戳, 数字
例如:
login:jbill:2013/3/25, 1
我想通过创建类似上面的随机行来创建一个10GB的文件。
在Go语言中,我该如何实现这个功能?
我可以有一个前缀的数组,如下:
login, logout, register
还可以有一个用户名的数组:
jbill, dkennedy
英文:
I want to create a 10 GB file that looks like:
prefix:username:timestamp, number
So an example is like:
login:jbill:2013/3/25, 1
I want to create a 10GB file, by creating random rows like the one above.
How could I do this in Go?
I can have an array of prefixes like:
login, logout, register
And also an array of usernames:
jbill, dkennedy
答案1
得分: 5
例如,
package main
import (
"bufio"
"fmt"
"math/rand"
"os"
"strconv"
"time"
)
func main() {
fileSize := int64(10e9) // 10GB
f, err := os.Create("/tmp/largefile")
if err != nil {
fmt.Println(err)
return
}
w := bufio.NewWriter(f)
prefixes := []string{"login", "logout", "register"}
names := []string{"jbill", "dkennedy"}
timeStart := time.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC)
timeDur := timeStart.AddDate(1, 0, 0).Sub(timeStart)
rand.Seed(time.Now().UnixNano())
size := int64(0)
for size < fileSize {
// prefix:username:timestamp, number
// login:jbill:2012/3/25, 1
prefix := prefixes[int(rand.Int31n(int32(len(prefixes))))]
name := names[int(rand.Int31n(int32(len(names))))]
time := timeStart.Add(time.Duration(rand.Int63n(int64(timeDur)))).Format("2006/1/2")
number := strconv.Itoa(int(rand.Int31n(100) + 1))
line := prefix + ":" + name + ":" + time + ", " + number + "\n"
n, err := w.WriteString(line)
if err != nil {
fmt.Println(n, err)
return
}
size += int64(len(line))
}
err = w.Flush()
if err != nil {
fmt.Println(err)
return
}
err = f.Close()
if err != nil {
fmt.Println(err)
return
}
fmt.Println("Size:", size)
}
输出:
register:jbill:2012/8/24, 15
login:jbill:2012/10/7, 98
register:dkennedy:2012/8/29, 70
register:jbill:2012/6/1, 89
register:jbill:2012/5/24, 63
login:dkennedy:2012/3/29, 48
logout:jbill:2012/7/8, 93
logout:dkennedy:2012/1/12, 74
login:jbill:2012/4/12, 14
login:jbill:2012/2/5, 83
英文:
For example,
package main
import (
"bufio"
"fmt"
"math/rand"
"os"
"strconv"
"time"
)
func main() {
fileSize := int64(10e9) // 10GB
f, err := os.Create("/tmp/largefile")
if err != nil {
fmt.Println(err)
return
}
w := bufio.NewWriter(f)
prefixes := []string{"login", "logout", "register"}
names := []string{"jbill", "dkennedy"}
timeStart := time.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC)
timeDur := timeStart.AddDate(1, 0, 0).Sub(timeStart)
rand.Seed(time.Now().UnixNano())
size := int64(0)
for size < fileSize {
// prefix:username:timestamp, number
// login:jbill:2012/3/25, 1
prefix := prefixes[int(rand.Int31n(int32(len(prefixes))))]
name := names[int(rand.Int31n(int32(len(names))))]
time := timeStart.Add(time.Duration(rand.Int63n(int64(timeDur)))).Format("2006/1/2")
number := strconv.Itoa(int(rand.Int31n(100) + 1))
line := prefix + ":" + name + ":" + time + ", " + number + "\n"
n, err := w.WriteString(line)
if err != nil {
fmt.Println(n, err)
return
}
size += int64(len(line))
}
err = w.Flush()
if err != nil {
fmt.Println(err)
return
}
err = f.Close()
if err != nil {
fmt.Println(err)
return
}
fmt.Println("Size:", size)
}
Output:
register:jbill:2012/8/24, 15
login:jbill:2012/10/7, 98
register:dkennedy:2012/8/29, 70
register:jbill:2012/6/1, 89
register:jbill:2012/5/24, 63
login:dkennedy:2012/3/29, 48
logout:jbill:2012/7/8, 93
logout:dkennedy:2012/1/12, 74
login:jbill:2012/4/12, 14
login:jbill:2012/2/5, 83
答案2
得分: 4
这是一个天真的方法(1GB):
package main
import (
"fmt"
"log"
"os"
)
func main() {
myfile, err := os.OpenFile("myfile", os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
log.Fatal(err)
}
defer myfile.Close()
var pos int
var line string
// sample: login:jbill:2013/3/25, 1
line = fmt.Sprintf("%s:%s:%s, %d\n", "login", "jbill", "2013/3/25", 1)
for pos < 1024*1024*1024 {
bytes, err := myfile.Write([]byte(line))
if err != nil {
log.Fatal(err)
}
pos = pos + bytes
}
}
这需要很长时间(1:16),因为输出没有缓冲。通过添加bufio,您可以大大减少时间
package main
import (
"bufio"
"fmt"
"log"
"os"
)
func main() {
myfile, err := os.OpenFile("myfile", os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
log.Fatal(err)
}
defer myfile.Close()
mybufferedfile := bufio.NewWriter(myfile)
var pos int
var line string
// sample: login:jbill:2013/3/25, 1
line = fmt.Sprintf("%s:%s:%s, %d\n", "login", "jbill", "2013/3/25", 1)
for pos < 1024*1024*1024 {
bytes, err := mybufferedfile.WriteString(line)
if err != nil {
log.Fatal(err)
}
pos = pos + bytes
}
err = mybufferedfile.Flush()
if err != nil {
log.Fatal(err)
}
}
在我的机器上仍然需要26秒,我希望看到一个更快的解决方案。
顺便说一下:您需要做随机字段,但这留给读者作为练习
英文:
This is a naive approach (1GB):
package main
import (
"fmt"
"log"
"os"
)
func main() {
myfile, err := os.OpenFile("myfile", os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
log.Fatal(err)
}
defer myfile.Close()
var pos int
var line string
// sample: login:jbill:2013/3/25, 1
line = fmt.Sprintf("%s:%s:%s, %d\n", "login", "jbill", "2013/3/25", 1)
for pos < 1024*1024*1024 {
bytes, err := myfile.Write([]byte(line))
if err != nil {
log.Fatal(err)
}
pos = pos + bytes
}
}
which takes forever (1:16), because the output is not buffered. By adding bufio you can decrease the time dramatically
package main
import (
"bufio"
"fmt"
"log"
"os"
)
func main() {
myfile, err := os.OpenFile("myfile", os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
log.Fatal(err)
}
defer myfile.Close()
mybufferedfile := bufio.NewWriter(myfile)
var pos int
var line string
// sample: login:jbill:2013/3/25, 1
line = fmt.Sprintf("%s:%s:%s, %d\n", "login", "jbill", "2013/3/25", 1)
for pos < 1024*1024*1024 {
bytes, err := mybufferedfile.WriteString(line)
if err != nil {
log.Fatal(err)
}
pos = pos + bytes
}
err = mybufferedfile.Flush()
if err != nil {
log.Fatal(err)
}
}
Still 26 sec on my machine, I'd like to see a faster solution.
BTW: you need to do the random fileds, but that is left as an exercise to the reader
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论