英文:
how should I optimize file rotation in golang?
问题
我已经设计并实现了在Go语言中对文件进行轮转的功能。根据设计,我会根据文件大小是否大于等于FileSizeThreshold(50000字节)或文件持续时间是否大于等于FileDurationThreshold(1分钟)(以先到者为准)来进行文件轮转。
以下是Go语言中的实现代码:
package main
import (
"os"
"path/filepath"
"time"
"log"
"strings"
"flag"
"os/exec"
)
type FileStruct struct{
Filename string
CreatedAt time.Time
}
type FileRotate struct {
Dir string
File chan FileStruct
}
const(
MAX_FILE_SIZE = 50000
MAX_FILE_DURATION = time.Minute * 1
filename_time_format = "20060102150405000"
MAX_TRY = 5
)
var blockingChan chan int
func main(){
path := flag.String("dir", "", "absolute path of dir ")
flag.Parse()
if strings.Contains(*path, "./") {
log.Fatalln("ERROR: please give absolute path")
}
if info, err := os.Stat(*path); err == nil{
if ! info.IsDir(){
log.Fatalln(*path," is not a directory")
}
log.Println("directory found..")
} else {
if os.IsNotExist(err){
log.Println("directory not found..")
log.Println("creating the directory..",*path)
if err := exec.Command("mkdir","-p",*path).Run(); err != nil{
log.Fatalln("failed to create the directory ERROR:",err)
}
log.Println("directory created successfully")
}
}
filerotate := &FileRotate{*path,make(chan FileStruct,1)}
go filerotate.FileOperationsRoutine()
log.Println("generating file name struct..")
filerotate.File <- GetFileStruct()
<- blockingChan
}
func (rotate *FileRotate) FileOperationsRoutine(){
try := 0
var f *os.File
for{
if file, ok := <- rotate.File; ok{
if f == nil {
log.Println("WARN: file ptr is nil")
}
filePath := filepath.Join(rotate.Dir, file.Filename)
fileInfo, err := os.Stat(filePath)
if err != nil && os.IsNotExist(err) {
log.Println("file:", filePath, " does not exist...creating file")
_, err = os.Create(filePath)
if err != nil {
log.Println("failed to create the file ERROR:",err)
try++
if try == MAX_TRY {
log.Println("tried creating the file ",MAX_TRY," times. No luck")
time.Sleep(time.Second * 3)
continue
}
rotate.File <- file
continue
}
log.Println("file:", filePath, " created successfully")
fileInfo,err = os.Stat(filePath)
}
sizeCheck := fileInfo.Size() >= MAX_FILE_SIZE
durationCheck := time.Now().After(file.CreatedAt.Add(MAX_FILE_DURATION))
if sizeCheck || durationCheck {
log.Println("filesize of ",filePath," is ",fileInfo.Size(),"..filesizeCheck=",sizeCheck)
log.Println("fileDurationCheck=",durationCheck)
log.Println("rotating the file..")
f.Close()
f = nil
go ZipAndSendRoutine(filePath)
rotate.File <- GetFileStruct()
}else{
if f == nil {
f, err = os.OpenFile(filePath, os.O_RDWR | os.O_APPEND, 0644)
if err != nil {
log.Println("failed to open the file ERROR:", err)
try++
if try == MAX_TRY {
log.Println("tried opening the file ", MAX_TRY, " times. No luck")
time.Sleep(time.Second * 3)
continue
}
rotate.File <- file
continue
}
log.Println("file opened in append mode")
}
rotate.File <- file
}
}
}
}
func GetFileStruct() FileStruct{
current_time := time.Now()
log.Println("returning the filestruct..")
return FileStruct{"example_" + current_time.Format(filename_time_format),current_time}
}
func ZipAndSendRoutine(file string){
log.Println("zipping and sending the file:",file,"to remote server")
}
执行日志:
root@workstation:/media/sf_golang# ./bin/file_rotation -dir "/tmp/file_rotaion"
2017/01/16 15:05:03 directory found..
2017/01/16 15:05:03 starting file operations routine...
2017/01/16 15:05:03 generating file name struct..
2017/01/16 15:05:03 returning the filestruct..
2017/01/16 15:05:03 WARN: file ptr is nil
2017/01/16 15:05:03 file: /tmp/file_rotaion/example_20170116150503000 does not exist...creating file
2017/01/16 15:05:03 file: /tmp/file_rotaion/example_20170116150503000 created successfully
2017/01/16 15:05:03 file opened in append mode
2017/01/16 15:06:03 filesize of /tmp/file_rotaion/example_20170116150503000 is 0 ..filesizeCheck= false ...fileDurationCheck= true
2017/01/16 15:06:03 rotating the file..
2017/01/16 15:06:03 returning the filestruct..
2017/01/16 15:06:03 WARN: file ptr is nil
2017/01/16 15:06:03 file: /tmp/file_rotaion/example_20170116150603000 does not exist...creating file
2017/01/16 15:06:03 file: /tmp/file_rotaion/example_20170116150603000 created successfully
2017/01/16 15:06:03 file opened in append mode
2017/01/16 15:06:03 zipping and sending the file: /tmp/file_rotaion/example_20170116150503000 to remote server
2017/01/16 15:07:03 filesize of /tmp/file_rotaion/example_20170116150603000 is 0 ..filesizeCheck= false ...fileDurationCheck= true
2017/01/16 15:07:03 rotating the file..
2017/01/16 15:07:03 returning the filestruct..
2017/01/16 15:07:03 WARN: file ptr is nil
2017/01/16 15:07:03 file: /tmp/file_rotaion/example_20170116150703000 does not exist...creating file
2017/01/16 15:07:03 file: /tmp/file_rotaion/example_20170116150703000 created successfully
2017/01/16 15:07:03 file opened in append mode
2017/01/16 15:07:03 zipping and sending the file: /tmp/file_rotaion/example_20170116150603000 to remote server
从日志中可以看出,该工具按预期工作。但在执行此工具时,CPU使用率几乎达到100%。
[![CPU utilization during process execution][1]][1]
[1]: https://i.stack.imgur.com/Uen4F.png
停止工具后的CPU使用率如下所示:
[![CPU utilization after process stopped][2]][2]
[2]: https://i.stack.imgur.com/X3glG.png
我已经确定了原因:
FileOperations
goroutine无限运行,并且在该例程中我正在将文件指针发送到rotate.File
通道上。
我陷入了困境,不确定如何进一步优化。有人能告诉我如何优化此工具的CPU利用率吗?
英文:
I had designed and implemented the file rotation of file in golang.
As per design I am rotating the file based on filesize >= FileSizeThreshold(50000bytes)
or file duration >= FileDurationThreshold(1 minute)
(whichever is first).
Following is the implementation in golang.
package main
import (
"os"
"path/filepath"
"time"
"log"
"strings"
"flag"
"os/exec"
)
type FileStruct struct{
Filename string
CreatedAt time.Time
}
type FileRotate struct {
Dir string
File chan FileStruct
}
const(
MAX_FILE_SIZE = 50000
MAX_FILE_DURATION = time.Minute * 1
filename_time_format = "20060102150405000"
MAX_TRY = 5
)
var blockingChan chan int
func main(){
path := flag.String("dir", "", "absolute path of dir ")
flag.Parse()
if strings.Contains(*path, "./") {
log.Fatalln("ERROR: please give absolute path")
}
if info, err := os.Stat(*path); err == nil{
if ! info.IsDir(){
log.Fatalln(*path," is not a directory")
}
log.Println("directory found..")
} else {
if os.IsNotExist(err){
log.Println("directory not found..")
log.Println("creating the directory..",*path)
if err := exec.Command("mkdir","-p",*path).Run(); err != nil{
log.Fatalln("failed to create the directory ERROR:",err)
}
log.Println("directory created successfully")
}
}
filerotate := &FileRotate{*path,make(chan FileStruct,1)}
go filerotate.FileOperationsRoutine()
log.Println("generating file name struct..")
filerotate.File <- GetFileStruct()
<- blockingChan
}
func (rotate *FileRotate) FileOperationsRoutine(){
try := 0
var f *os.File
for{
if file, ok := <- rotate.File; ok{
if f == nil {
log.Println("WARN: file ptr is nil")
}
filePath := filepath.Join(rotate.Dir, file.Filename)
fileInfo, err := os.Stat(filePath)
if err != nil && os.IsNotExist(err) {
log.Println("file:", filePath, " does not exist...creating file")
_, err = os.Create(filePath)
if err != nil {
log.Println("failed to create the file ERROR:",err)
try++
if try == MAX_TRY {
log.Println("tried creating the file ",MAX_TRY," times. No luck")
time.Sleep(time.Second * 3)
continue
}
rotate.File <- file
continue
}
log.Println("file:", filePath, " created successfully")
fileInfo,err = os.Stat(filePath)
}
sizeCheck := fileInfo.Size() >= MAX_FILE_SIZE
durationCheck := time.Now().After(file.CreatedAt.Add(MAX_FILE_DURATION))
if sizeCheck || durationCheck {
log.Println("filesize of ",filePath," is ",fileInfo.Size(),"..filesizeCheck=",sizeCheck)
log.Println("fileDurationCheck=",durationCheck)
log.Println("rotating the file..")
f.Close()
f = nil
go ZipAndSendRoutine(filePath)
rotate.File <- GetFileStruct()
}else{
if f == nil {
f, err = os.OpenFile(filePath, os.O_RDWR | os.O_APPEND, 0644)
if err != nil {
log.Println("failed to open the file ERROR:", err)
try++
if try == MAX_TRY {
log.Println("tried opening the file ", MAX_TRY, " times. No luck")
time.Sleep(time.Second * 3)
continue
}
rotate.File <- file
continue
}
log.Println("file opened in append mode")
}
rotate.File <- file
}
}
}
}
func GetFileStruct() FileStruct{
current_time := time.Now()
log.Println("returning the filestruct..")
return FileStruct{"example_" + current_time.Format(filename_time_format),current_time}
}
func ZipAndSendRoutine(file string){
log.Println("zipping and sending the file:",file,"to remote server")
}
Execution log :
root@workstation:/media/sf_golang# ./bin/file_rotation -dir "/tmp/file_rotaion"
2017/01/16 15:05:03 directory found..
2017/01/16 15:05:03 starting file operations routine...
2017/01/16 15:05:03 generating file name struct..
2017/01/16 15:05:03 returning the filestruct..
2017/01/16 15:05:03 WARN: file ptr is nil
2017/01/16 15:05:03 file: /tmp/file_rotaion/example_20170116150503000 does not exist...creating file
2017/01/16 15:05:03 file: /tmp/file_rotaion/example_20170116150503000 created successfully
2017/01/16 15:05:03 file opened in append mode
2017/01/16 15:06:03 filesize of /tmp/file_rotaion/example_20170116150503000 is 0 ..filesizeCheck= false ...fileDurationCheck= true
2017/01/16 15:06:03 rotating the file..
2017/01/16 15:06:03 returning the filestruct..
2017/01/16 15:06:03 WARN: file ptr is nil
2017/01/16 15:06:03 file: /tmp/file_rotaion/example_20170116150603000 does not exist...creating file
2017/01/16 15:06:03 file: /tmp/file_rotaion/example_20170116150603000 created successfully
2017/01/16 15:06:03 file opened in append mode
2017/01/16 15:06:03 zipping and sending the file: /tmp/file_rotaion/example_20170116150503000 to remote server
2017/01/16 15:07:03 filesize of /tmp/file_rotaion/example_20170116150603000 is 0 ..filesizeCheck= false ...fileDurationCheck= true
2017/01/16 15:07:03 rotating the file..
2017/01/16 15:07:03 returning the filestruct..
2017/01/16 15:07:03 WARN: file ptr is nil
2017/01/16 15:07:03 file: /tmp/file_rotaion/example_20170116150703000 does not exist...creating file
2017/01/16 15:07:03 file: /tmp/file_rotaion/example_20170116150703000 created successfully
2017/01/16 15:07:03 file opened in append mode
2017/01/16 15:07:03 zipping and sending the file: /tmp/file_rotaion/example_20170116150603000 to remote server
As seen from the logs, the utility is working as expected.
But during the execution of this utility, CPU usage was almost 100%
[![CPU utilization during process execution][1]][1]
[1]: https://i.stack.imgur.com/Uen4F.png
After stopping the utility..
[![CPU utilization after process stopped][2]][2]
[2]: https://i.stack.imgur.com/X3glG.png
I have identified the cause of this:
FileOperations
goroutine is running indefinitely and within this routine I am sending the file pointer on rotate.File
channel
I am stuck at this point and not sure how to optimize this further.
Could anyone tell me how should I optimize CPU utilization for this utility?
答案1
得分: 3
你的代码主要问题在于for循环中,你一直在将FileStruct
传递给通道,无论是old
还是new
。因此,通道接收数据时没有等待时间,在if循环内部,你正在对文件进行stat操作以获取其数据,而这个操作你可能已经完成了。
以下是你程序的strace输出:
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
93.58 11.835475 3793 3120 227 futex
6.38 0.807279 4 192048 1 stat
0.03 0.003284 9 366 sched_yield
0.01 0.000759 7 114 rt_sigaction
0.00 0.000271 90 3 openat
0.00 0.000197 10 19 mmap
0.00 0.000143 20 7 write
0.00 0.000071 24 3 clone
0.00 0.000064 8 8 rt_sigprocmask
0.00 0.000034 17 2 select
0.00 0.000021 11 2 read
0.00 0.000016 16 1 sched_getaffinity
0.00 0.000014 14 1 munmap
0.00 0.000014 14 1 execve
0.00 0.000013 13 1 arch_prctl
0.00 0.000011 11 1 close
0.00 0.000000 0 2 sigaltstack
0.00 0.000000 0 1 gettid
------ ----------- ----------- --------- --------- ----------------
100.00 12.647666 195700 228 total
在大约40秒内,有195,000个系统调用。
你可以在for循环后添加一个等待时间:
for {
<-time.After(time.Second)
if file, ok := <-rotate.File; ok {
// 进行其他操作
}
}
并且你可以在FileStruct
中添加fileinfo
,在每次循环中首先检查结构体中的fileinfo
,然后再执行stat
操作。
添加了<-time.After(time.Second)
后的strace输出如下:
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
65.65 0.001512 35 43 1 futex
23.71 0.000546 5 114 rt_sigaction
3.04 0.000070 9 8 mmap
2.43 0.000056 19 3 clone
2.26 0.000052 7 8 rt_sigprocmask
0.56 0.000013 7 2 stat
0.48 0.000011 11 1 munmap
0.48 0.000011 6 2 sigaltstack
0.43 0.000010 10 1 execve
0.39 0.000009 9 1 sched_getaffinity
0.35 0.000008 8 1 arch_prctl
0.22 0.000005 5 1 gettid
0.00 0.000000 0 2 read
0.00 0.000000 0 3 write
0.00 0.000000 0 1 close
0.00 0.000000 0 1 openat
------ ----------- ----------- --------- --------- ----------------
100.00 0.002303 192 1 total
结论:
在相同的时间段内,没有使用time.After()
的代码进行了195,000次系统调用,而使用了time.After(time.Second)
的代码只进行了192次系统调用。你可以通过将已获取的文件信息作为FileStruct
的一部分来进一步改进代码。
英文:
The primary problem with your code is in the for loop all the time you are passing a FileStruct
to the channel either old
or new
. So there is no wait time for the channel receive for data and inside the if loop you are doing stat on the file for getting its data which mostly you must have already done
Here is strace on your program
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
93.58 11.835475 3793 3120 227 futex
6.38 0.807279 4 192048 1 stat
0.03 0.003284 9 366 sched_yield
0.01 0.000759 7 114 rt_sigaction
0.00 0.000271 90 3 openat
0.00 0.000197 10 19 mmap
0.00 0.000143 20 7 write
0.00 0.000071 24 3 clone
0.00 0.000064 8 8 rt_sigprocmask
0.00 0.000034 17 2 select
0.00 0.000021 11 2 read
0.00 0.000016 16 1 sched_getaffinity
0.00 0.000014 14 1 munmap
0.00 0.000014 14 1 execve
0.00 0.000013 13 1 arch_prctl
0.00 0.000011 11 1 close
0.00 0.000000 0 2 sigaltstack
0.00 0.000000 0 1 gettid
------ ----------- ----------- --------- --------- ----------------
100.00 12.647666 195700 228 total
Here with in around 40 seconds there are 195k system calls
What you may do is add a wait time just after for
for {
<- time.After(time.Second)
if file, ok := <- rotate.File; ok{
And you may add fileinfo
in the FileStruct
and on every looping you may check for that in the struct first then only do the stat
Here is the strace after adding <- time.After(time.Second)
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
65.65 0.001512 35 43 1 futex
23.71 0.000546 5 114 rt_sigaction
3.04 0.000070 9 8 mmap
2.43 0.000056 19 3 clone
2.26 0.000052 7 8 rt_sigprocmask
0.56 0.000013 7 2 stat
0.48 0.000011 11 1 munmap
0.48 0.000011 6 2 sigaltstack
0.43 0.000010 10 1 execve
0.39 0.000009 9 1 sched_getaffinity
0.35 0.000008 8 1 arch_prctl
0.22 0.000005 5 1 gettid
0.00 0.000000 0 2 read
0.00 0.000000 0 3 write
0.00 0.000000 0 1 close
0.00 0.000000 0 1 openat
------ ----------- ----------- --------- --------- ----------------
100.00 0.002303 192 1 total
Conclusion
For same time duration code without time.After()
made 195K system calls where the one with time.After(time.Second)
made only 192 system calls. You can further improve it by adding already fetched file info as a part of FileStruct
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论