嵌套字典 Golang 的问题

Nested Dictionaries Golang Trouble


Go 代码:

  1. package main
  2. import (
  3. "bufio"
  4. "fmt"
  5. "log"
  6. "os"
  7. "os/user"
  8. "path/filepath"
  9. "regexp"
  10. "strconv"
  11. "strings"
  12. "sync"
  13. "time"
  14. "github.com/aws/aws-sdk-go/aws"
  15. "github.com/aws/aws-sdk-go/aws/session"
  16. "github.com/aws/aws-sdk-go/service/s3"
  17. "github.com/aws/aws-sdk-go/service/s3/s3manager"
  18. )
  // Package-level state shared by main and the download goroutines.
  var (
  	// LocalDirectory is the directory-name prefix for downloaded logs; main
  	// appends the bucket name to it.
  	LocalDirectory = "s3logs"

  	// Lock serialises every access to data made while log lines are counted.
  	Lock sync.Mutex

  	// totalImpressions is declared here but not referenced by the code in
  	// this listing.
  	totalImpressions int

  	// data maps a tag to its per-hour impression counters.
  	data = make(map[string]map[string]int)
  )
  25. func main() {
  26. start := time.Now()
  27. // by adding this line i'm telling the program to run the threads on 4 different cores at the same time, Parallelism!!
  29. runtime.GOMAXPROCS(4)
  30. var wg sync.WaitGroup
  31. var year, month, day = time.Now().Date()
  32. str_year := strconv.Itoa(year)
  33. str_month := strconv.Itoa(int(month))
  34. str_day := strconv.Itoa(day)
  35. if int(month) < 10 {
  36. str_month = "0" + strconv.Itoa(int(month))
  37. }
  38. if day < 10 {
  39. str_day = "0" + strconv.Itoa(day)
  40. }
  41. regBuckets := map[string]string{
  42. "us-west-1": "pubgears-ca",
  43. "test": "test",
  44. }
  45. for region, bucket := range regBuckets {
  46. prefix := fmt.Sprintf("tagserver/logs/AWSLogs/978380792767/elasticloadbalancing/%s/%s/%s/%s/", region, str_year, str_month, str_day)
  47. wg.Add(1)
  48. go getLogs(region, bucket, LocalDirectory+bucket, &prefix, &wg)
  49. }
  50. wg.Wait()
  51. for tag, value := range data {
  52. for hour, imp := range value {
  53. fmt.Printf("tag: %s \n hour: %s impression %s\n", tag, hour, imp)
  54. }
  55. }
  56. elapsed := time.Since(start)
  57. fmt.Printf("\nTime took %s\n", elapsed)
  58. }
  59. func getLogs(region string, bucket string, directory string, prefix *string, wg *sync.WaitGroup) {
  60. sess := session.New()
  61. client := s3.New(sess, &aws.Config{Region: aws.String(region)})
  62. params := &s3.ListObjectsInput{Bucket: &bucket, Prefix: prefix}
  63. manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
  64. d.PartSize = 5 * 1024 * 1024 // 6MB per part
  65. d.Concurrency = 4
  66. })
  67. d := downloader{bucket: bucket, dir: directory, Downloader: manager}
  68. client.ListObjectsPages(params, d.eachPage)
  69. wg.Done()
  70. }
  71. // downloader object and methods
  72. type downloader struct {
  73. *s3manager.Downloader
  74. bucket, dir string
  75. }
  76. func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
  77. for _, obj := range page.Contents {
  78. d.downloadToFile(*obj.Key)
  79. }
  80. return true
  81. }
  82. func (d *downloader) downloadToFile(key string) {
  83. user, errs := user.Current()
  84. if errs != nil {
  85. panic(errs)
  86. }
  87. homedir := user.HomeDir
  88. desktop := homedir + "/Desktop/" + d.dir
  89. file := filepath.Join(desktop, key)
  90. if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
  91. panic(err)
  92. }
  93. fd, err := os.Create(file)
  94. if err != nil {
  95. panic(err)
  96. }
  97. defer fd.Close()
  98. params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
  99. d.Download(fd, params)
  100. _, e := d.Download(fd, params)
  101. if e != nil {
  102. panic(e)
  103. }
  104. f, err := os.Open(file)
  105. if err != nil {
  106. log.Fatal(err)
  107. }
  108. defer f.Close()
  109. tag := regexp.MustCompile("/([a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+)")
  110. date := regexp.MustCompile("T([^:]+)")
  111. scanner := bufio.NewScanner(f)
  112. for scanner.Scan() {
  113. m := tag.FindString(scanner.Text())
  114. if m != "" {
  115. Lock.Lock()
  116. arr := strings.Split(m, "/")
  117. taghash := strings.Join(arr, "_")
  118. taghash = strings.TrimLeft(taghash, "_")
  119. m = date.FindString(scanner.Text())
  120. if m != "" {
  121. hour := m
  122. if _, ok := data[taghash]; !ok {
  123. data[taghash] = make(map[string]int)
  124. }
  125. data[taghash][hour]++
  126. }
  127. Lock.Unlock()
  128. }
  129. }
  130. fmt.Println(file)
  131. os.Remove(file)
  132. if err := scanner.Err(); err != nil {
  133. log.Fatal(err)
  134. }
  135. }


  1. fstvt1_ros_300x600_pp8_1
  2. T00 213434
  3. T01 23432
  4. T02 3324
  5. T03 324
  6. T04 324
  7. T05 0 ...
  8. (这不是真实数据)



  1. for scanner.Scan() {
  2. m := tag.FindString(scanner.Text())
  3. if m != "" {
  4. Lock.Lock()
  5. arr := strings.Split(m, "/")
  6. taghash := strings.Join(arr, "_")
  7. taghash = strings.TrimLeft(taghash, "_")
  8. m = date.FindString(scanner.Text())
  9. if m != "" {
  10. hour := m
  11. if _, ok := data[taghash]; !ok {
  12. data[taghash] = make(map[string]int)
  13. }
  14. data[taghash][hour]++
  15. }
  16. Lock.Unlock()
  17. }
  18. }



得分: 2


  1. data[taghash] = make(map[string]int)


  1. if _, ok := data[taghash]; !ok {
  2. // data[taghash]不存在--创建它!
  3. data[taghash] := make(map[string]int)
  4. }
  5. data[taghash][hour]++


  # Python
  # Augmented assignment cannot target a call result, so fetch (or create) the
  # inner dict first and update the counter through it.
  hours = data.setdefault(taghash, {})
  hours[hour] = hours.get(hour, 0) + 1


  # Create the inner dict the first time a tag is seen.
  if taghash not in data:
      data[taghash] = {}
  # Start the hour's counter at 1, or add 1 to the existing count.
  hours = data[taghash]
  hours[hour] = hours[hour] + 1 if hour in hours else 1

The error comes from

  1. data[taghash] = make(map[string]int)

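This replaces data[taghash] with a newly allocated, empty map on every pass through the loop, discarding the counts already accumulated for that tag. That's definitely not what you're trying to do. Instead, allocate the inner map only when the key is missing: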

  1. if _, ok := data[taghash]; !ok {
  2. // data[taghash] does not exist -- create it!
  3. data[taghash] := make(map[string]int)
  4. }
  5. data[taghash][hour]++

This is equivalent to the following Python, shown first in compact form and then spelled out step by step:

  # Python
  # Augmented assignment cannot target a call result, so fetch (or create) the
  # inner dict first and update the counter through it.
  hours = data.setdefault(taghash, {})
  hours[hour] = hours.get(hour, 0) + 1


  # Create the inner dict the first time a tag is seen.
  if taghash not in data:
      data[taghash] = {}
  # Start the hour's counter at 1, or add 1 to the existing count.
  hours = data[taghash]
  hours[hour] = hours[hour] + 1 if hour in hours else 1

