嵌套字典 Golang 的问题

huangapple go评论116阅读模式
英文:

Nested Dictionaries Golang Trouble

问题

Go 代码:

  1. package main
  2. import (
  3. "bufio"
  4. "fmt"
  5. "log"
  6. "os"
  7. "os/user"
  8. "path/filepath"
  9. "regexp"
  10. "strconv"
  11. "strings"
  12. "sync"
  13. "time"
  14. "github.com/aws/aws-sdk-go/aws"
  15. "github.com/aws/aws-sdk-go/aws/session"
  16. "github.com/aws/aws-sdk-go/service/s3"
  17. "github.com/aws/aws-sdk-go/service/s3/s3manager"
  18. )
  19. var (
  20. LocalDirectory = "s3logs" // Into this directory
  21. Lock sync.Mutex
  22. totalImpressions int
  23. )
  24. var data = make(map[string]map[string]int)
  25. func main() {
  26. start := time.Now()
  27. // by adding this line i'm telling the program to run the threads on 4 different cores at the same time, Parallelism!!
  28. //REMEMBER TO ADD BLOCKS TO STOP RACE CONDITIONS
  29. runtime.GOMAXPROCS(4)
  30. var wg sync.WaitGroup
  31. var year, month, day = time.Now().Date()
  32. str_year := strconv.Itoa(year)
  33. str_month := strconv.Itoa(int(month))
  34. str_day := strconv.Itoa(day)
  35. if int(month) < 10 {
  36. str_month = "0" + strconv.Itoa(int(month))
  37. }
  38. if day < 10 {
  39. str_day = "0" + strconv.Itoa(day)
  40. }
  41. regBuckets := map[string]string{
  42. "us-west-1": "pubgears-ca",
  43. "test": "test",
  44. }
  45. for region, bucket := range regBuckets {
  46. prefix := fmt.Sprintf("tagserver/logs/AWSLogs/978380792767/elasticloadbalancing/%s/%s/%s/%s/", region, str_year, str_month, str_day)
  47. wg.Add(1)
  48. go getLogs(region, bucket, LocalDirectory+bucket, &prefix, &wg)
  49. }
  50. wg.Wait()
  51. for tag, value := range data {
  52. for hour, imp := range value {
  53. fmt.Printf("tag: %s \n hour: %s impression %s\n", tag, hour, imp)
  54. }
  55. }
  56. elapsed := time.Since(start)
  57. fmt.Printf("\nTime took %s\n", elapsed)
  58. }
  59. func getLogs(region string, bucket string, directory string, prefix *string, wg *sync.WaitGroup) {
  60. sess := session.New()
  61. client := s3.New(sess, &aws.Config{Region: aws.String(region)})
  62. params := &s3.ListObjectsInput{Bucket: &bucket, Prefix: prefix}
  63. manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
  64. d.PartSize = 5 * 1024 * 1024 // 6MB per part
  65. d.Concurrency = 4
  66. })
  67. d := downloader{bucket: bucket, dir: directory, Downloader: manager}
  68. client.ListObjectsPages(params, d.eachPage)
  69. wg.Done()
  70. }
  71. // downloader object and methods
  72. type downloader struct {
  73. *s3manager.Downloader
  74. bucket, dir string
  75. }
  76. func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
  77. for _, obj := range page.Contents {
  78. d.downloadToFile(*obj.Key)
  79. }
  80. return true
  81. }
  82. func (d *downloader) downloadToFile(key string) {
  83. user, errs := user.Current()
  84. if errs != nil {
  85. panic(errs)
  86. }
  87. homedir := user.HomeDir
  88. desktop := homedir + "/Desktop/" + d.dir
  89. file := filepath.Join(desktop, key)
  90. if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
  91. panic(err)
  92. }
  93. fd, err := os.Create(file)
  94. if err != nil {
  95. panic(err)
  96. }
  97. defer fd.Close()
  98. params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
  99. d.Download(fd, params)
  100. _, e := d.Download(fd, params)
  101. if e != nil {
  102. panic(e)
  103. }
  104. f, err := os.Open(file)
  105. if err != nil {
  106. log.Fatal(err)
  107. }
  108. defer f.Close()
  109. tag := regexp.MustCompile("/([a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+)")
  110. date := regexp.MustCompile("T([^:]+)")
  111. scanner := bufio.NewScanner(f)
  112. for scanner.Scan() {
  113. m := tag.FindString(scanner.Text())
  114. if m != "" {
  115. Lock.Lock()
  116. arr := strings.Split(m, "/")
  117. taghash := strings.Join(arr, "_")
  118. taghash = strings.TrimLeft(taghash, "_")
  119. m = date.FindString(scanner.Text())
  120. if m != "" {
  121. hour := m
  122. if _, ok := data[taghash]; !ok {
  123. data[taghash] = make(map[string]int)
  124. }
  125. data[taghash][hour]++
  126. }
  127. Lock.Unlock()
  128. }
  129. }
  130. fmt.Println(file)
  131. os.Remove(file)
  132. if err := scanner.Err(); err != nil {
  133. log.Fatal(err)
  134. }
  135. }

你在代码的 for scanner.Scan() 循环附近遇到了问题。你希望输出每个 taghash 的多个键值对,但实际上只输出了最后一个键值对。你想要的输出应该类似于:

  1. fstvt1_ros_300x600_pp8_1
  2. T00 213434
  3. T01 23432
  4. T02 3324
  5. T03 324
  6. T04 324
  7. T05 0 ...
  8. (这不是真实数据)

你想要的效果是:如果 taghash 和 hour 都已存在,则递增对应的计数;如果 taghash 或 hour 不存在,则创建 taghash、添加新的键并递增。

你可以尝试将 for scanner.Scan() 循环部分修改如下:

  1. for scanner.Scan() {
  2. m := tag.FindString(scanner.Text())
  3. if m != "" {
  4. Lock.Lock()
  5. arr := strings.Split(m, "/")
  6. taghash := strings.Join(arr, "_")
  7. taghash = strings.TrimLeft(taghash, "_")
  8. m = date.FindString(scanner.Text())
  9. if m != "" {
  10. hour := m
  11. if _, ok := data[taghash]; !ok {
  12. data[taghash] = make(map[string]int)
  13. }
  14. data[taghash][hour]++
  15. }
  16. Lock.Unlock()
  17. }
  18. }

这样修改后,如果 taghash 和 hour 都已存在,则会递增对应的计数;如果 taghash 或 hour 不存在,则会创建 taghash、添加新的键并递增。

英文:

Go Code:

  1. package main
  2. import (
  3. "bufio"
  4. _ "bytes"
  5. "fmt"
  6. _ "io"
  7. "log"
  8. "os"
  9. "os/user"
  10. "path/filepath"
  11. _ "reflect"
  12. "regexp"
  13. "runtime"
  14. "strconv"
  15. "strings"
  16. "sync"
  17. "time"
  18. "github.com/aws/aws-sdk-go/aws"
  19. "github.com/aws/aws-sdk-go/aws/session"
  20. "github.com/aws/aws-sdk-go/service/s3"
  21. "github.com/aws/aws-sdk-go/service/s3/s3manager"
  22. )
  23. var (
  24. LocalDirectory = "s3logs" // Into this directory
  25. Lock sync.Mutex
  26. totalImpressions int
  27. )
  28. var data = make(map[string]map[string]int)
  29. func main() {
  30. start := time.Now()
  31. // by adding this line i'm telling the program to run the threads on 4 different cores at the same time, Parallelism!!
  32. //REMEMBER TO ADD BLOCKS TO STOP RACE CONDITIONS
  33. runtime.GOMAXPROCS(4)
  34. var wg sync.WaitGroup
  35. var year, month, day = time.Now().Date()
  36. str_year := strconv.Itoa(year)
  37. str_month := strconv.Itoa(int(month))
  38. str_day := strconv.Itoa(day)
  39. if int(month) < 10 {
  40. str_month = "0" + strconv.Itoa(int(month))
  41. }
  42. if day < 10 {
  43. str_day = "0" + strconv.Itoa(day)
  44. }
  45. regBuckets := map[string]string{
  46. "us-west-1": "pubgears-ca",
  47. "test": "test",
  48. }
  49. for region, bucket := range regBuckets {
  50. prefix := fmt.Sprintf("tagserver/logs/AWSLogs/978380792767/elasticloadbalancing/%s/%s/%s/%s/", region, str_year, str_month, str_day)
  51. wg.Add(1)
  52. go getLogs(region, bucket, LocalDirectory+bucket, &prefix, &wg)
  53. }
  54. wg.Wait()
  55. //salon/t1/728x90/index
  56. //totalImpressions := 0
  57. // var provider = make(map[string]int)
  58. // for key, value := range data {
  59. // key = strings.TrimSpace(key)
  60. // pro := strings.Split(key, "_")[3]
  61. //
  62. // if strings.Contains(pro, "pp") == true || (pro == "pulsepoint") || (pro == "cweb") {
  63. // provider["pulsepoint"] += value
  64. // } else if (pro == "openx") || (pro == "openx3") {
  65. // provider["openx"] += value
  66. // } else if key == " " {
  67. // continue
  68. // } else {
  69. // provider[pro] += value
  70. // }
  71. // totalImpressions += value
  72. // }
  73. for tag, value := range data {
  74. for hour, imp := range value {
  75. fmt.Printf("tag: %s \n hour: %s impression %s\n", tag, hour, imp)
  76. }
  77. }
  78. //sl = sl[:len(sl)-1]
  79. elapsed := time.Since(start)
  80. fmt.Printf("\nTime took %s\n", elapsed)
  81. }
  82. func getLogs(region string, bucket string, directory string, prefix *string, wg *sync.WaitGroup) {
  83. sess := session.New()
  84. client := s3.New(sess, &aws.Config{Region: aws.String(region)})
  85. params := &s3.ListObjectsInput{Bucket: &bucket, Prefix: prefix}
  86. manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
  87. d.PartSize = 5 * 1024 * 1024 // 6MB per part
  88. d.Concurrency = 4
  89. })
  90. d := downloader{bucket: bucket, dir: directory, Downloader: manager}
  91. client.ListObjectsPages(params, d.eachPage)
  92. wg.Done()
  93. }
  94. // downloader object and methods
  95. type downloader struct {
  96. *s3manager.Downloader
  97. bucket, dir string
  98. }
  99. func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
  100. for _, obj := range page.Contents {
  101. // fmt.Println(obj)
  102. // return true
  103. d.downloadToFile(*obj.Key)
  104. }
  105. return true
  106. }
  107. func (d *downloader) downloadToFile(key string) {
  108. // Create the directories in the path
  109. // desktop path
  110. user, errs := user.Current()
  111. if errs != nil {
  112. panic(errs)
  113. }
  114. homedir := user.HomeDir
  115. desktop := homedir + "/Desktop/" + d.dir
  116. file := filepath.Join(desktop, key)
  117. if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
  118. panic(err)
  119. }
  120. // Setup the local file
  121. fd, err := os.Create(file)
  122. if err != nil {
  123. panic(err)
  124. }
  125. defer fd.Close()
  126. // Download the file using the AWS SDK
  127. //fmt.Printf("Downloading s3://%s/%s to %s...\n", d.bucket, key, file)
  128. params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
  129. d.Download(fd, params)
  130. _, e := d.Download(fd, params)
  131. if e != nil {
  132. panic(e)
  133. }
  134. f, err := os.Open(file)
  135. if err != nil {
  136. log.Fatal(err)
  137. }
  138. defer f.Close()
  139. tag := regexp.MustCompile("/([a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+)")
  140. date := regexp.MustCompile("T([^:]+)")
  141. scanner := bufio.NewScanner(f)
  142. // HAVING TROUBLE HERE
  143. for scanner.Scan() {
  144. //dateCollection := make(map[string]int)
  145. m := tag.FindString(scanner.Text())
  146. if m != "" {
  147. // stop races
  148. Lock.Lock()
  149. arr := strings.Split(m, "/")
  150. taghash := strings.Join(arr, "_")
  151. taghash = strings.TrimLeft(taghash, "_")
  152. //data[taghash]++
  153. m = date.FindString(scanner.Text())
  154. if m != "" {
  155. hour := m
  156. data[taghash] = make(map[string]int)
  157. data[taghash][hour]++
  158. }
  159. Lock.Unlock()
  160. }
  161. }
  162. fmt.Println(file)
  163. os.Remove(file)
  164. if err := scanner.Err(); err != nil {
  165. log.Fatal(err)
  166. }
  167. }

The area I'm having trouble with starts at line 142 of the listing above (marked `// HAVING TROUBLE HERE`); the relevant section is reproduced below:

  1. for scanner.Scan() {
  2. //dateCollection := make(map[string]int)
  3. m := tag.FindString(scanner.Text())
  4. if m != "" {
  5. // stop races
  6. Lock.Lock()
  7. arr := strings.Split(m, "/")
  8. taghash := strings.Join(arr, "_")
  9. taghash = strings.TrimLeft(taghash, "_")
  10. //data[taghash]++
  11. m = date.FindString(scanner.Text())
  12. if m != "" {
  13. hour := m
  14. data[taghash] = make(map[string]int)
  15. data[taghash][hour]++
  16. }
  17. Lock.Unlock()
  18. }
  19. }
  20. fmt.Println(file)
  21. os.Remove(file)
  22. if err := scanner.Err(); err != nil {
  23. log.Fatal(err)
  24. }

The output i'm getting when I print the values in the main func

  1. fstvt1_ros_300x600_pp8_1 T07 1

I'm expecting an output that looks something like this:

  1. fstvt1_ros_300x600_pp8_1
  2. T00 213434
  3. T01 23432
  4. T02 3324
  5. T03 324
  6. T04 324
  7. T05 0 ...
  8. (this isn't real data)

What I'm trying to do:
I have a map of data, `var data = make(map[string]map[string]int)`, whose key is a taghash, e.g. fstvt1_ros_300x600_pp8_1. The value is itself a map, whose keys are expected to be strings and whose values are integers. I want multiple entries in that inner map, one for each key: T01, T02, and so on. What is currently output is only the last item in the list of items I'm iterating over, instead of a collection of keys and values for each taghash. How do I make it so that, instead of overwriting the data, it adds the new key (T01, T02, ...), and, if the taghash and hour are the same, increments that particular entry?

Line of code i'm currently using:

T01, T02..

  1. data[taghash][hour]++

If the taghash and hour already exist, it should increment the count. If the taghash and hour don't exist, it should create the taghash, add the new key, and increment.

答案1

得分: 2

错误出现在

  1. data[taghash] = make(map[string]int)

这一行代码中。它会在每次循环时将data[taghash]设置为一个新分配的映射。这绝对不是你想要做的。相反:

  1. if _, ok := data[taghash]; !ok {
  2. // data[taghash]不存在--创建它!
  3. data[taghash] = make(map[string]int)
  4. }
  5. data[taghash][hour]++

这等同于:

  1. # Python
  2. data.setdefault(taghash, {}).setdefault(hour, 0); data[taghash][hour] += 1

或者

  1. if taghash not in data:
  2. data[taghash] = {}
  3. if hour not in data[taghash]:
  4. data[taghash][hour] = 1
  5. else:
  6. data[taghash][hour] += 1
英文:

The error comes from

  1. data[taghash] = make(map[string]int)

This sets data[taghash] to a newly allocated map every time through. That's definitely not what you're trying to do. Instead:

  1. if _, ok := data[taghash]; !ok {
  2. // data[taghash] does not exist -- create it!
  3. data[taghash] = make(map[string]int)
  4. }
  5. data[taghash][hour]++

This is equivalent to:

  1. # Python
  2. data.setdefault(taghash, {}).setdefault(hour, 0); data[taghash][hour] += 1

Or

  1. if taghash not in data:
  2. data[taghash] = {}
  3. if hour not in data[taghash]:
  4. data[taghash][hour] = 1
  5. else:
  6. data[taghash][hour] += 1

huangapple
  • 本文由 发表于 2015年12月29日 04:46:05
  • 转载请务必保留本文链接:https://go.coder-hub.com/34500527.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定