嵌套字典 Golang 的问题

huangapple go评论89阅读模式
英文:

Nested Dictionaries Golang Trouble

问题

Go 代码:

package main

import (
	"bufio"
	"fmt"
	"log"
	"os"
	"os/user"
	"path/filepath"
	"regexp"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3"
	"github.com/aws/aws-sdk-go/service/s3/s3manager"
)

var (
	// LocalDirectory is the name prefix of the local download directory; main
	// appends the bucket name to it before handing it to getLogs.
	LocalDirectory   = "s3logs" // Into this directory
	// Lock guards data, which is written from several goroutines.
	Lock             sync.Mutex
	// totalImpressions is not referenced by any code visible in this listing.
	totalImpressions int
)

// data holds the impression counters: outer key is the tag hash, inner key is
// the hour string (e.g. "T07"), value is the number of matching log lines.
var data = make(map[string]map[string]int)

// main starts one getLogs goroutine per configured region/bucket pair for
// today's date, waits for all of them to finish, then prints every
// tag/hour/impression counter accumulated in data along with the elapsed time.
func main() {
	start := time.Now()
	// Allow at most 4 CPUs to execute Go code simultaneously. Shared state
	// (data) is protected by Lock in downloadToFile.
	runtime.GOMAXPROCS(4)
	var wg sync.WaitGroup

	year, month, day := time.Now().Date()
	strYear := strconv.Itoa(year)
	// Month and day are zero-padded to two digits in the key prefix below.
	strMonth := fmt.Sprintf("%02d", int(month))
	strDay := fmt.Sprintf("%02d", day)

	regBuckets := map[string]string{
		"us-west-1": "pubgears-ca",
		"test":      "test",
	}

	for region, bucket := range regBuckets {
		// prefix is declared inside the loop body, so every goroutine receives
		// a pointer to its own copy.
		prefix := fmt.Sprintf("tagserver/logs/AWSLogs/978380792767/elasticloadbalancing/%s/%s/%s/%s/", region, strYear, strMonth, strDay)
		wg.Add(1)
		go getLogs(region, bucket, LocalDirectory+bucket, &prefix, &wg)
	}
	wg.Wait()

	// All writers have finished after wg.Wait, so data can be read without
	// taking Lock. Map iteration order is unspecified, so neither tags nor
	// hours are printed in sorted order.
	for tag, hours := range data {
		for hour, imp := range hours {
			// imp is an int: %d is the correct verb (%s would print "%!s(int=N)").
			fmt.Printf("tag: %s  \n hour: %s impression %d\n", tag, hour, imp)
		}
	}

	elapsed := time.Since(start)
	fmt.Printf("\nTime took %s\n", elapsed)

}

// getLogs lists every object in bucket whose key starts with prefix, using an
// S3 client configured for region, and passes each listing page to a
// downloader that stores files under directory and tallies their contents.
// It calls wg.Done exactly once when it returns.
//
// A listing failure is logged instead of being silently discarded; it is not
// fatal, so the goroutines for the other regions keep running.
func getLogs(region string, bucket string, directory string, prefix *string, wg *sync.WaitGroup) {
	// Deferred so the WaitGroup is released on every return path.
	defer wg.Done()

	sess := session.New()
	client := s3.New(sess, &aws.Config{Region: aws.String(region)})

	params := &s3.ListObjectsInput{Bucket: &bucket, Prefix: prefix}
	manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
		d.PartSize = 5 * 1024 * 1024 // 5 MiB per part
		d.Concurrency = 4
	})
	d := downloader{bucket: bucket, dir: directory, Downloader: manager}
	if err := client.ListObjectsPages(params, d.eachPage); err != nil {
		log.Printf("listing s3://%s/%s: %v", bucket, aws.StringValue(prefix), err)
	}
}

// downloader couples an embedded S3 download manager with the bucket it reads
// from and the local directory name its files are written under.
type downloader struct {
	*s3manager.Downloader
	bucket string // S3 bucket the objects are fetched from
	dir    string // directory name placed under ~/Desktop for local copies
}

// eachPage is the page callback given to ListObjectsPages. It downloads and
// processes every object listed on the page and always returns true,
// regardless of the value of more.
func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
	for i := range page.Contents {
		key := *page.Contents[i].Key
		d.downloadToFile(key)
	}
	return true
}

// Patterns used to pull the tag path and the hour out of a log line. They are
// compiled once at package scope instead of once per downloaded file.
var (
	tagPattern  = regexp.MustCompile("/([a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+)")
	hourPattern = regexp.MustCompile("T([^:]+)")
)

// parseLogLine extracts the tag hash and hour from one log line. The tag hash
// is the first four-segment path match with "/" replaced by "_" and leading
// underscores removed; the hour is the first "T..." match up to the next ":".
// ok is false when either pattern does not match.
func parseLogLine(line string) (taghash, hour string, ok bool) {
	tagPath := tagPattern.FindString(line)
	if tagPath == "" {
		return "", "", false
	}
	hour = hourPattern.FindString(line)
	if hour == "" {
		return "", "", false
	}
	taghash = strings.TrimLeft(strings.Replace(tagPath, "/", "_", -1), "_")
	return taghash, hour, true
}

// downloadToFile downloads the object named key from d.bucket into
// ~/Desktop/<d.dir>/<key>, counts one impression in data[taghash][hour] for
// every line that parseLogLine accepts, prints the local path, and deletes the
// local copy again.
//
// Failures while preparing or downloading the file panic, and failures while
// reading it back terminate the program via log.Fatal.
func (d *downloader) downloadToFile(key string) {
	current, err := user.Current()
	if err != nil {
		panic(err)
	}
	file := filepath.Join(current.HomeDir, "Desktop", d.dir, key)
	if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
		panic(err)
	}

	fd, err := os.Create(file)
	if err != nil {
		panic(err)
	}

	// Download exactly once; a second identical call would only fetch the
	// same object again.
	params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
	_, err = d.Download(fd, params)
	// Close the write handle before the file is reopened for reading, and
	// surface a close error if the download itself succeeded.
	if cerr := fd.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		panic(err)
	}

	f, err := os.Open(file)
	if err != nil {
		log.Fatal(err)
	}

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		taghash, hour, ok := parseLogLine(scanner.Text())
		if !ok {
			continue
		}
		// Hold the lock only for the shared-map update.
		Lock.Lock()
		hours, seen := data[taghash]
		if !seen {
			// Create the inner map only the first time a tag is seen;
			// re-creating it on every line would discard earlier hours.
			hours = make(map[string]int)
			data[taghash] = hours
		}
		hours[hour]++
		Lock.Unlock()
	}
	scanErr := scanner.Err()

	// The handle was read-only, so a close error carries no data-loss risk.
	_ = f.Close()

	fmt.Println(file)
	// Remove only after both handles are closed.
	if err := os.Remove(file); err != nil {
		log.Printf("removing %s: %v", file, err)
	}
	if scanErr != nil {
		log.Fatal(scanErr)
	}
}

你在代码的第167行附近遇到了问题。你希望输出每个taghash的多个键值对,但实际上只输出了最后一个键值对。你想要的输出应该类似于:

fstvt1_ros_300x600_pp8_1
T00 213434
T01 23432
T02 3324
T03 324
T04 324
T05 0 ...
(这不是真实数据)

你想要的效果是:如果 taghash 和 hour 都已存在,则递增对应的计数;如果不存在,则创建 taghash,添加新的 hour 键并递增。

你可以尝试修改代码的第167行附近的部分如下:

// Tally one impression per log line that contains both a tag and an hour.
for scanner.Scan() {
    line := scanner.Text()
    match := tag.FindString(line)
    if match == "" {
        continue
    }
    Lock.Lock()
    taghash := strings.TrimLeft(strings.Join(strings.Split(match, "/"), "_"), "_")
    if hour := date.FindString(line); hour != "" {
        hours, seen := data[taghash]
        if !seen {
            // First time this tag is seen: create its inner map once.
            hours = make(map[string]int)
            data[taghash] = hours
        }
        hours[hour]++
    }
    Lock.Unlock()
}

这样修改后,如果 taghash 和 hour 都已存在,则会递增对应的计数;如果不存在,则会先创建 taghash 对应的内层映射,再添加新的 hour 键并递增。

英文:

Go Code:

package main
import (
&quot;bufio&quot;
_ &quot;bytes&quot;
&quot;fmt&quot;
_ &quot;io&quot;
&quot;log&quot;
&quot;os&quot;
&quot;os/user&quot;
&quot;path/filepath&quot;
_ &quot;reflect&quot;
&quot;regexp&quot;
&quot;runtime&quot;
&quot;strconv&quot;
&quot;strings&quot;
&quot;sync&quot;
&quot;time&quot;
&quot;github.com/aws/aws-sdk-go/aws&quot;
&quot;github.com/aws/aws-sdk-go/aws/session&quot;
&quot;github.com/aws/aws-sdk-go/service/s3&quot;
&quot;github.com/aws/aws-sdk-go/service/s3/s3manager&quot;
)
var (
// LocalDirectory is the name prefix of the local download directory; main
// appends the bucket name to it.
LocalDirectory   = &quot;s3logs&quot; // Into this directory
// Lock guards data, which is written from several goroutines.
Lock             sync.Mutex
// totalImpressions is only referenced from commented-out code in this listing.
totalImpressions int
)
// data maps tag hash -> hour string -> number of matching log lines.
var data = make(map[string]map[string]int)
// main starts one getLogs goroutine per region/bucket pair for today's date,
// waits for all of them, then prints every tag/hour/impression counter in data
// and the elapsed time.
func main() {
start := time.Now()
// by adding this line i&#39;m telling the program to run the threads on 4 different cores at the same time, Parallelism!!
//REMEMBER TO ADD BLOCKS TO STOP RACE CONDITIONS
runtime.GOMAXPROCS(4)
var wg sync.WaitGroup
var year, month, day = time.Now().Date()
str_year := strconv.Itoa(year)
str_month := strconv.Itoa(int(month))
str_day := strconv.Itoa(day)
// Zero-pad single-digit months and days to two characters.
if int(month) &lt; 10 {
str_month = &quot;0&quot; + strconv.Itoa(int(month))
}
if day &lt; 10 {
str_day = &quot;0&quot; + strconv.Itoa(day)
}
// Region name -> bucket name.
regBuckets := map[string]string{
&quot;us-west-1&quot;: &quot;pubgears-ca&quot;,
&quot;test&quot;:      &quot;test&quot;,
}
for region, bucket := range regBuckets {
prefix := fmt.Sprintf(&quot;tagserver/logs/AWSLogs/978380792767/elasticloadbalancing/%s/%s/%s/%s/&quot;, region, str_year, str_month, str_day)
wg.Add(1)
go getLogs(region, bucket, LocalDirectory+bucket, &amp;prefix, &amp;wg)
}
// Block until every getLogs goroutine has called wg.Done.
wg.Wait()
//salon/t1/728x90/index
//totalImpressions := 0
// var provider = make(map[string]int)
// for key, value := range data {
//   key = strings.TrimSpace(key)
//   pro := strings.Split(key, &quot;_&quot;)[3]
//
//   if strings.Contains(pro, &quot;pp&quot;) == true || (pro == &quot;pulsepoint&quot;) || (pro == &quot;cweb&quot;) {
//     provider[&quot;pulsepoint&quot;] += value
//   } else if (pro == &quot;openx&quot;) || (pro == &quot;openx3&quot;) {
//     provider[&quot;openx&quot;] += value
//   } else if key == &quot; &quot; {
//     continue
//   } else {
//     provider[pro] += value
//   }
//   totalImpressions += value
// }
for tag, value := range data {
for hour, imp := range value {
// NOTE(review): imp is an int (data is map[string]map[string]int), so the
// last %s verb does not match its argument; %d is the matching verb.
fmt.Printf(&quot;tag: %s  \n hour: %s impression %s\n&quot;, tag, hour, imp)
}
}
//sl = sl[:len(sl)-1]
elapsed := time.Since(start)
fmt.Printf(&quot;\nTime took %s\n&quot;, elapsed)
}
// getLogs lists the objects in bucket whose keys start with prefix, using an
// S3 client configured for region, and passes each listing page to
// downloader.eachPage. It calls wg.Done once the listing call has returned.
func getLogs(region string, bucket string, directory string, prefix *string, wg *sync.WaitGroup) {
sess := session.New()
client := s3.New(sess, &amp;aws.Config{Region: aws.String(region)})
params := &amp;s3.ListObjectsInput{Bucket: &amp;bucket, Prefix: prefix}
manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
d.PartSize = 5 * 1024 * 1024 // 5 MiB per part (the value is 5*1024*1024, not 6 MB)
d.Concurrency = 4
})
d := downloader{bucket: bucket, dir: directory, Downloader: manager}
// NOTE(review): the error returned by ListObjectsPages is discarded here.
client.ListObjectsPages(params, d.eachPage)
wg.Done()
}
// downloader embeds an S3 download manager and records the bucket to read
// from (bucket) and the local directory name to write under (dir).
type downloader struct {
*s3manager.Downloader
bucket, dir string
}
// eachPage is the page callback passed to ListObjectsPages. It calls
// downloadToFile for every object on the page and always returns true; the
// more argument is not consulted.
func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
for _, obj := range page.Contents {
// fmt.Println(obj)
//     return true
d.downloadToFile(*obj.Key)
}
return true
}
// downloadToFile downloads the object named key from d.bucket into
// <home>/Desktop/<d.dir>/<key>, scans the local copy line by line to update
// the counters in data, prints the local path, and removes the file.
// Setup and download failures panic; read failures call log.Fatal.
func (d *downloader) downloadToFile(key string) {
// Create the directories in the path
// desktop path
user, errs := user.Current()
if errs != nil {
panic(errs)
}
homedir := user.HomeDir
desktop := homedir + &quot;/Desktop/&quot; + d.dir
file := filepath.Join(desktop, key)
if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
panic(err)
}
// Setup the local file
fd, err := os.Create(file)
if err != nil {
panic(err)
}
defer fd.Close()
// Download the file using the AWS SDK
//fmt.Printf(&quot;Downloading s3://%s/%s to %s...\n&quot;, d.bucket, key, file)
params := &amp;s3.GetObjectInput{Bucket: &amp;d.bucket, Key: &amp;key}
// NOTE(review): Download is called twice with the same arguments; the result
// of this first call is discarded.
d.Download(fd, params)
_, e := d.Download(fd, params)
if e != nil {
panic(e)
}
// Reopen the downloaded file for reading.
f, err := os.Open(file)
if err != nil {
log.Fatal(err)
}
defer f.Close()
// tag matches a four-segment slash-separated path; date matches "T" followed
// by everything up to the next ":".
tag := regexp.MustCompile(&quot;/([a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+)&quot;)
date := regexp.MustCompile(&quot;T([^:]+)&quot;)
scanner := bufio.NewScanner(f)
// HAVING TROUBLE HERE
for scanner.Scan() {
//dateCollection := make(map[string]int)
m := tag.FindString(scanner.Text())
if m != &quot;&quot; {
// stop races
Lock.Lock()
// Turn "/a/b/c/d" into "a_b_c_d".
arr := strings.Split(m, &quot;/&quot;)
taghash := strings.Join(arr, &quot;_&quot;)
taghash = strings.TrimLeft(taghash, &quot;_&quot;)
//data[taghash]++
m = date.FindString(scanner.Text())
if m != &quot;&quot; {
hour := m
// BUG (the subject of the question): this assigns a brand-new inner map on
// every matching line, discarding all hours counted so far for taghash, so
// only the last hour survives with a count of 1.
data[taghash] = make(map[string]int)
data[taghash][hour]++
}
Lock.Unlock()
}
}
fmt.Println(file)
// The file is removed while fd and f are still open (their Close calls are deferred).
os.Remove(file)
if err := scanner.Err(); err != nil {
log.Fatal(err)
}
}

The area i'm having trouble at starts on line 167 section below:

// Excerpt from downloadToFile: tallies one impression per log line that
// contains both a tag path and an hour.
for scanner.Scan() {
//dateCollection := make(map[string]int)
m := tag.FindString(scanner.Text())
if m != &quot;&quot; {
// stop races
Lock.Lock()
// Turn "/a/b/c/d" into "a_b_c_d".
arr := strings.Split(m, &quot;/&quot;)
taghash := strings.Join(arr, &quot;_&quot;)
taghash = strings.TrimLeft(taghash, &quot;_&quot;)
//data[taghash]++
m = date.FindString(scanner.Text())
if m != &quot;&quot; {
hour := m
// BUG: a new inner map is assigned on every matching line, so earlier hours
// for this taghash are thrown away and only the last one remains.
data[taghash] = make(map[string]int)
data[taghash][hour]++
}
Lock.Unlock()
}
}
fmt.Println(file)
os.Remove(file)
if err := scanner.Err(); err != nil {
log.Fatal(err)
}

The output i'm getting when I print the values in the main func

fstvt1_ros_300x600_pp8_1 T07 1

I'm expecting an output that looks something like this:

fstvt1_ros_300x600_pp8_1
T00 213434
T01 23432
T02 3324
T03 324
T04 324
T05 0 ...
(this isn't real data)

What I'm trying to do:
I have a map, var data = make(map[string]map[string]int), whose key is a taghash, e.g. fstvt1_ros_300x600_pp8_1. The value is itself a map whose keys are strings (the hour: T01, T02, ...) and whose values are integers. I want one inner map per taghash, with one entry per hour. What is currently output is only the last item in the list of items I'm iterating over, instead of a collection of keys and values for each taghash. How do I make it so that, instead of overwriting the data, it adds the new key (T01, T02, ...) and, if the taghash and hour are the same, increments that particular entry?

Line of code i'm currently using:

T01, T02..

data[taghash][hour]++

If the taghash and hour already exist, it should increment the counter. If the taghash and hour don't exist, it should create the taghash, add the new hour key, and increment.

答案1

得分: 2

错误出现在

data[taghash] = make(map[string]int)

这一行代码中。它会在每次循环时将data[taghash]设置为一个新分配的映射。这绝对不是你想要做的。相反:

if _, ok := data[taghash]; !ok {
	// data[taghash] does not exist yet -- create it. This must be a plain
	// assignment (=): := cannot be used with an index expression on the left.
	data[taghash] = make(map[string]int)
}
data[taghash][hour]++

这等同于:

# Python
# An augmented assignment cannot target a call result, so fetch (or create)
# the inner dict first and then update the counter by key.
hours = data.setdefault(taghash, {})
hours[hour] = hours.get(hour, 0) + 1

或者

# Create the inner dict on first sight of a tag, then count the hour.
if taghash not in data:
    data[taghash] = {}
if hour not in data[taghash]:
    data[taghash][hour] = 1
else:
    data[taghash][hour] += 1
英文:

The error comes from

data[taghash] = make(map[string]int)

This sets data[taghash] to a newly allocated map every time through. That's definitely not what you're trying to do. Instead:

if _, ok := data[taghash]; !ok {
	// data[taghash] does not exist -- create it! This must be a plain
	// assignment (=): := cannot be used with an index expression on the left.
	data[taghash] = make(map[string]int)
}
data[taghash][hour]++

This is equivalent to:

# Python
# An augmented assignment cannot target a call result, so fetch (or create)
# the inner dict first and then update the counter by key.
hours = data.setdefault(taghash, {})
hours[hour] = hours.get(hour, 0) + 1

Or

# Create the inner dict on first sight of a tag, then count the hour.
if taghash not in data:
    data[taghash] = {}
if hour not in data[taghash]:
    data[taghash][hour] = 1
else:
    data[taghash][hour] += 1

huangapple
  • 本文由 发表于 2015年12月29日 04:46:05
  • 转载请务必保留本文链接:https://go.coder-hub.com/34500527.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定