英文:
Nested Dictionaries Golang Trouble
问题
Go 代码:
package main
import (
	"bufio"
	"fmt"
	"log"
	"os"
	"os/user"
	"path/filepath"
	"regexp"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3"
	"github.com/aws/aws-sdk-go/service/s3/s3manager"
)
var (
// LocalDirectory is the directory-name prefix for downloaded logs; main
// appends the bucket name to it before handing it to getLogs.
LocalDirectory = "s3logs" // Into this directory
// Lock serializes the updates to data performed in downloadToFile.
Lock sync.Mutex
// totalImpressions is declared but not referenced by the code shown here.
totalImpressions int
)
// data maps a tag hash to a per-hour counter: data[taghash][hour] is the
// number of matching log lines seen for that tag in that hour.
var data = make(map[string]map[string]int)
// main downloads today's load-balancer logs for every configured
// region/bucket pair concurrently (one goroutine per pair), waits for all of
// them, and then prints the per-tag, per-hour counts accumulated in data
// together with the total elapsed time.
func main() {
	start := time.Now()

	// GOMAXPROCS sets the maximum number of CPUs that can execute Go code
	// simultaneously. The goroutines started below share the data map, whose
	// updates are serialized by Lock in downloadToFile.
	runtime.GOMAXPROCS(4)

	// twoDigits renders n in decimal, left-padded with a zero when n < 10,
	// matching the month/day components used in the key prefix.
	twoDigits := func(n int) string {
		if n < 10 {
			return "0" + strconv.Itoa(n)
		}
		return strconv.Itoa(n)
	}

	// Derive the date from start so that the prefix and the timer are based
	// on the same clock reading.
	year, month, day := start.Date()
	strYear := strconv.Itoa(year)
	strMonth := twoDigits(int(month))
	strDay := twoDigits(day)

	// Region name -> bucket name.
	regBuckets := map[string]string{
		"us-west-1": "pubgears-ca",
		"test":      "test",
	}

	var wg sync.WaitGroup
	for region, bucket := range regBuckets {
		// prefix is declared inside the loop, so each goroutine receives a
		// pointer to its own string.
		prefix := fmt.Sprintf("tagserver/logs/AWSLogs/978380792767/elasticloadbalancing/%s/%s/%s/%s/", region, strYear, strMonth, strDay)
		wg.Add(1)
		go getLogs(region, bucket, LocalDirectory+bucket, &prefix, &wg)
	}
	wg.Wait()

	for tag, hours := range data {
		for hour, imp := range hours {
			// imp is an int: %d prints the number, whereas %s would print
			// "%!s(int=N)".
			fmt.Printf("tag: %s \n hour: %s impression %d\n", tag, hour, imp)
		}
	}

	fmt.Printf("\nTime took %s\n", time.Since(start))
}
// getLogs lists every object in bucket whose key starts with *prefix and
// passes each listing page to a downloader that stores files under directory.
//
// region selects the S3 endpoint. wg is signalled exactly once when the
// function returns, on both the success and the failure path. Listing and
// session errors are logged rather than silently dropped; they do not abort
// the program, so other regions keep running.
func getLogs(region string, bucket string, directory string, prefix *string, wg *sync.WaitGroup) {
	// Deferred so the WaitGroup is released on every return path.
	defer wg.Done()

	sess, err := session.NewSession()
	if err != nil {
		log.Printf("creating AWS session for region %q: %v", region, err)
		return
	}
	client := s3.New(sess, &aws.Config{Region: aws.String(region)})

	manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
		d.PartSize = 5 * 1024 * 1024 // 5 MiB per part
		d.Concurrency = 4
	})
	d := downloader{bucket: bucket, dir: directory, Downloader: manager}

	params := &s3.ListObjectsInput{Bucket: &bucket, Prefix: prefix}
	if err := client.ListObjectsPages(params, d.eachPage); err != nil {
		log.Printf("listing s3://%s/%s: %v", bucket, aws.StringValue(prefix), err)
	}
}
// downloader couples an S3 download manager with the bucket it reads from
// and the directory name (placed under the user's Desktop by downloadToFile)
// that it writes into.
type downloader struct {
	*s3manager.Downloader

	bucket string
	dir    string
}
// eachPage is the per-page callback handed to ListObjectsPages. It downloads
// and processes every object listed on the page, in order, and always returns
// true so that the listing carries on with the next page.
func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
	for i := range page.Contents {
		key := *page.Contents[i].Key
		d.downloadToFile(key)
	}
	return true
}
// Both patterns are compiled once at package initialisation instead of once
// per downloaded file.
var (
	// tagPattern matches a slash followed by four slash-separated segments of
	// letters, digits and underscores, e.g. "/a/b/c/d".
	tagPattern = regexp.MustCompile("/([a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+)")
	// hourPattern matches a "T" and everything after it up to the next colon.
	hourPattern = regexp.MustCompile("T([^:]+)")
)

// downloadToFile downloads the object key from d.bucket into
// <home>/Desktop/<d.dir>/<key>, scans it line by line and, for every line that
// contains both a tag and an hour, increments data[taghash][hour]. The local
// file is removed once it has been scanned.
//
// Setup and download failures panic; a scanner error terminates the program
// via log.Fatal.
func (d *downloader) downloadToFile(key string) {
	usr, err := user.Current()
	if err != nil {
		panic(err)
	}
	file := filepath.Join(usr.HomeDir, "Desktop", d.dir, key)
	if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
		panic(err)
	}

	// os.Create opens the file read-write, so the same handle serves both the
	// download and the scan below.
	fd, err := os.Create(file)
	if err != nil {
		panic(err)
	}
	// Covers the panic paths; the normal path closes explicitly before the
	// file is removed, and a second Close merely returns an error.
	defer fd.Close()

	// Download exactly once and check the result.
	params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
	if _, err := d.Download(fd, params); err != nil {
		panic(err)
	}

	// Rewind to the start of the file (whence 0 = relative to file origin).
	if _, err := fd.Seek(0, 0); err != nil {
		panic(err)
	}

	scanner := bufio.NewScanner(fd)
	for scanner.Scan() {
		line := scanner.Text()

		tag := tagPattern.FindString(line)
		if tag == "" {
			continue
		}
		hour := hourPattern.FindString(line)
		if hour == "" {
			continue
		}
		// "/a/b/c/d" -> "a_b_c_d"
		taghash := strings.TrimLeft(strings.Join(strings.Split(tag, "/"), "_"), "_")

		// Hold the lock only for the shared-map update, not the regex work.
		Lock.Lock()
		if _, ok := data[taghash]; !ok {
			// Create the inner map only the first time a tag is seen, so
			// counts for earlier hours are never discarded.
			data[taghash] = make(map[string]int)
		}
		data[taghash][hour]++
		Lock.Unlock()
	}
	scanErr := scanner.Err()

	fmt.Println(file)
	// Close before removing so that no handle is open on the file when it is
	// deleted.
	fd.Close()
	if err := os.Remove(file); err != nil {
		log.Printf("removing %s: %v", file, err)
	}
	if scanErr != nil {
		log.Fatal(scanErr)
	}
}
你在代码的第167行附近遇到了问题。你希望输出每个taghash
的多个键值对,但实际上只输出了最后一个键值对。你想要的输出应该类似于:
fstvt1_ros_300x600_pp8_1
T00 213434
T01 23432
T02 3324
T03 324
T04 324
T05 0 ...
(这不是真实数据)
你想要的效果是,如果taghash
和hour
存在,则增加该特定对象。如果taghash
和hour
不存在,则创建taghash
并添加新的键并递增。
你可以尝试修改代码的第167行附近的部分如下:
for scanner.Scan() {
m := tag.FindString(scanner.Text())
if m != "" {
Lock.Lock()
arr := strings.Split(m, "/")
taghash := strings.Join(arr, "_")
taghash = strings.TrimLeft(taghash, "_")
m = date.FindString(scanner.Text())
if m != "" {
hour := m
if _, ok := data[taghash]; !ok {
data[taghash] = make(map[string]int)
}
data[taghash][hour]++
}
Lock.Unlock()
}
}
这样修改后,如果taghash
和hour
存在,则会递增该特定对象。如果taghash
和hour
不存在,则会创建taghash
并添加新的键并递增。
英文:
Go Code:
package main
import (
"bufio"
_ "bytes"
"fmt"
_ "io"
"log"
"os"
"os/user"
"path/filepath"
_ "reflect"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
)
var (
// LocalDirectory is the directory-name prefix for downloaded logs; main
// appends the bucket name to it before handing it to getLogs.
LocalDirectory = "s3logs" // Into this directory
// Lock serializes the updates to data performed in downloadToFile.
Lock sync.Mutex
// totalImpressions is only mentioned in commented-out code in main.
totalImpressions int
)
// data maps a tag hash to a per-hour counter (hour string -> count).
var data = make(map[string]map[string]int)
// main starts one getLogs goroutine per region/bucket pair for today's
// load-balancer log prefix, waits for all of them, and then prints the
// contents of data and the elapsed time.
func main() {
start := time.Now()
// GOMAXPROCS sets the maximum number of CPUs that can execute Go code
// simultaneously (four here).
// Shared state touched by the goroutines must be guarded against data races.
runtime.GOMAXPROCS(4)
var wg sync.WaitGroup
var year, month, day = time.Now().Date()
str_year := strconv.Itoa(year)
str_month := strconv.Itoa(int(month))
str_day := strconv.Itoa(day)
// Zero-pad single-digit months and days.
if int(month) < 10 {
str_month = "0" + strconv.Itoa(int(month))
}
if day < 10 {
str_day = "0" + strconv.Itoa(day)
}
// Region name -> bucket name.
regBuckets := map[string]string{
"us-west-1": "pubgears-ca",
"test": "test",
}
for region, bucket := range regBuckets {
prefix := fmt.Sprintf("tagserver/logs/AWSLogs/978380792767/elasticloadbalancing/%s/%s/%s/%s/", region, str_year, str_month, str_day)
wg.Add(1)
go getLogs(region, bucket, LocalDirectory+bucket, &prefix, &wg)
}
wg.Wait()
// Disabled per-provider aggregation, kept for reference. It dates from when
// data held plain int values rather than nested maps.
//salon/t1/728x90/index
//totalImpressions := 0
// var provider = make(map[string]int)
// for key, value := range data {
// key = strings.TrimSpace(key)
// pro := strings.Split(key, "_")[3]
//
// if strings.Contains(pro, "pp") == true || (pro == "pulsepoint") || (pro == "cweb") {
// provider["pulsepoint"] += value
// } else if (pro == "openx") || (pro == "openx3") {
// provider["openx"] += value
// } else if key == " " {
// continue
// } else {
// provider[pro] += value
// }
// totalImpressions += value
// }
for tag, value := range data {
for hour, imp := range value {
// NOTE(review): imp is an int but is formatted with %s; %d is the matching verb.
fmt.Printf("tag: %s \n hour: %s impression %s\n", tag, hour, imp)
}
}
//sl = sl[:len(sl)-1]
elapsed := time.Since(start)
fmt.Printf("\nTime took %s\n", elapsed)
}
// getLogs lists the objects in bucket whose keys start with *prefix and hands
// every listing page to a downloader writing under directory. It calls
// wg.Done once the listing call has returned.
func getLogs(region string, bucket string, directory string, prefix *string, wg *sync.WaitGroup) {
sess := session.New()
client := s3.New(sess, &aws.Config{Region: aws.String(region)})
params := &s3.ListObjectsInput{Bucket: &bucket, Prefix: prefix}
manager := s3manager.NewDownloaderWithClient(client, func(d *s3manager.Downloader) {
d.PartSize = 5 * 1024 * 1024 // 5 MiB per part
d.Concurrency = 4
})
d := downloader{bucket: bucket, dir: directory, Downloader: manager}
// NOTE(review): the error returned by ListObjectsPages is discarded here.
client.ListObjectsPages(params, d.eachPage)
wg.Done()
}
// downloader embeds an S3 download manager and records the bucket to read
// from and the directory name to write into.
type downloader struct {
*s3manager.Downloader
bucket, dir string
}
// eachPage is the callback passed to ListObjectsPages. It calls
// downloadToFile for every object on the page and always returns true.
func (d *downloader) eachPage(page *s3.ListObjectsOutput, more bool) bool {
for _, obj := range page.Contents {
// fmt.Println(obj)
// return true
d.downloadToFile(*obj.Key)
}
return true
}
// downloadToFile downloads the object key from d.bucket to a file under the
// current user's Desktop, scans that file line by line to update data, prints
// the file path and removes the file.
func (d *downloader) downloadToFile(key string) {
// Create the directories in the path
// desktop path
user, errs := user.Current()
if errs != nil {
panic(errs)
}
homedir := user.HomeDir
desktop := homedir + "/Desktop/" + d.dir
file := filepath.Join(desktop, key)
if err := os.MkdirAll(filepath.Dir(file), 0775); err != nil {
panic(err)
}
// Setup the local file
fd, err := os.Create(file)
if err != nil {
panic(err)
}
defer fd.Close()
// Download the file using the AWS SDK
//fmt.Printf("Downloading s3://%s/%s to %s...\n", d.bucket, key, file)
params := &s3.GetObjectInput{Bucket: &d.bucket, Key: &key}
// NOTE(review): Download is called twice; the result of this first call is ignored.
d.Download(fd, params)
_, e := d.Download(fd, params)
if e != nil {
panic(e)
}
// Re-open the downloaded file for reading.
f, err := os.Open(file)
if err != nil {
log.Fatal(err)
}
defer f.Close()
// tag matches a slash followed by four slash-separated segments of letters,
// digits and underscores; date matches a "T" and everything up to the next colon.
tag := regexp.MustCompile("/([a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+/{1}[a-zA-Z0-9_]+)")
date := regexp.MustCompile("T([^:]+)")
scanner := bufio.NewScanner(f)
// HAVING TROUBLE HERE
for scanner.Scan() {
//dateCollection := make(map[string]int)
m := tag.FindString(scanner.Text())
if m != "" {
// stop races
Lock.Lock()
// Turn "/a/b/c/d" into "a_b_c_d".
arr := strings.Split(m, "/")
taghash := strings.Join(arr, "_")
taghash = strings.TrimLeft(taghash, "_")
//data[taghash]++
m = date.FindString(scanner.Text())
if m != "" {
hour := m
// NOTE(review): this assigns a fresh inner map on every matching line, so
// any counts already stored for taghash are discarded before the increment.
data[taghash] = make(map[string]int)
data[taghash][hour]++
}
Lock.Unlock()
}
}
fmt.Println(file)
os.Remove(file)
if err := scanner.Err(); err != nil {
log.Fatal(err)
}
}
The area I'm having trouble with starts around line 167, in the section below:
for scanner.Scan() {
//dateCollection := make(map[string]int)
m := tag.FindString(scanner.Text())
if m != "" {
// stop races
Lock.Lock()
arr := strings.Split(m, "/")
taghash := strings.Join(arr, "_")
taghash = strings.TrimLeft(taghash, "_")
//data[taghash]++
m = date.FindString(scanner.Text())
if m != "" {
hour := m
data[taghash] = make(map[string]int)
data[taghash][hour]++
}
Lock.Unlock()
}
}
fmt.Println(file)
os.Remove(file)
if err := scanner.Err(); err != nil {
log.Fatal(err)
}
The output I'm getting when I print the values in the main func:
fstvt1_ros_300x600_pp8_1 T07 1
I'm expecting an output that looks something like this:
fstvt1_ros_300x600_pp8_1
T00 213434
T01 23432
T02 3324
T03 324
T04 324
T05 0 ...
(this isn't real data)
What I'm trying to do:
I have a map of data var data = make(map[string]map[string]int)
whose key is equal to a taghash, e.g. fstvt1_ros_300x600_pp8_1
. The value is itself a map, whose keys are expected to be strings and whose values are integers. I want multiple entries, one for each key: T01, T02, ... What is currently output is only the last item in the list of items I'm iterating over, instead of a collection of keys and values for each taghash. How do I make it so that, instead of overwriting the data, it adds the new keys T01, T02, ... and, if the taghash and hour are the same, increments that particular entry?
Line of code i'm currently using:
T01, T02..
data[taghash][hour]++
If the taghash and hour already exist, it should increment the count. If the taghash and hour don't exist, it should create the taghash, add the new key, and increment.
答案1
得分: 2
错误出现在
data[taghash] = make(map[string]int)
这一行代码中。它会在每次循环时将data[taghash]
设置为一个新分配的映射。这绝对不是你想要做的。相反:
if _, ok := data[taghash]; !ok {
	// data[taghash] does not exist yet -- create it.
	// Plain assignment is required here: ":=" only accepts identifiers on its
	// left-hand side, so "data[taghash] := ..." does not compile.
	data[taghash] = make(map[string]int)
}
data[taghash][hour]++
这等同于:
# Python
# An augmented assignment cannot target a call result, so fetch the inner
# dict first and then bump the counter on it.
hours = data.setdefault(taghash, {})
hours[hour] = hours.get(hour, 0) + 1
或者
if taghash not in data:
data[taghash] = {}
if hour not in data[taghash]:
data[taghash][hour] = 1
else:
data[taghash][hour] += 1
英文:
The error comes from
data[taghash] = make(map[string]int)
This sets data[taghash]
to a newly allocated map every time through. That's definitely not what you're trying to do. Instead:
if _, ok := data[taghash]; !ok {
	// data[taghash] does not exist -- create it!
	// Plain assignment is required here: ":=" only accepts identifiers on its
	// left-hand side, so "data[taghash] := ..." does not compile.
	data[taghash] = make(map[string]int)
}
data[taghash][hour]++
This is equivalent to:
# Python
# An augmented assignment cannot target a call result, so fetch the inner
# dict first and then bump the counter on it.
hours = data.setdefault(taghash, {})
hours[hour] = hours.get(hour, 0) + 1
Or
if taghash not in data:
data[taghash] = {}
if hour not in data[taghash]:
data[taghash][hour] = 1
else:
data[taghash][hour] += 1
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论