英文:
Is it a better way to do parallel programming that this?
问题
我为从Instagram获取“影响者”粉丝数量编写了这个脚本。
我从中获取的“运行时间”数字在550-750毫秒之间。
这还不错,但我想知道是否可以更好(因为我只学了3周的golang,还是个新手)
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"sync"
"time"
)
type user struct {
User userData `json:"user"`
}
type userData struct {
Followers count `json:"followed_by"`
}
type count struct {
Count int `json:"count"`
}
func getFollowerCount(in <-chan string) <-chan int {
out := make(chan int)
go func() {
for un := range in {
URL := "https://www.instagram.com/" + un + "/?__a=1"
resp, err := http.Get(URL)
if err != nil {
// 处理错误
fmt.Println(err)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
var u user
err = json.Unmarshal(body, &u)
if err != nil {
fmt.Println(err)
}
// 返回 u.User.Followers.Count
out <- u.User.Followers.Count
}
close(out)
}()
return out
}
func merge(cs ...<-chan int) <-chan int {
var wg sync.WaitGroup
out := make(chan int)
output := func(c <-chan int) {
for n := range c {
out <- n
}
wg.Done()
}
wg.Add(len(cs))
for _, c := range cs {
go output(c)
}
go func() {
wg.Wait()
close(out)
}()
return out
}
func gen(users ...string) <-chan string {
out := make(chan string)
go func() {
for _, u := range users {
out <- u
}
close(out)
}()
return out
}
func main() {
start := time.Now()
fmt.Println("STARTING UP")
usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
in := gen(usrs...)
d1 := getFollowerCount(in)
d2 := getFollowerCount(in)
d3 := getFollowerCount(in)
d4 := getFollowerCount(in)
d5 := getFollowerCount(in)
d6 := getFollowerCount(in)
d7 := getFollowerCount(in)
d8 := getFollowerCount(in)
d9 := getFollowerCount(in)
d10 := getFollowerCount(in)
for d := range merge(d1, d2, d3, d4, d5, d6, d7, d8, d9, d10) {
fmt.Println(d)
}
elapsed := time.Since(start)
log.Println("runtime", elapsed)
}
以上是您要翻译的内容。
英文:
I made this script for getting the follower count of "influencers" from instagram
the "runtime" number I am getting from it is between 550-750ms.
It is not that bad, but I am wondering whether it could be better or not (as I am a golang noob - learning it 3 weeks only)
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"sync"
"time"
)
type user struct {
User userData `json:"user"`
}
type userData struct {
Followers count `json:"followed_by"`
}
type count struct {
Count int `json:"count"`
}
func getFollowerCount(in <-chan string) <-chan int {
out := make(chan int)
go func() {
for un := range in {
URL := "https://www.instagram.com/" + un + "/?__a=1"
resp, err := http.Get(URL)
if err != nil {
// handle error
fmt.Println(err)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
var u user
err = json.Unmarshal(body, &u)
if err != nil {
fmt.Println(err)
}
// return u.User.Followers.Count
out <- u.User.Followers.Count
}
close(out)
}()
return out
}
func merge(cs ...<-chan int) <-chan int {
var wg sync.WaitGroup
out := make(chan int)
output := func(c <-chan int) {
for n := range c {
out <- n
}
wg.Done()
}
wg.Add(len(cs))
for _, c := range cs {
go output(c)
}
go func() {
wg.Wait()
close(out)
}()
return out
}
func gen(users ...string) <-chan string {
out := make(chan string)
go func() {
for _, u := range users {
out <- u
}
close(out)
}()
return out
}
func main() {
start := time.Now()
fmt.Println("STARTING UP")
usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
in := gen(usrs...)
d1 := getFollowerCount(in)
d2 := getFollowerCount(in)
d3 := getFollowerCount(in)
d4 := getFollowerCount(in)
d5 := getFollowerCount(in)
d6 := getFollowerCount(in)
d7 := getFollowerCount(in)
d8 := getFollowerCount(in)
d9 := getFollowerCount(in)
d10 := getFollowerCount(in)
for d := range merge(d1, d2, d3, d4, d5, d6, d7, d8, d9, d10) {
fmt.Println(d)
}
elapsed := time.Since(start)
log.Println("runtime", elapsed)
}
答案1
得分: 2
我同意jeevatkm的观点,有很多方法可以实现你的任务并改进它。以下是一些建议:
- 将实际执行任务的函数(即从远程服务获取结果的函数)与负责协调所有任务的函数分开。
- 最好将错误传递给调用者,而不是在函数内部处理错误。
- 由于任务是并行执行的,结果的返回顺序可能是不确定的。因此,除了关注者数量,结果还应包含其他相关信息。
以下是可能的一种实现方式:
package main
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"sync"
"time"
)
type user struct {
User userData `json:"user"`
}
type userData struct {
Followers count `json:"followed_by"`
}
type count struct {
Count int `json:"count"`
}
// 封装用户名、关注者数量和错误。参见(3)。
type follower struct {
Username string
Count int
Error error
}
// GetFollowerCountFunc 是用于获取特定用户关注者数量的函数。
type GetFollowerCountFunc func(string) (int, error)
// 用于测试的模拟函数
func mockGetFollowerCountFor(userName string) (int, error) {
if len(userName) < 9 {
return -1, errors.New("在获取关注者数量时出现模拟错误")
}
return 10, nil
}
// 从远程服务获取结果。参见(1)。
func getFollowerCountFor(userName string) (int, error) {
URL := "https://www.instagram.com/" + userName + "/?__a=1"
resp, err := http.Get(URL)
if err != nil {
return -1, err
}
defer resp.Body.Close()
var u user
if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
return -1, err
}
return u.User.Followers.Count, nil
}
// 协调/分发任务的函数。参见(1)、(2)。
func getFollowersAsync(users []string, fn GetFollowerCountFunc) <-chan follower {
// 为存储结果分配通道
// 分配的通道数定义了最大的并行工作者数量
followers := make(chan follower, len(users))
// 以下也是有效的
// followers := make(chan follower, 5)
// 在 goroutine 中进行作业分发(异步执行)
go func() {
var wg sync.WaitGroup
wg.Add(len(users))
for _, u := range users {
// 运行一个并行工作者
go func(uid string) {
cnt, err := fn(uid)
if err != nil {
followers <- follower{uid, -1, err}
} else {
followers <- follower{uid, cnt, nil}
}
wg.Done()
}(u)
}
// 等待所有工作者完成
wg.Wait()
// 关闭通道,以便 `for ... range` 循环可以正常退出
close(followers)
}()
// 此函数将立即返回
return followers
}
func main() {
start := time.Now()
fmt.Println("启动中")
usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
results := getFollowersAsync(usrs, getFollowerCountFor)
// 用于测试:
// results := getFollowersAsync(usrs, mockGetFollowerCountFor)
for r := range results {
if r.Error != nil {
fmt.Printf("用户 '%s' 的错误 => %v", r.Username, r.Error)
} else {
fmt.Printf("%s: %d\n", r.Username, r.Count)
}
}
elapsed := time.Since(start)
fmt.Println("运行时间", elapsed)
}
希望对你有所帮助!
英文:
I agree with jeevatkm, there are numerous way to implement your task and improve it. Some notes:
- Separate the function that actually do the job (i.e. fetch result from remote service) and the function which is responsible for coordinating all the jobs.
- It is a good practice to propagate an
error
to the caller instead of consumes (handles) it in a function to be called. - Since the jobs are done in parallel, the result could be returned in undetermined order. Thus, besides follower count, result should contains other related information(s).
The following implementation may be one alternative:
package main
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"sync"
"time"
)
type user struct {
User userData `json:"user"`
}
type userData struct {
Followers count `json:"followed_by"`
}
type count struct {
Count int `json:"count"`
}
//Wrap username, count, and error. See (3) above.
type follower struct {
Username string
Count int
Error error
}
//GetFollowerCountFunc is a function for
//fetching follower count of a specific user.
type GetFollowerCountFunc func(string) (int, error)
//Mockup function for test
func mockGetFollowerCountFor(userName string) (int, error) {
if len(userName) < 9 {
return -1, errors.New("mocking error in get follower count")
}
return 10, nil
}
//Fetch result from remote service. See (1) above.
func getFollowerCountFor(userName string) (int, error) {
URL := "https://www.instagram.com/" + userName + "/?__a=1"
resp, err := http.Get(URL)
if err != nil {
return -1, err
}
defer resp.Body.Close()
var u user
if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
return -1, err
}
return u.User.Followers.Count, nil
}
//Function that coordinates/distributes the jobs. See (1), (2) above.
func getFollowersAsync(users []string, fn GetFollowerCountFunc) <-chan follower {
//allocate channels for storing result
//number of allocated channels define the maximum *parallel* worker
followers := make(chan follower, len(users))
//The following is also valid
//followers := make(chan follower, 5)
//Do the job distribution in goroutine (Asynchronously)
go func() {
var wg sync.WaitGroup
wg.Add(len(users))
for _, u := range users {
//Run a *parallel* worker
go func(uid string) {
cnt, err := fn(uid)
if err != nil {
followers <- follower{uid, -1, err}
} else {
followers <- follower{uid, cnt, nil}
}
wg.Done()
}(u)
}
//wait all workers finish
wg.Wait()
//close the channels so the `for ... range` will exit gracefully
close(followers)
}()
//This function will returns immediately
return followers
}
func main() {
start := time.Now()
fmt.Println("STARTING UP")
usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
results := getFollowersAsync(usrs, getFollowerCountFor)
//For TESTING:
//results := getFollowersAsync(usrs, mockGetFollowerCountFor)
for r := range results {
if r.Error != nil {
fmt.Printf("Error for user '%s' => %v", r.Username, r.Error)
} else {
fmt.Printf("%s: %d\n", r.Username, r.Count)
}
}
elapsed := time.Since(start)
fmt.Println("runtime", elapsed)
}
答案2
得分: 1
欢迎来到Go语言,愉快学习。
你做得很好,你可以通过多种方式改进你的程序(例如json解码器、更少的chan等)。以下是其中一种方法。执行时间在352-446毫秒之间(请注意,由于涉及网络调用,执行时间可能会根据服务器响应时间而有所变化)。
你的更新代码:
package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"sync"
"time"
)
type user struct {
User userData `json:"user"`
}
type userData struct {
Followers count `json:"followed_by"`
}
type count struct {
Count int `json:"count"`
}
func getFollowerCount(username string, result chan<- int, wg *sync.WaitGroup) {
defer wg.Done()
reqURL := "https://www.instagram.com/" + username + "/?__a=1"
resp, err := http.Get(reqURL)
if err != nil {
log.Println(err)
return
}
defer resp.Body.Close()
var u user
if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
log.Println(err)
return
}
result <- u.User.Followers.Count
}
func execute(users []string, result chan<- int) {
wg := &sync.WaitGroup{}
for _, username := range users {
wg.Add(1)
go getFollowerCount(username, result, wg)
}
wg.Wait()
result <- -1
}
func main() {
start := time.Now()
fmt.Println("STARTING UP")
usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
result := make(chan int)
go execute(usrs, result)
for v := range result {
if v == -1 {
break
}
fmt.Println(v)
}
elapsed := time.Since(start)
fmt.Println("runtime:", elapsed)
}
英文:
Welcome to Go, happy learning.
You're doing good, you can improve your program many ways (such as json decoder, less no of chan, etc). Following is one of the approach. Execution time is between 352-446ms (take it with grain of salt, since network call is involved in your code. Might vary based on server response time).
Your updated code:
package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"sync"
"time"
)
type user struct {
User userData `json:"user"`
}
type userData struct {
Followers count `json:"followed_by"`
}
type count struct {
Count int `json:"count"`
}
func getFollowerCount(username string, result chan<- int, wg *sync.WaitGroup) {
defer wg.Done()
reqURL := "https://www.instagram.com/" + username + "/?__a=1"
resp, err := http.Get(reqURL)
if err != nil {
log.Println(err)
return
}
defer resp.Body.Close()
var u user
if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
log.Println(err)
return
}
result <- u.User.Followers.Count
}
func execute(users []string, result chan<- int) {
wg := &sync.WaitGroup{}
for _, username := range users {
wg.Add(1)
go getFollowerCount(username, result, wg)
}
wg.Wait()
result <- -1
}
func main() {
start := time.Now()
fmt.Println("STARTING UP")
usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
result := make(chan int)
go execute(usrs, result)
for v := range result {
if v == -1 {
break
}
fmt.Println(v)
}
elapsed := time.Since(start)
fmt.Println("runtime:", elapsed)
}
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论