有没有比这更好的并行编程方法?

huangapple go评论86阅读模式
英文:

Is there a better way to do parallel programming than this?

问题

我为从Instagram获取“影响者”粉丝数量编写了这个脚本。

我从中获取的“运行时间”数字在550-750毫秒之间。
这还不错,但我想知道是否可以更好(因为我只学了3周的golang,还是个新手)

package main

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"sync"
	"time"
)

// Response types for the JSON shape {"user": {"followed_by": {"count": N}}}.
// Only the fields the program reads are declared; everything else in the
// response is ignored by the decoder.
type (
	// user is the top-level envelope of the response.
	user struct {
		User userData `json:"user"`
	}

	// userData carries the follower information found under "user".
	userData struct {
		Followers count `json:"followed_by"`
	}

	// count wraps a single integer total.
	count struct {
		Count int `json:"count"`
	}
)

// getFollowerCount starts one worker goroutine that reads usernames from in,
// looks up the Instagram follower count of each, and sends the result on the
// returned channel. The returned channel is closed once in has been closed
// and drained. Several workers may share the same in channel to fan out work.
//
// Exactly one value is sent per username. When a lookup fails the error is
// printed and 0 is sent, so a 0 can mean "unknown" rather than "no followers".
func getFollowerCount(in <-chan string) <-chan int {
	out := make(chan int)
	go func() {
		defer close(out)
		for un := range in {
			n, err := fetchFollowerCount(un)
			if err != nil {
				fmt.Println(err)
			}
			out <- n
		}
	}()
	return out
}

// fetchFollowerCount performs a single lookup. It is a separate function so
// that the deferred Body.Close runs after every request instead of piling up
// until the worker goroutine exits, and so that a failed request (nil
// response) returns before the body is ever touched.
func fetchFollowerCount(un string) (int, error) {
	resp, err := http.Get("https://www.instagram.com/" + un + "/?__a=1")
	if err != nil {
		return 0, err
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return 0, err
	}

	var u user
	if err := json.Unmarshal(body, &u); err != nil {
		return 0, err
	}
	return u.User.Followers.Count, nil
}

// merge fans in any number of int channels into one. Every value received
// from any input is forwarded to the returned channel, which is closed after
// all inputs have been closed and drained. Values from different inputs are
// interleaved in no particular order. With no inputs the result is closed
// right away.
func merge(cs ...<-chan int) <-chan int {
	merged := make(chan int)

	var pending sync.WaitGroup
	pending.Add(len(cs))
	for _, src := range cs {
		// One forwarding goroutine per input channel.
		go func(src <-chan int) {
			defer pending.Done()
			for v := range src {
				merged <- v
			}
		}(src)
	}

	// Close the output only after every forwarder has finished, so nobody
	// sends on a closed channel.
	go func() {
		pending.Wait()
		close(merged)
	}()

	return merged
}

// gen returns an unbuffered channel that yields the given usernames in
// argument order and is then closed. The sending happens in a background
// goroutine, so gen itself returns immediately.
func gen(users ...string) <-chan string {
	names := make(chan string)
	go func() {
		defer close(names)
		for i := 0; i < len(users); i++ {
			names <- users[i]
		}
	}()
	return names
}

// main feeds a fixed list of usernames through a fan-out/fan-in pipeline:
// gen produces the names, ten getFollowerCount workers consume them from the
// shared channel, and merge combines the workers' outputs. Each count is
// printed as it arrives, followed by the total elapsed time.
func main() {
	const workers = 10

	began := time.Now()
	fmt.Println("STARTING UP")

	names := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
	source := gen(names...)

	// All workers read from the same source channel, so each username is
	// handled by exactly one of them.
	outputs := make([]<-chan int, 0, workers)
	for i := 0; i < workers; i++ {
		outputs = append(outputs, getFollowerCount(source))
	}

	for n := range merge(outputs...) {
		fmt.Println(n)
	}

	log.Println("runtime", time.Since(began))
}

以上是您要翻译的内容。

英文:

I made this script for getting the follower count of "influencers" from instagram

the "runtime" number I am getting from it is between 550-750ms.
It is not that bad, but I am wondering whether it could be better (I am a golang noob and have been learning it for only 3 weeks)

package main
import (
&quot;encoding/json&quot;
&quot;fmt&quot;
&quot;io/ioutil&quot;
&quot;log&quot;
&quot;net/http&quot;
&quot;sync&quot;
&quot;time&quot;
)
type user struct {
User userData `json:&quot;user&quot;`
}
type userData struct {
Followers count `json:&quot;followed_by&quot;`
}
type count struct {
Count int `json:&quot;count&quot;`
}
func getFollowerCount(in &lt;-chan string) &lt;-chan int {
out := make(chan int)
go func() {
for un := range in {
URL := &quot;https://www.instagram.com/&quot; + un + &quot;/?__a=1&quot;
resp, err := http.Get(URL)
if err != nil {
// handle error
fmt.Println(err)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
var u user
err = json.Unmarshal(body, &amp;u)
if err != nil {
fmt.Println(err)
}
// return u.User.Followers.Count
out &lt;- u.User.Followers.Count
}
close(out)
}()
return out
}
func merge(cs ...&lt;-chan int) &lt;-chan int {
var wg sync.WaitGroup
out := make(chan int)
output := func(c &lt;-chan int) {
for n := range c {
out &lt;- n
}
wg.Done()
}
wg.Add(len(cs))
for _, c := range cs {
go output(c)
}
go func() {
wg.Wait()
close(out)
}()
return out
}
func gen(users ...string) &lt;-chan string {
out := make(chan string)
go func() {
for _, u := range users {
out &lt;- u
}
close(out)
}()
return out
}
func main() {
start := time.Now()
fmt.Println(&quot;STARTING UP&quot;)
usrs := []string{&quot;kanywest&quot;, &quot;kimkardashian&quot;, &quot;groovyq&quot;, &quot;kendricklamar&quot;, &quot;barackobama&quot;, &quot;asaprocky&quot;, &quot;champagnepapi&quot;, &quot;eminem&quot;, &quot;drdre&quot;, &quot;g_eazy&quot;, &quot;skrillex&quot;}
in := gen(usrs...)
d1 := getFollowerCount(in)
d2 := getFollowerCount(in)
d3 := getFollowerCount(in)
d4 := getFollowerCount(in)
d5 := getFollowerCount(in)
d6 := getFollowerCount(in)
d7 := getFollowerCount(in)
d8 := getFollowerCount(in)
d9 := getFollowerCount(in)
d10 := getFollowerCount(in)
for d := range merge(d1, d2, d3, d4, d5, d6, d7, d8, d9, d10) {
fmt.Println(d)
}
elapsed := time.Since(start)
log.Println(&quot;runtime&quot;, elapsed)
}

答案1

得分: 2

我同意jeevatkm的观点,有很多方法可以实现你的任务并改进它。以下是一些建议:

  1. 将实际执行任务的函数(即从远程服务获取结果的函数)与负责协调所有任务的函数分开。
  2. 最好将错误传递给调用者,而不是在函数内部处理错误。
  3. 由于任务是并行执行的,结果的返回顺序可能是不确定的。因此,除了关注者数量,结果还应包含其他相关信息。

以下是可能的一种实现方式:

package main

import (
    "encoding/json"
    "errors"
    "fmt"
    "net/http"
    "sync"
    "time"
)

type (
	// user is the top-level envelope of the JSON response:
	// {"user": {"followed_by": {"count": N}}}.
	user struct {
		User userData `json:"user"`
	}

	// userData carries the follower information found under "user".
	userData struct {
		Followers count `json:"followed_by"`
	}

	// count wraps a single integer total.
	count struct {
		Count int `json:"count"`
	}

	// follower is the outcome of one lookup. It bundles the username with
	// its follower count and any error, because results produced in
	// parallel arrive in no fixed order and must identify themselves.
	follower struct {
		Username string
		Count    int
		Error    error
	}

	// GetFollowerCountFunc is a function that fetches the follower count
	// of a specific user.
	GetFollowerCountFunc func(string) (int, error)
)

// mockGetFollowerCountFor is a network-free stand-in for the real lookup,
// intended for testing. Usernames of at least nine bytes succeed with a
// count of 10; shorter ones fail with -1 and an error.
func mockGetFollowerCountFor(userName string) (int, error) {
	const minLen = 9
	if len(userName) >= minLen {
		return 10, nil
	}
	return -1, errors.New("在获取关注者数量时出现模拟错误")
}

// getFollowerCountFor fetches the follower count of userName from the remote
// service. It only performs the lookup; coordinating many lookups is left to
// the caller.
//
// It returns -1 and a non-nil error when the request fails, when the server
// answers with a status other than 200 OK, or when the body cannot be
// decoded as JSON.
func getFollowerCountFor(userName string) (int, error) {
	resp, err := http.Get("https://www.instagram.com/" + userName + "/?__a=1")
	if err != nil {
		return -1, err
	}
	defer resp.Body.Close()

	// Reject error responses explicitly: an error page that happens to be
	// valid JSON would otherwise decode into a zero count with a nil error.
	if resp.StatusCode != http.StatusOK {
		return -1, fmt.Errorf("fetching profile %q: unexpected status %s", userName, resp.Status)
	}

	var u user
	if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
		return -1, err
	}
	return u.User.Followers.Count, nil
}

// getFollowersAsync coordinates the lookups: it calls fn once per entry of
// users, each call in its own goroutine, and delivers every outcome as a
// follower value on the returned channel. It returns immediately; the
// channel is closed after all lookups have reported, so callers can simply
// range over it. Results arrive in completion order, not input order.
//
// Errors are not handled here but handed to the caller in follower.Error;
// a failed lookup always carries a Count of -1.
func getFollowersAsync(users []string, fn GetFollowerCountFunc) <-chan follower {
	// The buffer holds one slot per user so that no worker ever blocks on
	// its send, even if the caller reads slowly. It does not limit how many
	// workers run at once: one goroutine is started for every user.
	results := make(chan follower, len(users))

	lookup := func(name string, done *sync.WaitGroup) {
		defer done.Done()
		res := follower{Username: name}
		if res.Count, res.Error = fn(name); res.Error != nil {
			res.Count = -1
		}
		results <- res
	}

	// Dispatch and wait in the background so this function does not block.
	go func() {
		var workers sync.WaitGroup
		for _, name := range users {
			workers.Add(1)
			go lookup(name, &workers)
		}
		workers.Wait()
		close(results)
	}()

	return results
}

// main looks up the follower counts of a fixed list of usernames in
// parallel and prints one line per user (either the count or the error),
// followed by the total elapsed time.
func main() {
	start := time.Now()
	fmt.Println("启动中")
	usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}

	results := getFollowersAsync(usrs, getFollowerCountFor)
	// For testing without network access:
	// results := getFollowersAsync(usrs, mockGetFollowerCountFor)
	for r := range results {
		if r.Error != nil {
			// The trailing newline keeps each error on its own line
			// instead of running into the next result.
			fmt.Printf("用户 '%s' 的错误 => %v\n", r.Username, r.Error)
			continue
		}
		fmt.Printf("%s: %d\n", r.Username, r.Count)
	}

	elapsed := time.Since(start)
	fmt.Println("运行时间", elapsed)
}

希望对你有所帮助!

英文:

I agree with jeevatkm, there are numerous ways to implement your task and improve it. Some notes:

  1. Separate the function that actually does the job (i.e. fetches the result from the remote service) from the function that is responsible for coordinating all the jobs.
  2. It is good practice to propagate an error to the caller instead of consuming (handling) it inside the called function.
  3. Since the jobs are done in parallel, the results may be returned in an undetermined order. Thus, besides the follower count, each result should contain other related information (such as the username).

The following implementation may be one alternative:

package main
import (
&quot;encoding/json&quot;
&quot;errors&quot;
&quot;fmt&quot;
&quot;net/http&quot;
&quot;sync&quot;
&quot;time&quot;
)
type user struct {
User userData `json:&quot;user&quot;`
}
type userData struct {
Followers count `json:&quot;followed_by&quot;`
}
type count struct {
Count int `json:&quot;count&quot;`
}
//Wrap username, count, and error. See (3) above.
type follower struct {
Username string
Count    int
Error    error
}
//GetFollowerCountFunc is a function for
//fetching follower count of a specific user.
type GetFollowerCountFunc func(string) (int, error)
//Mockup function for test
func mockGetFollowerCountFor(userName string) (int, error) {
if len(userName) &lt; 9 {
return -1, errors.New(&quot;mocking error in get follower count&quot;)
}
return 10, nil
}
//Fetch result from remote service. See (1) above.
func getFollowerCountFor(userName string) (int, error) {
URL := &quot;https://www.instagram.com/&quot; + userName + &quot;/?__a=1&quot;
resp, err := http.Get(URL)
if err != nil {
return -1, err
}
defer resp.Body.Close()
var u user
if err := json.NewDecoder(resp.Body).Decode(&amp;u); err != nil {
return -1, err
}
return u.User.Followers.Count, nil
}
//Function that coordinates/distributes the jobs. See (1), (2) above.
func getFollowersAsync(users []string, fn GetFollowerCountFunc) &lt;-chan follower {
//allocate channels for storing result
//number of allocated channels define the maximum *parallel* worker
followers := make(chan follower, len(users))
//The following is also valid
//followers := make(chan follower, 5)
//Do the job distribution in goroutine (Asynchronously)
go func() {
var wg sync.WaitGroup
wg.Add(len(users))
for _, u := range users {
//Run a *parallel* worker
go func(uid string) {
cnt, err := fn(uid)
if err != nil {
followers &lt;- follower{uid, -1, err}
} else {
followers &lt;- follower{uid, cnt, nil}
}
wg.Done()
}(u)
}
//wait all workers finish
wg.Wait()
//close the channels so the `for ... range` will exit gracefully
close(followers)
}()
//This function will returns immediately
return followers
}
func main() {
start := time.Now()
fmt.Println(&quot;STARTING UP&quot;)
usrs := []string{&quot;kanywest&quot;, &quot;kimkardashian&quot;, &quot;groovyq&quot;, &quot;kendricklamar&quot;, &quot;barackobama&quot;, &quot;asaprocky&quot;, &quot;champagnepapi&quot;, &quot;eminem&quot;, &quot;drdre&quot;, &quot;g_eazy&quot;, &quot;skrillex&quot;}
results := getFollowersAsync(usrs, getFollowerCountFor)
//For TESTING:
//results := getFollowersAsync(usrs, mockGetFollowerCountFor)
for r := range results {
if r.Error != nil {
fmt.Printf(&quot;Error for user &#39;%s&#39; =&gt; %v&quot;, r.Username, r.Error)
} else {
fmt.Printf(&quot;%s: %d\n&quot;, r.Username, r.Count)
}
}
elapsed := time.Since(start)
fmt.Println(&quot;runtime&quot;, elapsed)
}

答案2

得分: 1

欢迎来到Go语言,愉快学习。

你做得很好,你可以通过多种方式改进你的程序(例如json解码器、更少的chan等)。以下是其中一种方法。执行时间在352-446毫秒之间(请注意,由于涉及网络调用,执行时间可能会根据服务器响应时间而有所变化)。

你的更新代码:

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"sync"
	"time"
)

// Response types for the JSON shape {"user": {"followed_by": {"count": N}}}.
// Fields of the response that are not declared here are ignored on decode.
type (
	// user is the top-level envelope of the response.
	user struct {
		User userData `json:"user"`
	}

	// userData carries the follower information found under "user".
	userData struct {
		Followers count `json:"followed_by"`
	}

	// count wraps a single integer total.
	count struct {
		Count int `json:"count"`
	}
)

// getFollowerCount fetches the follower count of username and sends it on
// result. It is meant to run as a goroutine: wg.Done is called on every
// return path so the coordinator can wait for completion.
//
// Nothing is sent on result when the lookup fails; the failure is logged
// instead. A failure is a request error, a status other than 200 OK, or a
// body that cannot be decoded as JSON.
func getFollowerCount(username string, result chan<- int, wg *sync.WaitGroup) {
	defer wg.Done()

	resp, err := http.Get("https://www.instagram.com/" + username + "/?__a=1")
	if err != nil {
		log.Println(err)
		return
	}
	defer resp.Body.Close()

	// Reject error responses explicitly: an error page that happens to be
	// valid JSON would otherwise be reported as a follower count of 0.
	if resp.StatusCode != http.StatusOK {
		log.Printf("fetching profile %q: unexpected status %s", username, resp.Status)
		return
	}

	var u user
	if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
		log.Println(err)
		return
	}
	result <- u.User.Followers.Count
}

// execute runs one getFollowerCount goroutine per entry of users, all of
// them sending to result, and blocks until every one has finished. It then
// sends the sentinel value -1 on result to tell the receiver that no more
// counts will follow. The channel itself is left open.
func execute(users []string, result chan<- int) {
	var pending sync.WaitGroup
	pending.Add(len(users))
	for i := range users {
		go getFollowerCount(users[i], result, &pending)
	}
	pending.Wait()
	result <- -1
}

// main starts the lookups for a fixed list of usernames in the background
// and prints each follower count as it arrives. Reading stops when the
// sentinel value -1 is received (or the channel is closed), after which the
// total elapsed time is printed.
func main() {
	began := time.Now()
	fmt.Println("STARTING UP")

	names := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}

	counts := make(chan int)
	go execute(names, counts)

	for {
		n, open := <-counts
		if !open || n == -1 {
			break
		}
		fmt.Println(n)
	}

	fmt.Println("runtime:", time.Since(began))
}
英文:

Welcome to Go, happy learning.

You're doing well; you can improve your program in many ways (such as using a JSON decoder, fewer channels, etc.). The following is one approach. Execution time is between 352-446ms (take it with a grain of salt: since a network call is involved in your code, it may vary based on server response time).

Your updated code:

package main
import (
&quot;encoding/json&quot;
&quot;fmt&quot;
&quot;log&quot;
&quot;net/http&quot;
&quot;sync&quot;
&quot;time&quot;
)
type user struct {
User userData `json:&quot;user&quot;`
}
type userData struct {
Followers count `json:&quot;followed_by&quot;`
}
type count struct {
Count int `json:&quot;count&quot;`
}
func getFollowerCount(username string, result chan&lt;- int, wg *sync.WaitGroup) {
defer wg.Done()
reqURL := &quot;https://www.instagram.com/&quot; + username + &quot;/?__a=1&quot;
resp, err := http.Get(reqURL)
if err != nil {
log.Println(err)
return
}
defer resp.Body.Close()
var u user
if err := json.NewDecoder(resp.Body).Decode(&amp;u); err != nil {
log.Println(err)
return
}
result &lt;- u.User.Followers.Count
}
func execute(users []string, result chan&lt;- int) {
wg := &amp;sync.WaitGroup{}
for _, username := range users {
wg.Add(1)
go getFollowerCount(username, result, wg)
}
wg.Wait()
result &lt;- -1
}
func main() {
start := time.Now()
fmt.Println(&quot;STARTING UP&quot;)
usrs := []string{&quot;kanywest&quot;, &quot;kimkardashian&quot;, &quot;groovyq&quot;, &quot;kendricklamar&quot;, &quot;barackobama&quot;, &quot;asaprocky&quot;, &quot;champagnepapi&quot;, &quot;eminem&quot;, &quot;drdre&quot;, &quot;g_eazy&quot;, &quot;skrillex&quot;}
result := make(chan int)
go execute(usrs, result)
for v := range result {
if v == -1 {
break
}
fmt.Println(v)
}
elapsed := time.Since(start)
fmt.Println(&quot;runtime:&quot;, elapsed)
}

huangapple
  • 本文由 发表于 2017年7月20日 11:51:25
  • 转载请务必保留本文链接:https://go.coder-hub.com/45204814.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定