英文:
how to have go find packages online?
问题
我正在尝试使用LiteIDE x22运行一个Go程序,但是我收到以下消息:
C:/Go/bin/go.exe build [C:/Users/admins/Desktop/desktp/worm_scraper-master]
worm_scraper.go:11:2: 找不到包 "github.com/codegangsta/cli",在以下任何位置都找不到:
C:\Go\src\pkg\github.com\codegangsta\cli (来自 $GOROOT)
C:\users\admins\gostuff\src\github.com\codegangsta\cli (来自 $GOPATH)
worm_scraper.go:12:2: 找不到包 "github.com/puerkitobio/goquery",在以下任何位置都找不到:
C:\Go\src\pkg\github.com\puerkitobio\goquery (来自 $GOROOT)
C:\users\admins\gostuff\src\github.com\puerkitobio\goquery (来自 $GOPATH)
错误:进程以代码 1 退出。
我认为这意味着它在我的硬盘上寻找而不是在线上,对吗?(顺便说一句,我对编程一窍不通,只是尝试运行别人写的代码)如何让它访问网络?以下是完整的代码:
package main
import (
"errors"
"fmt"
"os"
"os/exec"
"regexp"
"strings"
"github.com/codegangsta/cli"
"github.com/puerkitobio/goquery"
)
// MainSite is the root URL of the serial; it is printed on the cover page of
// the generated Markdown file.
const MainSite = "https://parahumans.wordpress.com/"

// TableOfContents is the page that is scraped for the arc headings and the
// chapter links.
const TableOfContents = MainSite + "table-of-contents/"
// Arc is one story arc of the serial together with the chapters assigned to it.
type Arc struct {
	// Identifier is the key chapters are matched against in WhichArc: the
	// first run of digits of an "Arc ..." heading, or "E" for the epilogue.
	Identifier string
	// Title is the trimmed heading line taken from the table of contents.
	Title string
	// Chapters holds the chapters of this arc in the order they were added.
	Chapters []Chapter
}
// Chapter is a single chapter: its link from the table of contents plus the
// content that Parse fills in.
type Chapter struct {
	// Title starts as the link text from the table of contents and is
	// replaced by the text of the page's "h1.entry-title" once Parse succeeds.
	Title string
	// Url is the chapter address; Parse prefixes "https://" when it does not
	// start with "http".
	Url string
	// Tags are the texts of the ".entry-meta a[rel=tag]" links on the page.
	Tags []string
	// Paragraphs are the formatted body paragraphs in the order Parse
	// visited them.
	Paragraphs []Paragraph
	// Retries counts failed download attempts; Parse panics once it exceeds 3.
	Retries int
	// DatePosted is the text of the page's "time.entry-date" element.
	DatePosted string
}
// Paragraph is the HTML of one chapter paragraph; Format converts it to
// Markdown in place.
type Paragraph string

// Format rewrites the paragraph from HTML to Markdown in place:
//   - <em> and <i> tags become "*", <strong> and <b> tags become "**",
//   - line breaks are removed,
//   - runs of spaces after a full stop are collapsed to a single space.
func (p *Paragraph) Format() {
	// Applied in order; opening and closing tags map to the same marker.
	replacements := [][2]string{
		{"<em>", "*"}, {"</em>", "*"}, {"<i>", "*"}, {"</i>", "*"},
		{"<strong>", "**"}, {"</strong>", "**"}, {"<b>", "**"}, {"</b>", "**"},
		{"\n", ""},
	}

	s := string(*p)
	for _, r := range replacements {
		s = strings.Replace(s, r[0], r[1], -1)
	}

	// The previous code replaced ". " with ". ", which changes nothing.
	// Repeat the replacement until no double space follows a full stop, so
	// that three or more spaces are collapsed as well.
	for strings.Contains(s, ".  ") {
		s = strings.Replace(s, ".  ", ". ", -1)
	}

	*p = Paragraph(s)
}
// WhichArc returns the arc in arcList whose Identifier equals the start of
// the chapter title: the first two bytes of the title with any "." removed
// (so "1.1" looks up "1", "10.3" looks up "10" and "E.2" looks up "E").
//
// When no arc matches, the error is non-nil and the returned *Arc is an
// empty placeholder rather than nil, so callers that ignore the error can
// still append to it without crashing.
func (ch *Chapter) WhichArc(arcList []*Arc) (*Arc, error) {
	// Titles shorter than two bytes are used whole; slicing them with [:2]
	// would panic.
	prefix := ch.Title
	if len(prefix) > 2 {
		prefix = prefix[:2]
	}
	key := strings.Replace(prefix, ".", "", -1)

	for _, arc := range arcList {
		if arc.Identifier == key {
			return arc, nil
		}
	}
	return &Arc{}, errors.New("章节'" + ch.Title + "'与任何弧不匹配")
}
// Parse downloads the chapter page, fills in Title, Tags, DatePosted and
// Paragraphs from it and then sends true on done.
//
// A failed download increments Retries and restarts Parse in a new goroutine
// that reports on the same channel. Parse panics once Retries exceeds 3.
func (ch *Chapter) Parse(done chan bool) {
	if ch.Retries > 3 {
		panic("章节URL'" + ch.Url + "'超时次数过多")
	}

	// Make sure the address begins with http so goquery can use it.
	if !strings.HasPrefix(ch.Url, "http") {
		ch.Url = "https://" + ch.Url
	}

	doc, err := goquery.NewDocument(ch.Url)
	if err != nil {
		// Try again.
		ch.Retries++
		go ch.Parse(done)
		return
	}

	// The page heading replaces the link text used so far.
	ch.Title = doc.Find("h1.entry-title").Text()

	doc.Find(".entry-meta a[rel=tag]").Each(func(_ int, s *goquery.Selection) {
		ch.Tags = append(ch.Tags, s.Text())
	})
	// The fallback has to run after the loop: inside the callback the slice
	// has just been appended to, so the emptiness check could never be true.
	if len(ch.Tags) == 0 {
		ch.Tags = append(ch.Tags, "NONE")
	}

	ch.DatePosted = doc.Find("time.entry-date").Text()

	doc.Find(".entry-content > p").Each(func(_ int, s *goquery.Selection) {
		// Paragraphs that contain a link are skipped; the original comment
		// identifies them as the previous/next navigation.
		if len(s.Find("a").Nodes) > 0 {
			return
		}

		st, err := s.Html()
		if err != nil {
			// Nothing usable could be rendered for this paragraph.
			return
		}

		// NOTE(review): these look up HTML attributes literally named
		// "padding-left" and "text-align". If the site sets them through a
		// style attribute instead, the first two branches never match -
		// confirm against the page markup.
		var para Paragraph
		if val, ok := s.Attr("padding-left"); ok && val == "30px" {
			// Special (indented) block.
			para = Paragraph(" " + st)
		} else if val, ok := s.Attr("text-align"); ok && val == "center" {
			// Separator paragraph.
			para = Paragraph("----------")
		} else {
			// Ordinary paragraph.
			para = Paragraph(st)
		}

		para.Format()
		ch.Paragraphs = append(ch.Paragraphs, para)
	})

	// Signal success.
	done <- true
}
// ParseArcs scans the table-of-contents text line by line and builds one Arc
// for every trimmed line that starts with "Arc" or "Epilogue".
//
// An "Arc" line is identified by the first run of digits it contains (empty
// when it has none); an "Epilogue" line is identified by "E". The trimmed
// line becomes the arc title. All other lines are ignored.
func ParseArcs(s string) []*Arc {
	digits := regexp.MustCompile(`[0-9]+`)
	found := make([]*Arc, 0)

	for _, raw := range strings.Split(s, "\n") {
		heading := strings.TrimSpace(raw)

		var id string
		switch {
		case strings.HasPrefix(heading, "Arc"):
			id = digits.FindString(heading)
		case strings.HasPrefix(heading, "Epilogue"):
			id = "E"
		default:
			continue
		}

		found = append(found, &Arc{Identifier: id, Title: heading})
	}
	return found
}
func main() {
// 定义应用程序
app := cli.NewApp()
app.Name = "Worm Scraper"
app.Usage = "一个工具,让您获取由Wildbow创作的连载网络小说Worm的更新的EPUB副本"
app.Version = "1.0"
app.Author = "Benjamin Harris"
// 定义应用程序标志
app.Flags = []cli.Flag{
cli.BoolFlag{"pdf", "将书保存为PDF而不是EPUB(如果可能的话)"},
cli.BoolFlag{"with-link", "包含章节在线链接"},
cli.BoolFlag{"with-tags", "包含每个章节发布的标签"},
cli.BoolFlag{"with-date", "包含每个章节发布的日期"},
}
// 应用程序的核心
app.Action = func(context *cli.Context) {
// 开始程序
fmt.Println("开始爬取Worm")
// 从目录中获取弧的列表
fmt.Println("从目录中获取链接...")
contents, err := goquery.NewDocument(TableOfContents)
if err != nil {
panic("获取目录失败!" + err.Error())
}
// 解析弧
arcs := ParseArcs(contents.Find(".entry-content").Text())
// 现在获取弧章节的链接
contents.Find(".entry-content a:not([class*=share-icon])").Each(func(_ int, s *goquery.Selection) {
ch := Chapter{}
ch.Title = strings.Replace(strings.TrimSpace(s.Text()), "\n", "", -1)
ch.Url, _ = s.Attr("href")
if ch.Title == "" {
return
}
arc, _ := ch.WhichArc(arcs)
arc.Chapters = append(arc.Chapters, ch)
})
// 手动添加Epilogue中缺失的章节
c := Chapter{
Title: "E.2",
Url: "https://parahumans.wordpress.com/2013/11/05/teneral-e-2/",
}
a, _ := c.WhichArc(arcs)
a.Chapters = append(a.Chapters, c)
copy(a.Chapters[1+1:], a.Chapters[1:])
a.Chapters[1] = c
// 现在开始获取章节
chapters := 0
done := make(chan bool)
for _, arc := range arcs {
for i, _ := range arc.Chapters {
chapters++
go arc.Chapters[i].Parse(done)
}
}
fmt.Println("开始解析", chapters, "章节")
fmt.Print("完成: ")
totalChapters := chapters
for {
select {
case <-done:
chapters--
fmt.Print(totalChapters-chapters, ",")
}
if chapters == 0 {
// 我们已经完成了所有章节
close(done)
fmt.Println()
break
}
}
// 现在让我们将所有这些内容写入文件
fmt.Println("将结果保存到文件...")
f, err := os.OpenFile("Worm.md", os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
if err != nil {
panic(err)
}
defer f.Close()
// 定义分页符
PageBreak := "\n\n"
// 写入封面
f.WriteString("# Worm\n\n")
f.WriteString("By Wildbow\n\n")
f.WriteString("网站: " + MainSite)
// 现在循环遍历弧
for _, arc := range arcs {
f.WriteString(PageBreak + "# " + arc.Title)
for _, chapter := range arc.Chapters {
f.WriteString("\n\n")
f.WriteString("## " + chapter.Title + "\n\n")
if context.Bool("with-tags") {
f.WriteString("**标签:** " + strings.Join(chapter.Tags, ", ") + " ")
}
if context.Bool("with-date") {
f.WriteString("**日期:** " + chapter.DatePosted + " ")
}
if context.Bool("with-link") {
f.WriteString("**链接:** " + chapter.Url + " ")
}
f.WriteString("\n\n")
// 现在保存章节的段落
for _, p := range chapter.Paragraphs {
f.WriteString(string(p) + "\n\n")
}
}
}
// 现在让我们尝试将Markdown文件转换为电子书格式(epub、pdf)
fmt.Print("尝试转换Markdown文件... ")
cmdText := []string{"-S", "Worm.md", "--epub-chapter-level", "2", "-o", "Worm.epub"}
if context.Bool("pdf") {
cmdText = []string{"Worm.md", "-o", "Worm.pdf"}
PageBreak = `<div style="page-break-after: always;"></div>`
}
cmd := exec.Command("pandoc", cmdText...)
err = cmd.Run()
if err != nil {
fmt.Println("转换失败!请确保您已安装Pandoc(http://johnmacfarlane.net/pandoc/installing.html),如果要将生成的Markdown文件转换为电子书兼容格式。同时,我们已经为您保留了Markdown文件。")
} else {
_ = os.Remove("Worm.md")
fmt.Println("完成!")
}
}
// 运行应用程序
app.Run(os.Args)
}
另外,是否可能将其修改为输出为.txt或.mobi格式?如果不行,我将使用Calibre进行转换。提前感谢。哦,如果有关系的话,我使用的是Windows 7 64位操作系统。
<details>
<summary>英文:</summary>
I'm trying to run a go program using LiteIDE x22 but I get the message
C:/Go/bin/go.exe build [C:/Users/admins/Desktop/desktp/worm_scraper-master]
worm_scraper.go:11:2: cannot find package "github.com/codegangsta/cli" in any of:
C:\Go\src\pkg\github.com\codegangsta\cli (from $GOROOT)
C:\users\admins\gostuff\src\github.com\codegangsta\cli (from $GOPATH)
worm_scraper.go:12:2: cannot find package "github.com/puerkitobio/goquery" in any of:
C:\Go\src\pkg\github.com\puerkitobio\goquery (from $GOROOT)
C:\users\admins\gostuff\src\github.com\puerkitobio\goquery (from $GOPATH)
Error: process exited with code 1.
I think this means it's looking for it on my hard drive instead of online, right? (BTW, I'm pretty clueless about programming; I'm just trying to run something someone else wrote.)
How do I get it to access the web?
here's the full code
package main
import (
"errors"
"fmt"
"os"
"os/exec"
"regexp"
"strings"
"github.com/codegangsta/cli"
"github.com/puerkitobio/goquery"
)
// MainSite is the root URL of the serial; it is printed on the cover page of
// the generated Markdown file.
const MainSite = "https://parahumans.wordpress.com/"

// TableOfContents is the page that is scraped for the arc headings and the
// chapter links.
const TableOfContents = MainSite + "table-of-contents/"
// Arc is one story arc of the serial together with the chapters assigned to it.
type Arc struct {
	// Identifier is the key chapters are matched against in WhichArc: the
	// first run of digits of an "Arc ..." heading, or "E" for the epilogue.
	Identifier string
	// Title is the trimmed heading line taken from the table of contents.
	Title string
	// Chapters holds the chapters of this arc in the order they were added.
	Chapters []Chapter
}
// Chapter is a single chapter: its link from the table of contents plus the
// content that Parse fills in.
type Chapter struct {
	// Title starts as the link text from the table of contents and is
	// replaced by the text of the page's "h1.entry-title" once Parse succeeds.
	Title string
	// Url is the chapter address; Parse prefixes "https://" when it does not
	// start with "http".
	Url string
	// Tags are the texts of the ".entry-meta a[rel=tag]" links on the page.
	Tags []string
	// Paragraphs are the formatted body paragraphs in the order Parse
	// visited them.
	Paragraphs []Paragraph
	// Retries counts failed download attempts; Parse panics once it exceeds 3.
	Retries int
	// DatePosted is the text of the page's "time.entry-date" element.
	DatePosted string
}
// Paragraph is the HTML of one chapter paragraph; Format converts it to
// Markdown in place.
type Paragraph string

// Format rewrites the paragraph from HTML to Markdown in place:
//   - <em> and <i> tags become "*", <strong> and <b> tags become "**",
//   - line breaks are removed,
//   - runs of spaces after a full stop are collapsed to a single space.
func (p *Paragraph) Format() {
	// Applied in order; opening and closing tags map to the same marker.
	replacements := [][2]string{
		{"<em>", "*"}, {"</em>", "*"}, {"<i>", "*"}, {"</i>", "*"},
		{"<strong>", "**"}, {"</strong>", "**"}, {"<b>", "**"}, {"</b>", "**"},
		{"\n", ""},
	}

	s := string(*p)
	for _, r := range replacements {
		s = strings.Replace(s, r[0], r[1], -1)
	}

	// The previous code replaced ". " with ". ", which changes nothing.
	// Repeat the replacement until no double space follows a full stop, so
	// that three or more spaces are collapsed as well.
	for strings.Contains(s, ".  ") {
		s = strings.Replace(s, ".  ", ". ", -1)
	}

	*p = Paragraph(s)
}
// WhichArc returns the arc in arcList whose Identifier equals the start of
// the chapter title: the first two bytes of the title with any "." removed
// (so "1.1" looks up "1", "10.3" looks up "10" and "E.2" looks up "E").
//
// When no arc matches, the error is non-nil and the returned *Arc is an
// empty placeholder rather than nil, so callers that ignore the error can
// still append to it without crashing.
func (ch *Chapter) WhichArc(arcList []*Arc) (*Arc, error) {
	// Titles shorter than two bytes are used whole; slicing them with [:2]
	// would panic.
	prefix := ch.Title
	if len(prefix) > 2 {
		prefix = prefix[:2]
	}
	key := strings.Replace(prefix, ".", "", -1)

	for _, arc := range arcList {
		if arc.Identifier == key {
			return arc, nil
		}
	}
	return &Arc{}, errors.New("chapter '" + ch.Title + "' did not match any Arcs")
}
// Parse downloads the chapter page, fills in Title, Tags, DatePosted and
// Paragraphs from it and then sends true on done.
//
// A failed download increments Retries and restarts Parse in a new goroutine
// that reports on the same channel. Parse panics once Retries exceeds 3.
func (ch *Chapter) Parse(done chan bool) {
	if ch.Retries > 3 {
		panic("Chapter url '" + ch.Url + "' has timed out too many times")
	}

	// Make sure the address begins with http so goquery can use it.
	if !strings.HasPrefix(ch.Url, "http") {
		ch.Url = "https://" + ch.Url
	}

	doc, err := goquery.NewDocument(ch.Url)
	if err != nil {
		// Try again.
		ch.Retries++
		go ch.Parse(done)
		return
	}

	// The page heading replaces the link text used so far.
	ch.Title = doc.Find("h1.entry-title").Text()

	doc.Find(".entry-meta a[rel=tag]").Each(func(_ int, s *goquery.Selection) {
		ch.Tags = append(ch.Tags, s.Text())
	})
	// The fallback has to run after the loop: inside the callback the slice
	// has just been appended to, so the emptiness check could never be true.
	if len(ch.Tags) == 0 {
		ch.Tags = append(ch.Tags, "NONE")
	}

	ch.DatePosted = doc.Find("time.entry-date").Text()

	doc.Find(".entry-content > p").Each(func(_ int, s *goquery.Selection) {
		// Paragraphs that contain a link are skipped; the original comment
		// identifies them as the previous/next navigation.
		if len(s.Find("a").Nodes) > 0 {
			return
		}

		st, err := s.Html()
		if err != nil {
			// Nothing usable could be rendered for this paragraph.
			return
		}

		// NOTE(review): these look up HTML attributes literally named
		// "padding-left" and "text-align". If the site sets them through a
		// style attribute instead, the first two branches never match -
		// confirm against the page markup.
		var para Paragraph
		if val, ok := s.Attr("padding-left"); ok && val == "30px" {
			// Special (indented) block.
			para = Paragraph(" " + st)
		} else if val, ok := s.Attr("text-align"); ok && val == "center" {
			// Separator paragraph.
			para = Paragraph("----------")
		} else {
			// Ordinary paragraph.
			para = Paragraph(st)
		}

		para.Format()
		ch.Paragraphs = append(ch.Paragraphs, para)
	})

	// Signal success.
	done <- true
}
// ParseArcs scans the table-of-contents text line by line and builds one Arc
// for every trimmed line that starts with "Arc" or "Epilogue".
//
// An "Arc" line is identified by the first run of digits it contains (empty
// when it has none); an "Epilogue" line is identified by "E". The trimmed
// line becomes the arc title. All other lines are ignored.
func ParseArcs(s string) []*Arc {
	digits := regexp.MustCompile(`[0-9]+`)
	found := make([]*Arc, 0)

	for _, raw := range strings.Split(s, "\n") {
		heading := strings.TrimSpace(raw)

		var id string
		switch {
		case strings.HasPrefix(heading, "Arc"):
			id = digits.FindString(heading)
		case strings.HasPrefix(heading, "Epilogue"):
			id = "E"
		default:
			continue
		}

		found = append(found, &Arc{Identifier: id, Title: heading})
	}
	return found
}
func main() {
// Define the app
app := cli.NewApp()
app.Name = "Worm Scraper"
app.Usage = "A tool to let you get an updated EPUB copy of the serial web novel Worm, by Wildbow"
app.Version = "1.0"
app.Author = "Benjamin Harris"
// Define the application flags
app.Flags = []cli.Flag{
cli.BoolFlag{"pdf", "Save the book as a PDF instead of an EPUB, if possible"},
cli.BoolFlag{"with-link", "Include a link to the chapter online"},
cli.BoolFlag{"with-tags", "Include the tags each chapter was posted under"},
cli.BoolFlag{"with-date", "Include the date each chapter was posted"},
}
// The heart of the application
app.Action = func(context *cli.Context) {
// Starting the program
fmt.Println("Starting to scrape Worm")
// Get the list of arcs from the table of contents
fmt.Println("Gathering links from table of contents...")
contents, err := goquery.NewDocument(TableOfContents)
if err != nil {
panic("Failed to get the table of contents! " + err.Error())
}
// Parse the arcs
arcs := ParseArcs(contents.Find(".entry-content").Text())
// Now get the links for the arc chapters
contents.Find(".entry-content a:not([class*=share-icon])").Each(func(_ int, s *goquery.Selection) {
ch := Chapter{}
ch.Title = strings.Replace(strings.TrimSpace(s.Text()), "\n", "", -1)
ch.Url, _ = s.Attr("href")
if ch.Title == "" {
return
}
arc, _ := ch.WhichArc(arcs)
arc.Chapters = append(arc.Chapters, ch)
})
// Manually add missing chapter in Epilogue
c := Chapter{
Title: "E.2",
Url: "https://parahumans.wordpress.com/2013/11/05/teneral-e-2/",
}
a, _ := c.WhichArc(arcs)
a.Chapters = append(a.Chapters, c)
copy(a.Chapters[1+1:], a.Chapters[1:])
a.Chapters[1] = c
// Now start getting the chapters
chapters := 0
done := make(chan bool)
for _, arc := range arcs {
for i, _ := range arc.Chapters {
chapters++
go arc.Chapters[i].Parse(done)
}
}
fmt.Println("Starting to parse", chapters, "chapters")
fmt.Print("Finished: ")
totalChapters := chapters
for {
select {
case <-done:
chapters--
fmt.Print(totalChapters-chapters, ",")
}
if chapters == 0 {
// We're done with all the chapters
close(done)
fmt.Println()
break
}
}
// And let's write all this stuff to a file now
fmt.Println("Saving results to file...")
f, err := os.OpenFile("Worm.md", os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
if err != nil {
panic(err)
}
defer f.Close()
// Define pagebreak
PageBreak := "\n\n"
// Write the cover
f.WriteString("# Worm\n\n")
f.WriteString("By Wildbow\n\n")
f.WriteString("Website: " + MainSite)
// Now loop through the Arcs
for _, arc := range arcs {
f.WriteString(PageBreak + "# " + arc.Title)
for _, chapter := range arc.Chapters {
f.WriteString("\n\n")
f.WriteString("## " + chapter.Title + "\n\n")
if context.Bool("with-tags") {
f.WriteString("**Tags:** " + strings.Join(chapter.Tags, ", ") + " ")
}
if context.Bool("with-date") {
f.WriteString("**Date:** " + chapter.DatePosted + " ")
}
if context.Bool("with-link") {
f.WriteString("**Link:** " + chapter.Url + " ")
}
f.WriteString("\n\n")
// Now save the chapter's paragraphs
for _, p := range chapter.Paragraphs {
f.WriteString(string(p) + "\n\n")
}
}
}
// Now let's try to convert the markdown file into an ebook format (epub, pdf)
fmt.Print("Attempting to convert Markdown file... ")
cmdText := []string{"-S", "Worm.md", "--epub-chapter-level", "2", "-o", "Worm.epub"}
if context.Bool("pdf") {
cmdText = []string{"Worm.md", "-o", "Worm.pdf"}
PageBreak = `<div style="page-break-after: always;"></div>`
}
cmd := exec.Command("pandoc", cmdText...)
err = cmd.Run()
if err != nil {
fmt.Println("Conversion failed! Make sure you've installed Pandoc (http://johnmacfarlane.net/pandoc/installing.html) if you want to convert the generated Markdown file to an ebook compatible format. In the meantime, we've left you the Markdown file.")
} else {
_ = os.Remove("Worm.md")
fmt.Println("Completed!")
}
}
// Run the application
app.Run(os.Args)
}
oh also would it be possible to modify it to output as .txt or .mobi? if not I'll just convert using Calibre. Thanks in advance.
Oh if it matters I'm using windows 7 64-bit
</details>
# 答案1
**得分**: 3
go编译器不直接从互联网导入库,但它知道如何为您获取它们。当您导入类似`github.com/codegangsta/cli`的内容时,它不会在该URL上查找,而是在您的GOPATH/src文件夹中查找。
`go get`命令可以为您在其URL中获取库并将其下载到GOPATH中。
如果您已经设置了GOPATH(如果没有,请阅读[如何编写Go代码][1]),那么在运行代码之前,请运行`go get library`命令,以便go工具为您下载它。在您的示例中,您应该运行以下命令:
go get github.com/codegangsta/cli
go get github.com/puerkitobio/goquery
这将分别将库下载到`GOPATH/src/github.com/codegangsta/cli`和`GOPATH/src/github.com/puerkitobio/goquery`。
[1]: http://golang.org/doc/code.html
<details>
<summary>英文:</summary>
The go compiler doesn't import the libraries directly from the internet but it does know how to fetch them for you. When you import something like `github.com/codegangsta/cli` it doesn't look for it on that URL but instead it looks for it on your GOPATH/src folder.
The `go get` command can fetch the library from its URL and download it into your GOPATH.
If you have already set up your GOPATH (if not, read [How to Write Go Code][1]), then before running your code, run the command `go get library` so that the go tool downloads it for you. In your example you should run the following commands:
go get github.com/codegangsta/cli
go get github.com/puerkitobio/goquery
That would download the libraries to `GOPATH/src/github.com/codegangsta/cli` and `GOPATH/src/github.com/puerkitobio/goquery` respectively.
[1]: http://golang.org/doc/code.html
</details>
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论