Commit 5575fdfd by 庄欣

f

parent 1a022a68
module example.com/m/v2
go 1.15
require (
github.com/PuerkitoBio/goquery v1.6.1
github.com/chromedp/chromedp v0.6.4
)
github.com/PuerkitoBio/goquery v1.6.1 h1:FgjbQZKl5HTmcn4sKBgvx8vv63nhyhIpv7lJpFGCWpk=
github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/chromedp/cdproto v0.0.0-20210113043257-dabd2f2e7693 h1:11eq/RkpaotwdF6b1TRMcdgQUPNmyFEJOB7zLvh0O/Y=
github.com/chromedp/cdproto v0.0.0-20210113043257-dabd2f2e7693/go.mod h1:55pim6Ht4LJKdVLlyFJV/g++HsEA1hQxPbB5JyNdZC0=
github.com/chromedp/chromedp v0.6.4 h1:Gx7ZkRyrSVmbbDDja/ieNgNGJIvElroPOyeqYQGVDSY=
github.com/chromedp/chromedp v0.6.4/go.mod h1:vodUdJf5dF/b8n0UBJv6NeM/QK28RjP3j+eM7fq4+84=
github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic=
github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww=
github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU=
github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.0.4 h1:5eXU1CZhpQdq5kXbKb+sECH5Ia5KiO6CYzIzdlVx6Bs=
github.com/gobwas/ws v1.0.4/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20210113181707-4bcb84eeeb78 h1:nVuTkr9L6Bq62qpUqKo/RnZCFfzDBL0bYo6w9OJUqZY=
golang.org/x/sys v0.0.0-20210113181707-4bcb84eeeb78/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
package main
import (
"context"
"fmt"
"github.com/PuerkitoBio/goquery"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/runtime"
"github.com/chromedp/chromedp"
"io/ioutil"
"log"
"math"
"math/rand"
"net/http"
"os"
"strconv"
"strings"
_ "sync"
"time"
)
// fuser is one commenter scraped from a post's comment list.
type fuser struct {
	// Nickname is the display name, taken from the avatar image's alt text.
	Nickname string
	// Avatar is the URL of the profile picture, taken from the image's src.
	Avatar string
	// gender is written next to the nickname in the users file.
	gender string
}
//var url = flag.String("l","", "微博链接")
// open loads the post at url in a headless Chrome instance, expands the
// comment list by repeatedly clicking the "load more comments" link, and hands
// the resulting page HTML to getUserInfo together with index.
//
// Browser or navigation failures are logged and make open return without
// calling getUserInfo. Panics raised while scraping are recovered and printed
// so that one bad URL does not abort the caller's loop.
func open(url string, index int) {
	const (
		pageTimeout     = 6000 * time.Second
		commentsPerPage = 15
		maxPages        = 30
		moreCommentsSel = `a[action-type="click_more_comment"]`
	)

	defer func() {
		if err := recover(); err != nil {
			fmt.Println(err) // err is whatever value was passed to panic
		}
	}()

	options := []chromedp.ExecAllocatorOption{
		chromedp.Flag("headless", true),
		// The Blink setting that disables image loading is spelled
		// "imagesEnabled"; the earlier "imageEnable" spelling is not a
		// setting name Blink knows.
		chromedp.Flag("blink-settings", "imagesEnabled=false"),
		chromedp.UserAgent(`Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko)`),
	}

	// Every context gets its own cancel function and every one is deferred;
	// dropping any of them leaves a Chrome process running after open returns.
	allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), options...)
	defer cancelAlloc()

	chromeCtx, cancelChrome := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
	defer cancelChrome()

	// Running with no actions starts the browser before the timeout clock
	// below begins to tick.
	if err := chromedp.Run(chromeCtx); err != nil {
		log.Printf("open %q: starting browser: %v", url, err)
		return
	}

	timeoutCtx, cancelTimeout := context.WithTimeout(chromeCtx, pageTimeout)
	defer cancelTimeout()

	var htmlContent, totalString string
	fmt.Println("准备打开页面", url)
	err := chromedp.Run(timeoutCtx,
		chromedp.Navigate(url),
		chromedp.WaitVisible(`div[node-type="comment_list"]`),
		chromedp.OuterHTML(`span[node-type=comment_btn_text] > span > em:last-child`, &totalString, chromedp.ByQuery),
		chromedp.ActionFunc(func(ctx context.Context) error {
			// pause sleeps for d but gives up early when ctx is cancelled.
			pause := func(d time.Duration) error {
				select {
				case <-ctx.Done():
					return ctx.Err()
				case <-time.After(d):
					return nil
				}
			}

			// totalString is the outer HTML of the counter, e.g. "<em>123</em>".
			// When it cannot be parsed, fall back to the maximum page count.
			totalPage := maxPages
			count := strings.TrimSpace(totalString)
			count = strings.TrimSuffix(strings.TrimPrefix(count, "<em>"), "</em>")
			if totalNum, err := strconv.ParseInt(strings.TrimSpace(count), 10, 64); err == nil && totalNum > 0 {
				totalPage = f2i(math.Floor(float64(totalNum) / commentsPerPage))
				if totalPage > maxPages {
					totalPage = maxPages
				}
			}

			// Scroll to the footer twice, with a pause in between, before
			// looking for the "load more" link. Scrolling is best-effort.
			if err := chromedp.ScrollIntoView("div.WB_footer", chromedp.ByQuery).Do(ctx); err != nil {
				log.Printf("open %q: scrolling to footer: %v", url, err)
			}
			if err := pause(2 * time.Second); err != nil {
				return err
			}
			if err := chromedp.ScrollIntoView("div.WB_footer", chromedp.ByQuery).Do(ctx); err != nil {
				log.Printf("open %q: scrolling to footer: %v", url, err)
			}

			for i := 0; i < totalPage; i++ {
				fmt.Println("开始翻页...", i)
				more := chromedp.Query(moreCommentsSel, chromedp.AtLeast(0), chromedp.After(func(ctx context.Context, _ runtime.ExecutionContextID, nodes ...*cdp.Node) error {
					if len(nodes) == 0 {
						return nil // no "load more" link on the page right now
					}
					// Click the node that was found; a failed click is
					// logged and pagination carries on.
					if err := chromedp.MouseClickNode(nodes[0]).Do(ctx); err != nil {
						log.Printf("open %q: clicking more-comments link: %v", url, err)
					}
					return nil
				}))
				// The query only runs when Do is called; merely constructing
				// it performs no browser interaction.
				if err := more.Do(ctx); err != nil {
					log.Printf("open %q: querying more-comments link: %v", url, err)
				}
				if err := pause(time.Second); err != nil {
					return err
				}
			}
			return nil
		}),
		chromedp.OuterHTML(`document.querySelector("body")`, &htmlContent, chromedp.ByJSPath),
	)
	if err != nil {
		log.Printf("open %q: scraping page: %v", url, err)
		return
	}

	getUserInfo(htmlContent, index)
}
// f2i converts f to an int by formatting it with zero fractional digits and
// parsing the resulting text back as an integer.
func f2i(f float64) int {
	digits := fmt.Sprintf("%1.0f", f)
	// The parse error is dropped on purpose: the caller just gets whatever
	// value Atoi produced for text it could not fully accept.
	n, _ := strconv.Atoi(digits)
	return n
}
// saveUser appends one "nickname gender" line per user to the users file and
// downloads each user's avatar into the avatar directory via saveImage.
//
// index is part of the signature but is not used here. If the users file
// cannot be opened the failure is logged and nothing is saved.
func saveUser(users []*fuser, index int) {
	const (
		path    = "/home/code001/data/users.txt"
		avatars = "/home/code001/data/avatar/"
	)

	file, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0666)
	if err != nil {
		log.Printf("saveUser: opening %s: %v", path, err)
		return
	}
	defer func() {
		if err := file.Close(); err != nil {
			log.Printf("saveUser: closing %s: %v", path, err)
		}
	}()

	for _, user := range users {
		if user == nil {
			continue
		}
		if _, err := file.WriteString(user.Nickname + " " + user.gender + "\r\n"); err != nil {
			log.Printf("saveUser: writing %q to %s: %v", user.Nickname, path, err)
		}
		saveImage(user.Avatar, avatars+safeFileName(user.Nickname)+".jpeg")
	}
}

// safeFileName replaces path separators and NUL bytes in name with '_' so a
// scraped nickname can be used as a single file-name component.
func safeFileName(name string) string {
	return strings.Map(func(r rune) rune {
		switch r {
		case '/', '\\', 0:
			return '_'
		}
		return r
	}, name)
}
// saveImage downloads the resource at src and writes it to the file at path.
//
// The download is best-effort: a request error, a non-200 status, or a read or
// write failure is logged and the function returns without panicking. The
// destination file is only written after the body has been read in full, and
// it is truncated so a shorter image never leaves stale bytes from an earlier
// run behind.
func saveImage(src string, path string) {
	// An <img src> may be protocol-relative ("//host/a.jpg"), which the HTTP
	// client rejects for lack of a scheme; assume HTTPS for those.
	if strings.HasPrefix(src, "//") {
		src = "https:" + src
	}

	// Bound the request so one unresponsive image host cannot stall the run.
	client := http.Client{Timeout: 30 * time.Second}
	res, err := client.Get(src)
	if err != nil {
		log.Printf("saveImage: fetching %q: %v", src, err)
		return
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		log.Printf("saveImage: fetching %q: unexpected status %s", src, res.Status)
		return
	}

	data, err := ioutil.ReadAll(res.Body)
	if err != nil {
		log.Printf("saveImage: reading %q: %v", src, err)
		return
	}

	// WriteFile creates the file if needed and truncates it before writing.
	if err := ioutil.WriteFile(path, data, 0666); err != nil {
		log.Printf("saveImage: writing %s: %v", path, err)
	}
}
// getUserInfo parses html, collects the nickname and avatar URL of every entry
// in the comment list, and passes the collected users to saveUser with index.
//
// Entries lacking either a nickname or an avatar URL are skipped. If html
// cannot be parsed the error is logged and nothing is saved.
func getUserInfo(html string, index int) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		log.Printf("getUserInfo: parsing HTML: %v", err)
		return
	}

	var users []*fuser
	doc.Find("div.list_ul > div").Each(func(_ int, s *goquery.Selection) {
		// The avatar image carries both values: alt holds the nickname and
		// src the picture URL. A missing attribute yields "", which the check
		// below filters out.
		img := s.Find("div.WB_face > a > img")
		nickname, _ := img.Attr("alt")
		avatar, _ := img.Attr("src")
		if nickname == "" || avatar == "" {
			return
		}
		users = append(users, &fuser{
			Nickname: nickname,
			Avatar:   avatar,
			gender:   "未知",
		})
	})

	saveUser(users, index)
}
//var wg *sync.WaitGroup
// init is intentionally empty: the only statement it held, the creation of a
// sync.WaitGroup, is commented out.
func init() {
	// wg = new(sync.WaitGroup)
}
// init seeds the shared math/rand generator once at start-up, so RandomInt
// does not have to reseed it on every call.
func init() {
	rand.Seed(time.Now().UnixNano())
}

// RandomInt returns a pseudo-random integer in the half-open range
// [start, end). When the range is empty (end <= start) it returns start
// instead of panicking.
func RandomInt(start int, end int) int {
	span := end - start
	if span <= 0 {
		return start
	}
	return start + rand.Intn(span)
}
// main scrapes a fixed list of posts one after another, passing each URL to
// open together with a random index, and prints "finished" once all of them
// have been processed.
func main() {
	// Command-line flag parsing and WaitGroup-based concurrency are disabled
	// in this version; the URLs are processed sequentially.

	// Print the panic value rather than crashing with a stack trace.
	defer func() {
		if r := recover(); r != nil {
			fmt.Println(r)
		}
	}()

	urls := []string{
		"https://weibo.com/1618051664/JF9nSmV3n?filter=hot&page_source=hot&root_comment_id=0",
		"https://weibo.com/5634100124/JDIIu47rJ?filter=hot&root_comment_id=0&type=comment",
		"https://weibo.com/1609953534/JF9s1b6wW?filter=hot&root_comment_id=0&type=comment",
		"https://weibo.com/2656274875/JFdQC8TIz?filter=hot&root_comment_id=0",
		"https://weibo.com/1618051664/JF8TP0Kv4?filter=hot&root_comment_id=0",
	}

	for _, target := range urls {
		open(target, RandomInt(1000000, 9000000))
	}

	fmt.Println("finished")
}
File added
github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/chromedp/cdproto v0.0.0-20210113043257-dabd2f2e7693 h1:11eq/RkpaotwdF6b1TRMcdgQUPNmyFEJOB7zLvh0O/Y=
github.com/chromedp/cdproto v0.0.0-20210113043257-dabd2f2e7693/go.mod h1:55pim6Ht4LJKdVLlyFJV/g++HsEA1hQxPbB5JyNdZC0=
github.com/chromedp/chromedp v0.6.4 h1:Gx7ZkRyrSVmbbDDja/ieNgNGJIvElroPOyeqYQGVDSY=
github.com/chromedp/chromedp v0.6.4/go.mod h1:vodUdJf5dF/b8n0UBJv6NeM/QK28RjP3j+eM7fq4+84=
github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic=
github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww=
github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU=
github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.0.4 h1:5eXU1CZhpQdq5kXbKb+sECH5Ia5KiO6CYzIzdlVx6Bs=
github.com/gobwas/ws v1.0.4/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20210113181707-4bcb84eeeb78/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment