Go版本的Playwright支持Chromium、Firefox和WebKit的Web自动化测试,兼容Windows、Linux和MacOS,默认支持headless无头模式,安装方便、绿色高效、兼容性强、运行速度快。
支持的主要功能有:
跨多个页面、域名和iframe的场景;
在执行操作(如单击、填充)之前自动等待元素就绪;
拦截网络活动,以便对网络请求进行存根(stub)和模拟(mock);
模拟移动设备、地理位置和权限;
通过阴影穿透选择器支持web组件;
鼠标和键盘的原生事件;
上传和下载文件;
安装
使用go get安装playwright-go。
go get github.com/playwright-community/playwright-go
另外还需要安装驱动和浏览器依赖:
go run github.com/playwright-community/playwright-go/cmd/playwright install --with-deps
# Or
go install github.com/playwright-community/playwright-go/cmd/playwright
playwright install --with-deps
自动化测试
package main
import (
"fmt"
"log"
"github.com/playwright-community/playwright-go"
)
// main starts Playwright, launches Chromium, opens the news listing page and
// prints the text of the "h3 > a" headline found inside every "ul.list"
// element, numbered from 1. Any failure terminates the program via log.Fatalf.
func main() {
	pw, err := playwright.Run()
	if err != nil {
		log.Fatalf("could not start playwright: %v", err)
	}
	browser, err := pw.Chromium.Launch()
	if err != nil {
		log.Fatalf("could not launch browser: %v", err)
	}
	page, err := browser.NewPage()
	if err != nil {
		log.Fatalf("could not create page: %v", err)
	}
	if _, err = page.Goto("http://www.dzwww.com/xinwen/"); err != nil {
		log.Fatalf("could not goto: %v", err)
	}
	entries, err := page.QuerySelectorAll("ul.list")
	if err != nil {
		log.Fatalf("could not get entries: %v", err)
	}
	for i, entry := range entries {
		titleElement, err := entry.QuerySelector("h3 > a")
		if err != nil {
			log.Fatalf("could not get title element: %v", err)
		}
		// Per the Playwright API, QuerySelector reports "no match" as a nil
		// handle with a nil error; skip such entries instead of dereferencing.
		if titleElement == nil {
			continue
		}
		title, err := titleElement.TextContent()
		if err != nil {
			log.Fatalf("could not get text content: %v", err)
		}
		fmt.Printf("%d: %s\n", i+1, title)
	}
	if err = browser.Close(); err != nil {
		log.Fatalf("could not close browser: %v", err)
	}
	if err = pw.Stop(); err != nil {
		log.Fatalf("could not stop Playwright: %v", err)
	}
}
playwright.Run()创建playwright对象。
pw.Chromium.Launch()创建Chromium浏览器对象。
browser.NewPage()创建一个新页面。
page.Goto打开一个url页面。
page.QuerySelectorAll使用css选择器查找所有元素。
entry.QuerySelector查找第一个元素。
titleElement.TextContent()获取元素的textContent文本内容。
browser.Close()关闭浏览器。
pw.Stop()关闭playwright对象。
录屏视频
package main
import (
"fmt"
"log"
"github.com/playwright-community/playwright-go"
)
// main records a video of a short browsing session: it opens a page whose
// context is configured to record into "videos/", visits three sites, closes
// the page and prints the path of the saved video file.
func main() {
	pw, err := playwright.Run()
	if err != nil {
		log.Fatalf("could not launch playwright: %v", err)
	}
	browser, err := pw.Chromium.Launch()
	if err != nil {
		log.Fatalf("could not launch Chromium: %v", err)
	}
	page, err := browser.NewPage(playwright.BrowserNewContextOptions{
		RecordVideo: &playwright.BrowserNewContextOptionsRecordVideo{
			Dir: playwright.String("videos/"),
		},
	})
	if err != nil {
		log.Fatalf("could not create page: %v", err)
	}
	// gotoPage navigates the recorded page to url and logs progress; it keeps
	// its own error variable instead of mutating the enclosing one.
	gotoPage := func(url string) {
		fmt.Printf("Visiting %s\n", url)
		if _, err := page.Goto(url); err != nil {
			log.Fatalf("could not goto: %v", err)
		}
		fmt.Printf("Visited %s\n", url)
	}
	gotoPage("http://whatsmyuseragent.org")
	gotoPage("https://github.com")
	gotoPage("https://microsoft.com")
	// The page is closed before the video path is queried, as in the original
	// flow of this example.
	if err := page.Close(); err != nil {
		log.Fatalf("failed to close page: %v", err)
	}
	path, err := page.Video().Path()
	if err != nil {
		log.Fatalf("failed to get video path: %v", err)
	}
	fmt.Printf("Saved to %s\n", path)
	if err = browser.Close(); err != nil {
		log.Fatalf("could not close browser: %v", err)
	}
	if err = pw.Stop(); err != nil {
		log.Fatalf("could not stop Playwright: %v", err)
	}
}
这里用到了playwright的浏览器上下文配置:playwright.BrowserNewContextOptions,通过其中的RecordVideo.Dir指定录屏视频的保存目录。
执行js
package main
import (
"fmt"
"log"
"github.com/playwright-community/playwright-go"
)
// main opens the Wikipedia article on JavaScript, evaluates the
// "mw.config.values" expression in the page to obtain a JS handle, reads its
// "wgPageName" property and prints it.
func main() {
	pw, err := playwright.Run()
	if err != nil {
		log.Fatal(err)
	}
	browser, err := pw.Chromium.Launch()
	if err != nil {
		log.Fatalf("could not launch browser: %v\n", err)
	}
	page, err := browser.NewPage()
	if err != nil {
		log.Fatalf("could not create page: %v\n", err)
	}
	if _, err = page.Goto("https://en.wikipedia.org/wiki/JavaScript"); err != nil {
		log.Fatalf("could not goto: %v\n", err)
	}
	// mw.config.values is the JS object where Wikipedia stores wiki metadata
	handle, err := page.EvaluateHandle("mw.config.values", struct{}{})
	if err != nil {
		log.Fatalf("could not acquire JSHandle: %v\n", err)
	}
	// mw.config.values.wgPageName is the name of the current page
	pageName, err := handle.(playwright.JSHandle).GetProperty("wgPageName")
	if err != nil {
		log.Fatalf("could not get Wikipedia page name: %v\n", err)
	}
	fmt.Printf("Lots of type casting, brought to you by %s\n", pageName)
	if err := browser.Close(); err != nil {
		log.Fatalf("could not close browser: %v\n", err)
	}
	if err := pw.Stop(); err != nil {
		log.Fatalf("could not stop Playwright: %v\n", err)
	}
}
page.EvaluateHandle可以传递js代码字符串来执行js。
并行抓取
package main
import (
"archive/zip"
"bytes"
"context"
"encoding/csv"
"fmt"
"io"
"io/ioutil"
"log"
"math"
"net/http"
"os"
"path/filepath"
"strings"
"time"
"github.com/playwright-community/playwright-go"
)
// assertErrorToNilf terminates the program when err is non-nil and does
// nothing otherwise.
//
// message is a format string with a single verb for err. It is expanded with
// fmt.Errorf rather than log.Fatalf so that the "%w" verb used by callers is
// rendered as the error text; the log/Printf family does not support "%w" and
// would print "%!w(...)" instead.
func assertErrorToNilf(message string, err error) {
	if err == nil {
		return
	}
	log.Fatal(fmt.Errorf(message, err))
}
// worker consumes jobs until the jobs channel is closed.
//
// Each job is handed to processJob with a 12 second deadline. A job that
// fails or times out has its Try counter incremented and is put back on the
// jobs channel; once a job has been tried 3 times it is reported on results
// with Success=false. Successful jobs are reported on results with
// Success=true. The id parameter is currently unused.
func worker(id int, jobs chan Job, results chan<- Job, browser playwright.Browser) {
	for job := range jobs {
		fmt.Printf("starting (try: %d): %s\n", job.Try, job.URL)
		if job.Try >= 3 {
			job.Success = false
			job.err = fmt.Errorf("Stopped with domain %s (%w)", job.URL, job.err)
			results <- job
			continue
		}
		jobCtx, cancel := context.WithTimeout(context.Background(), time.Second*12)
		// Buffered so the goroutine can always deliver its result and exit,
		// even when the select below has already given up on a timeout.
		internalJobError := make(chan error, 1)
		// The job is passed by value so the goroutine never observes the
		// mutations made to the loop variable further down.
		go func(j Job) {
			internalJobError <- processJob(browser, j, jobCtx)
		}(job)

		var err error
		select {
		case <-jobCtx.Done():
			err = fmt.Errorf("timeout (try: %d)", job.Try+1)
		case err = <-internalJobError:
		}
		// Cancel only after the outcome has been decided: cancelling from the
		// goroutine would make both select cases ready at once and could turn
		// a finished job into a spurious timeout.
		cancel()

		if err != nil {
			job.err = err
			job.Success = false
			job.Try++
			jobs <- job
			continue
		}
		job.Success = true
		job.err = nil
		results <- job
	}
}
// processJob opens job.URL (over plain http) in a fresh browser context, waits
// for the network to become idle and stores a screenshot as
// <cwd>/out/<URL with dots replaced by dashes>.png.
//
// The browser context is closed when the function returns and also as soon as
// ctx is done, which aborts an in-flight navigation or screenshot. It returns
// a wrapped error describing the step that failed, or nil on success.
func processJob(browser playwright.Browser, job Job, ctx context.Context) error {
	// Named browserCtx so the local does not shadow the context package.
	browserCtx, err := browser.NewContext(playwright.BrowserNewContextOptions{
		UserAgent: playwright.String("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"),
	})
	if err != nil {
		return fmt.Errorf("could not create context: %w", err)
	}
	defer browserCtx.Close()
	// Tear the browser context down when the caller cancels or the deadline
	// passes. This goroutine only exits once ctx is done, so callers must
	// always cancel ctx.
	go func() {
		<-ctx.Done()
		browserCtx.Close()
	}()
	page, err := browserCtx.NewPage()
	if err != nil {
		return fmt.Errorf("could not create page: %w", err)
	}
	_, err = page.Goto("http://"+job.URL, playwright.PageGotoOptions{
		WaitUntil: playwright.WaitUntilStateNetworkidle,
	})
	if err != nil {
		return fmt.Errorf("could not goto: %s: %w", job.URL, err)
	}
	cwd, err := os.Getwd()
	if err != nil {
		return fmt.Errorf("could not get cwd %w", err)
	}
	screenshotPath := filepath.Join(cwd, "out", strings.ReplaceAll(job.URL, ".", "-")+".png")
	_, err = page.Screenshot(playwright.PageScreenshotOptions{
		Path: playwright.String(screenshotPath),
	})
	if err != nil {
		return fmt.Errorf("could not screenshot: %w", err)
	}
	return nil
}
// Job describes one domain to screenshot together with its processing state.
type Job struct {
	// URL is the host to visit; processJob prefixes it with "http://".
	URL string
	// Try counts the attempts made so far; worker gives up once it reaches 3.
	Try int
	// err holds the error of the most recent failed attempt, nil on success.
	err error
	// Success reports whether the job finished without error.
	Success bool
}
// main downloads the Alexa top-domain list, creates the "out" directory in
// the working directory, launches a headed Chromium and screenshots up to
// maxJobs domains with workerCount concurrent workers, printing one
// success/error line per domain.
func main() {
	const (
		maxJobs     = 30 // upper bound on the number of domains processed
		workerCount = 3  // number of concurrent worker goroutines
	)

	// The messages below use %v rather than %w: they are ultimately formatted
	// for logging, and %w is only meaningful to fmt.Errorf.
	log.Println("Downloading Alexa top domains")
	topDomains, err := getAlexaTopDomains()
	assertErrorToNilf("could not get alexa top domains: %v", err)
	log.Println("Downloaded Alexa top domains successfully")

	cwd, err := os.Getwd()
	assertErrorToNilf("could not get cwd %v", err)
	// An already existing output directory is fine; anything else is fatal.
	if err := os.Mkdir(filepath.Join(cwd, "out"), 0777); err != nil && !os.IsExist(err) {
		assertErrorToNilf("could not create output directory %v", err)
	}

	pw, err := playwright.Run()
	assertErrorToNilf("could not launch playwright: %v", err)
	browser, err := pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
		Headless: playwright.Bool(false),
	})
	assertErrorToNilf("could not launch Chromium: %v", err)

	numberOfJobs := int(math.Min(maxJobs, float64(len(topDomains))))
	// Both channels are buffered with numberOfJobs so that workers can
	// re-queue a failed job on jobs without blocking.
	jobs := make(chan Job, numberOfJobs)
	results := make(chan Job, numberOfJobs)
	for w := 1; w <= workerCount; w++ {
		go worker(w, jobs, results, browser)
	}
	for _, url := range topDomains[:numberOfJobs] {
		jobs <- Job{
			URL: url,
		}
	}
	// Every job produces exactly one result, after success or after its
	// final retry.
	for a := 0; a < numberOfJobs; a++ {
		job := <-results
		if job.Success {
			fmt.Println("success:", job.URL)
		} else {
			fmt.Println("error:", job.URL, job.err)
		}
	}
	// All results are in, so no worker will re-queue anymore; closing jobs
	// ends the workers' range loops.
	close(jobs)
	close(results)
	assertErrorToNilf("could not close browser: %v", browser.Close())
	assertErrorToNilf("could not stop Playwright: %v", pw.Stop())
}
// getAlexaTopDomains downloads the Alexa top-1m zip archive, reads the first
// file in it as CSV and returns the second column of every row, in file
// order (presumably the rows are "rank,domain" — verify against the feed).
//
// It returns an error when the download fails or answers with a non-200
// status, when the archive is unreadable or empty, or when a CSV row is
// malformed or has fewer than two fields.
func getAlexaTopDomains() ([]string, error) {
	resp, err := http.Get("http://s3.amazonaws.com/alexa-static/top-1m.csv.zip")
	if err != nil {
		return nil, fmt.Errorf("could not get: %w", err)
	}
	// Registered before the body is read so it is released on every path,
	// including a failed read.
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("could not get: unexpected status %s", resp.Status)
	}
	// zip.NewReader needs random access, so the archive is buffered in memory.
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("could not read body: %w", err)
	}
	zipReader, err := zip.NewReader(bytes.NewReader(body), int64(len(body)))
	if err != nil {
		return nil, fmt.Errorf("could not create zip reader: %w", err)
	}
	if len(zipReader.File) == 0 {
		return nil, fmt.Errorf("could not read alexa file: archive contains no files")
	}
	alexaFile, err := zipReader.File[0].Open()
	if err != nil {
		return nil, fmt.Errorf("could not read alexa file: %w", err)
	}
	defer alexaFile.Close()
	reader := csv.NewReader(alexaFile)
	out := make([]string, 0)
	for {
		record, err := reader.Read()
		if err == io.EOF {
			return out, nil
		}
		if err != nil {
			return nil, fmt.Errorf("could not read csv: %w", err)
		}
		if len(record) < 2 {
			return nil, fmt.Errorf("could not read csv: expected at least 2 fields, got %d", len(record))
		}
		out = append(out, record[1])
	}
}
ConnectOverCDP
import (
"testing"
"github.com/playwright-community/playwright-go"
)
// TestPlaywight attaches to an already running Chrome over the DevTools
// protocol, opens a page, reads window.normal.video_list from it and logs the
// "main_url" property of every entry.
//
// Failures are reported through t (the snippet imports neither log nor fmt),
// and the browser connection and Playwright driver are released via defer,
// which still runs after t.Fatalf.
func TestPlaywight(t *testing.T) {
	pw, err := playwright.Run()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := pw.Stop(); err != nil {
			t.Errorf("could not stop Playwright: %v", err)
		}
	}()
	// Chrome must be started beforehand with remote debugging enabled, e.g.:
	// "C:\Program Files (x86)\Google\Chrome\Application\chrome.exe" --remote-debugging-port=9222 --user-data-dir="D:\playwright\user_data" --incognito
	browser, err := pw.Chromium.ConnectOverCDP("http://localhost:9222")
	// browser, err := pw.Chromium.Launch()
	if err != nil {
		t.Fatalf("could not launch browser: %v", err)
	}
	defer func() {
		if err := browser.Close(); err != nil {
			t.Errorf("could not close browser: %v", err)
		}
	}()
	page, err := browser.NewPage()
	if err != nil {
		t.Fatalf("could not create page: %v", err)
	}
	if _, err = page.Goto("https://www.xx.com/123123", playwright.PageGotoOptions{
		WaitUntil: playwright.WaitUntilStateNetworkidle,
	}); err != nil {
		t.Fatalf("could not goto: %v", err)
	}
	handle, err := page.EvaluateHandle("window.normal", struct{}{})
	if err != nil {
		t.Fatalf("could not acquire JSHandle: %v", err)
	}
	videoList, err := handle.(playwright.JSHandle).GetProperty("video_list")
	if err != nil {
		t.Fatalf("could not get page name: %v", err)
	}
	t.Logf("Lots of type casting, brought to you by %s", videoList)
	properties, err := videoList.GetProperties()
	if err != nil {
		t.Fatalf("could not get video list properties: %v", err)
	}
	for _, value := range properties {
		url, err := value.GetProperty("main_url")
		if err != nil {
			t.Fatalf("could not get main_url: %v", err)
		}
		// Output is visible with "go test -v" or when the test fails.
		t.Logf("https:%v", url)
	}
}
参考
https://playwright-community.github.io/playwright-go/
原文地址:https://blog.csdn.net/lilongsy/article/details/129396575
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。
如若转载,请注明出处:http://www.7code.cn/show_51233.html
如若内容造成侵权/违法违规/事实不符,请联系代码007邮箱:suwngjj01@126.com进行投诉反馈,一经查实,立即删除!