本文介绍: 辛辛苦苦搬砖真辛苦啊。
爬虫用户代理(User-Agent)和 IP 地址自动生成
辛辛苦苦搬砖真辛苦啊
package com.glodon.gbes.utils;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
/**
 * Crawler helper utilities: builds randomized browser User-Agent strings and
 * random IPv4 addresses drawn from a fixed table of address ranges.
 *
 * <p>All methods are static. The no-argument variants share a single
 * {@link Random} instance; the overloads taking a {@link Random} let callers
 * supply their own (for example a seeded one for reproducible output).
 *
 * @author luochao
 * @since 20240126
 */
public class CrawlerHelper {

    /** Shared random source for the no-argument methods (avoids allocating one per call). */
    private static final Random RANDOM = new Random();

    /** User-Agent product prefixes, each ending with the opening parenthesis of the platform section. */
    private static final List<String> CONSUMERS = Arrays.asList("Mozilla/5.0 (", "Mozilla/5.01 (", "Mozilla/4.0 (");

    /** Operating-system / platform tokens. Repeated entries are picked proportionally more often. */
    private static final List<String> SYSTEM_TYPES = Arrays.asList(
            "Windows NT 10.0; WOW64", "Macintosh; Intel Mac OS X 10_12_6", "Macintosh; Intel Mac OS X 10.12; rv:65.0",
            "Windows NT 10.0; Win64; x64", "Windows NT 10.0; WOW64; Trident/7.0; rv:11.0", "Windows NT 6.1; WOW64",
            "Windows NT 6.3; Win64; x64", "Windows NT 10.0; WOW64", "Macintosh mips64", "Macintosh mips64",
            "Macintosh; Intel Mac OS X 11_2_3", "Macintosh; Intel Mac OS X 11_2_1", "Macintosh; Intel Mac OS X 11_2_1",
            "Macintosh; Intel Mac OS X 11_2_1", "Macintosh; Intel Mac OS X 10_15_4", "Macintosh; Intel Mac OS X 10_14_6",
            "Macintosh; Intel Mac OS X 10_14_6", "Macintosh; Intel Mac OS X 10_14_6", "Macintosh; Intel Mac OS X 10_14_6",
            "Macintosh; Intel Mac OS X 10_14_4", "Macintosh; Intel Mac OS X 10_14_4", "Macintosh; Intel Mac OS X 10_14_3",
            "Macintosh; Intel Mac OS X 10_14_2", "Macintosh; Intel Mac OS X 10_14_0", "Macintosh; Intel Mac OS X 10_13_6",
            "Macintosh; Intel Mac OS X 10_13_6", "Macintosh; Intel Mac OS X 10_13_6", "Macintosh; Intel Mac OS X 10_13_6",
            "Macintosh; Intel Mac OS X 10_12_6", "Macintosh; Intel Mac OS X 10_11_6", "Macintosh; Intel Mac OS X 10_11_6"
    );

    /** Closes the platform section and starts the rendering-engine token. */
    private static final String KERNEL_PREFIX = ") AppleWebKit";

    /** Rendering-engine version suffixes (appended after "AppleWebKit" and after " Safari"). */
    private static final List<String> BROWSER_KERNELS = Arrays.asList(
            "/537.36", "/607.3.10", "/13605.3.8", "/605.1.15", "/546.36", "/543.36", "/542.36", "/540.36", "/538.36",
            "/536.36", "/535.36", "/533.36", "/530.36", "/528.36", "/525.36", "/511.36", "/509.36", "/508.36",
            "/506.36", "/504.36", "/503.36", "/500.36", "/499.36", "/498.36", "/496.36", "/494.36", "/491.36",
            "/490.36", "/489.36", "/483.36", "/482.36", "/481.36", "/476.36", "/473.36", "/472.36", "/471.36",
            "/469.36", "/466.36"
    );

    /** Text placed between the engine version and the browser version token. */
    private static final String KERNEL_SUFFIX = " (KHTML, like Gecko) ";

    /** Browser version tokens. */
    private static final List<String> BROWSER_VERSIONS = Arrays.asList(
            "Chrome/90.0.4430.72", "Chrome/72.0.3626.121", "Chrome/87.0.27523.82", "Chrome/88.0.30832.82", "Chrome/89.0.4389.128",
            "Chrome/88.0.48357.82", "Chrome/86.0.16571.82", "Chrome/80.0.3987.87", "Chrome/73.0.3683.75", "Chrome/108.0.5359.95",
            "Chrome/89.0.4350.7", "Chrome/89.0.4389.90", "Chrome/67.0.1762.3", "Chrome/91.0.4455.2", "Chrome/87.0.4280.88",
            "Chrome/87.0.34697.82", "Chrome/87.0.40937.82", "Chrome/89.0.20219.82", "Chrome/60.0.3112.90", "Chrome/86.0.33219.82",
            "Chrome/87.0.7030.82", "Chrome/64.0.3282.140", "Chrome/88.0.40585.82", "Chrome/89.0.5219.82", "Chrome/80.0.3987.122",
            "Chrome/87.0.48110.82", "Chrome/89.0.4389.82", "Chrome/86.0.49343.82", "Chrome/48.0.2564.116", "Chrome/86.0.27485.82",
            "Chrome/86.0.11902.82", "Chrome/89.0.4385.82", "Chrome/89.0.43907.82", "Chrome/87.0.32496.82", "Chrome/89.0.48906.82",
            "Chrome/91.0.4466.0", "Chrome/88.0.4324.192", "Chrome/89.0.45365.82", "Chrome/75.0.3770.80", "Chrome/87.0.17682.82",
            "Chrome/80.0.3987.132", "Chrome/86.0.5210.82", "Chrome/89.0.42050.82", "Chrome/76.0.3809.87", "Chrome/67.0.200.124",
            "Chrome/89.0.33519.82", "Chrome/78.0.3904.108", "Chrome/76.0.3809.100", "Chrome/88.0.46354.82", "Chrome/87.0.44790.82",
            "Chrome/88.0.4324.182", "Chrome/87.0.4280.67", "Chrome/82.0.4077.0", "Chrome/88.0.48271.82", "Chrome/81.0.4044.129",
            "Chrome/87.0.48788.82", "Chrome/86.0.36322.82", "Chrome/90.0.4430.11", "Chrome/87.0.7809.82", "Chrome/86.0.4240.198",
            "Chrome/88.0.35623.82", "Chrome/87.0.42434.82", "Chrome/89.0.4389.114", "Chrome/80.0.3987.100", "Chrome/89.0.34528.82",
            "Chrome/69.0.3497.100", "Chrome/53.0.2785.104", "Chrome/84.0.4147.105", "Chrome/87.0.37035.82", "Chrome/77.0.3865.90",
            // Trailing space removed from the next entry: it produced a double space before " Safari".
            "Chrome/83.0.4103.56", "Chrome/78.0.3872.0", "Chrome/81.0.4023.0", "Chrome/79.0.3945.136", "Chrome/86.0.10846.82",
            "Chrome/89.0.9922.82", "Chrome/80.0.3987.149", "Chrome/88.0.4324.50", "Chrome/80.0.3987.106", "Chrome/86.0.26591.82",
            "Chrome/89.0.14272.82", "Chrome/88.0.9787.82", "Chrome/87.0.28829.82", "Chrome/84.0.4147.89", "Chrome/41.0.2227.0"
    );

    /** Optional trailing browser tokens (each starts with a space); appended to roughly 20% of agents. */
    private static final List<String> BROWSER_TYPES = Arrays.asList(
            " Edg/89.0.774.75", " Maxthon/5.1.60", " Explorer/10.15.0.21066", " Edg/90.0.818.39", " Edg/89.0.774.77",
            " Edg/88.0.705.29", " Edg/88.0.705.74", " Edg/87.0.664.66", " OPR/73.0.3856.260", " Edg/83.0.478.33",
            " Edg/78.0.244.0", " Edge/13.18362"
    );

    /**
     * IP pool: inclusive {first, last} address pairs. Each address is the 32-bit
     * big-endian IPv4 value stored in a signed int, so ranges at or above
     * 128.0.0.0 appear as negative numbers.
     */
    private static final int[][] IP_RANGES = {
            { 607649792, 608174079 },     // 36.56.0.0-36.63.255.255
            { 1038614528, 1039007743 },   // 61.232.0.0-61.237.255.255
            { 1783627776, 1784676351 },   // 106.80.0.0-106.95.255.255
            { 2035023872, 2035154943 },   // 121.76.0.0-121.77.255.255
            { 2078801920, 2079064063 },   // 123.232.0.0-123.235.255.255
            { -1950089216, -1948778497 }, // 139.196.0.0-139.215.255.255
            { -1425539072, -1425014785 }, // 171.8.0.0-171.15.255.255
            { -1236271104, -1235419137 }, // 182.80.0.0-182.92.255.255
            { -770113536, -768606209 },   // 210.25.0.0-210.47.255.255
            { -569376768, -564133889 },   // 222.16.0.0-222.95.255.255
    };

    /**
     * Builds a random User-Agent string using the shared random source.
     *
     * @return a User-Agent string assembled from the built-in token tables
     */
    public static String getRandomAgent() {
        return getRandomAgent(RANDOM);
    }

    /**
     * Builds a random User-Agent string using the supplied random source.
     *
     * <p>Layout: {@code <product> (<platform>) AppleWebKit<engine> (KHTML, like Gecko)
     * <browser> Safari<engine>[ <extra>]}. The same engine version is used in both
     * places, and the extra token is appended when a draw from 0..9 is 8 or 9.
     *
     * @param random source of randomness, e.g. a seeded instance for reproducible output
     * @return the assembled User-Agent string
     * @throws NullPointerException if {@code random} is null
     */
    public static String getRandomAgent(Random random) {
        // StringBuilder: the buffer never leaves this method, so no synchronization is needed.
        StringBuilder userAgent = new StringBuilder();
        userAgent.append(pick(CONSUMERS, random));
        userAgent.append(pick(SYSTEM_TYPES, random));
        userAgent.append(KERNEL_PREFIX);
        String kernelVersion = pick(BROWSER_KERNELS, random);
        userAgent.append(kernelVersion);
        userAgent.append(KERNEL_SUFFIX);
        userAgent.append(pick(BROWSER_VERSIONS, random));
        userAgent.append(" Safari").append(kernelVersion);
        if (random.nextInt(10) > 7) {
            userAgent.append(pick(BROWSER_TYPES, random));
        }
        return userAgent.toString();
    }

    /**
     * Generates a random IPv4 address from the built-in range table using the
     * shared random source.
     *
     * @return the address in dotted-decimal notation
     */
    public static String getRandomIp() {
        return getRandomIp(RANDOM);
    }

    /**
     * Generates a random IPv4 address from the built-in range table using the
     * supplied random source. A range is chosen uniformly from the table, then an
     * address uniformly within it, both bounds inclusive.
     *
     * @param random source of randomness, e.g. a seeded instance for reproducible output
     * @return the address in dotted-decimal notation
     * @throws NullPointerException if {@code random} is null
     */
    public static String getRandomIp(Random random) {
        // Index by the table's real length rather than a hard-coded count.
        int[] bounds = IP_RANGES[random.nextInt(IP_RANGES.length)];
        // nextInt's bound is exclusive; +1 makes the last address of the range reachable.
        int ip = bounds[0] + random.nextInt(bounds[1] - bounds[0] + 1);
        // Split the 32-bit value into four octets, most significant first.
        return ((ip >> 24) & 0xff) + "." + ((ip >> 16) & 0xff) + "." + ((ip >> 8) & 0xff) + "." + (ip & 0xff);
    }

    /** Returns a uniformly chosen element of {@code values}. */
    private static String pick(List<String> values, Random random) {
        return values.get(random.nextInt(values.size()));
    }

    /**
     * Demo entry point: prints 10000 random IPs and 10000 random User-Agents,
     * then reports how many distinct values each batch contained.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Set<String> uniqueIps = new HashSet<>();
        for (int i = 0; i < 10000; i++) {
            String randomIp = getRandomIp();
            uniqueIps.add(randomIp);
            System.out.println(randomIp);
        }
        Set<String> uniqueAgents = new HashSet<>();
        for (int i = 0; i < 10000; i++) {
            String userAgent = getRandomAgent();
            uniqueAgents.add(userAgent);
            System.out.println(userAgent);
        }
        System.out.println("================================");
        System.out.println("unique IPs: " + uniqueIps.size());
        System.out.println("unique user agents: " + uniqueAgents.size());
    }
}
原文地址:https://blog.csdn.net/qq_36833168/article/details/135870433
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。
如若转载,请注明出处:http://www.7code.cn/show_63223.html
如若内容造成侵权/违法违规/事实不符,请联系代码007邮箱:suwngjj01@126.com进行投诉反馈,一经查实,立即删除!
声明:本站所有文章,如无特殊说明或标注,均为本站原创发布。任何个人或组织,在未征得本站同意时,禁止复制、盗用、采集、发布本站内容到任何网站、书籍等各类媒体平台。如若本站内容侵犯了原著者的合法权益,可联系我们进行处理。