Common.java
13.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
package cn.fw.freya.service.crawl.impl;
import cn.fw.freya.config.SeleniumProperties;
import cn.fw.freya.dao.AccountDao;
import cn.fw.freya.dao.CookieDao;
import cn.fw.freya.dao.LivePoolDao;
import cn.fw.freya.dao.VideoPoolDao;
import cn.fw.freya.model.data.Account;
import cn.fw.freya.model.data.FwCookie;
import cn.fw.freya.model.data.ResponseReceived;
import cn.fw.freya.model.data.pool.LivePool;
import cn.fw.freya.model.data.pool.VideoPool;
import cn.fw.freya.model.dto.rpc.ReportAccountDto;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.BufferedHttpEntity;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeDriverService;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.logging.LogEntries;
import org.openqa.selenium.logging.LogType;
import org.openqa.selenium.logging.LoggingPreferences;
import org.openqa.selenium.logging.Logs;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.RemoteWebDriver;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.CollectionUtils;
import java.io.IOException;
import java.util.*;
import java.util.logging.Level;
import java.util.stream.Collectors;
/**
 * Shared helpers for the Selenium-based crawlers: ChromeDriver creation, cookie
 * persistence, parsing of Chrome performance logs, retrieval of response bodies
 * through the ChromeDriver CDP endpoint, and a handful of thin DAO lookups.
 *
 * <p>Created 2021/12/12 1:21.
 *
 * @author unknown
 * @version 1.0
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class Common {

    /** CDP event name that marks a received network response in the performance log. */
    public static final String NETWORK_RESPONSE_RECEIVED = "Network.responseReceived";

    /**
     * Port the ChromeDriver service is started on and that {@link #getHttpResponse}
     * posts CDP commands to.
     * NOTE(review): because the port is fixed, presumably only one driver service
     * can be running at a time - confirm against callers.
     */
    public static final int SELENIUM_PORT = 9005;

    /** Base URL of Kuaishou live playback pages; not referenced inside this class. */
    private static final String PLAYBACK_BASE_URL = "https://live.kuaishou.com/playback/";

    /** Socket and connect timeout, in milliseconds, for calls to the CDP endpoint. */
    private static final int CDP_TIMEOUT_MILLIS = 50000;

    /** URL suffixes treated as static assets and never fetched by {@link #getHttpResponse}. */
    private static final List<String> STATIC_FILE_SUFFIXES = Collections.unmodifiableList(
            Arrays.asList(".png", ".jpg", ".css", ".ico", ".js", ".gif"));

    private final SeleniumProperties seleniumProperties;
    private final CookieDao cookieDao;
    private final AccountDao accountDao;
    private final VideoPoolDao videoPoolDao;
    private final LivePoolDao livePoolDao;

    /**
     * Creates a ChromeDriver bound to {@link #SELENIUM_PORT} with performance
     * logging enabled.
     *
     * <p>Unless {@code seleniumProperties.isDebug()} is true the browser runs
     * headless with a set of hardening/compatibility switches.
     *
     * @return a newly started ChromeDriver
     */
    public ChromeDriver createDriver() {
        /*
         * Reference list of Chrome switches (kept from the original notes):
         *   --disable-infobars                    hide info bars
         *   --no-sandbox                          works around the "DevToolsActivePort file doesn't exist" error
         *   window-size=1920x3000                 set the browser resolution
         *   --disable-gpu                         recommended by Google's docs to work around a bug
         *   --incognito                           incognito (private) mode
         *   --disable-javascript                  disable JavaScript
         *   --start-maximized                     run maximized; without it element lookup may fail
         *   --hide-scrollbars                     hide scrollbars, for some special pages
         *   blink-settings=imagesEnabled=false    do not load images, for speed
         *   --headless                            no visible UI; on Linux without a display startup fails without it
         *   binary_location                       manually point at the browser executable
         *   lang=en_US                            set the language
         *   User-Agent=...                        override the user agent
         *   prefs credentials_enable_service / profile.password_manager_enabled = false
         *                                         suppress the "save password" prompt
         */
        ChromeOptions options = new ChromeOptions();
        if (!seleniumProperties.isDebug()) {
            // Disable the pop-up blocker and force a Chinese UI locale.
            options.addArguments("--disable-popup-blocking", "--lang=zh-CN");
            options.setHeadless(true);
            options.addArguments(
                    "--no-sandbox",               // run without the sandbox
                    "--disable-infobars",         // hide info bars
                    "--disable-dev-shm-usage",
                    "--disable-gpu",              // disable GPU acceleration
                    "--window-size=2560,1024",    // browser resolution
                    "--disable-extensions",       // disable extensions
                    "--no-default-browser-check"); // skip the default-browser check
        }

        // Suppress the "save password" prompt.
        Map<String, Object> prefs = new HashMap<>();
        prefs.put("credentials_enable_service", false);
        prefs.put("profile.password_manager_enabled", false);
        options.setExperimentalOption("prefs", prefs);

        // Enable the performance logger; this sends Network.enable to chromedriver.
        LoggingPreferences logPrefs = new LoggingPreferences();
        logPrefs.enable(LogType.PERFORMANCE, Level.ALL);
        options.setCapability(CapabilityType.LOGGING_PREFS, logPrefs);
        // NOTE(review): presumably the legacy (non-W3C) protocol is required for the
        // performance log / CDP endpoint used by this class - confirm before changing.
        options.setExperimentalOption("w3c", false);

        // Page load and script execution each wait at most 30 seconds;
        // no implicit element-lookup wait is configured.
        Map<Object, Object> timeouts = new HashMap<>();
        timeouts.put("pageLoad", 30 * 1000);
        timeouts.put("script", 30 * 1000);
        options.setCapability("timeouts", timeouts);

        ChromeDriverService service = new ChromeDriverService.Builder()
                .usingPort(SELENIUM_PORT)
                .build();
        return new ChromeDriver(service, options);
    }

    /**
     * Replaces the stored cookies of an account with the browser's current cookies.
     *
     * <p>Nothing is deleted or saved when the browser holds no cookies.
     *
     * @param driver    browser driver whose cookies are read
     * @param accountNo account number
     * @param type      account type
     */
    @Transactional(rollbackFor = Exception.class)
    public void saveCookie(WebDriver driver, String accountNo, Integer type) {
        List<FwCookie> current = this.getTempCookies(driver, accountNo, type);
        if (CollectionUtils.isEmpty(current)) {
            return;
        }
        this.deleteCookies(accountNo, type);
        cookieDao.saveAll(current);
    }

    /**
     * Reads the browser's cookies and converts them to persistable entities.
     *
     * @param driver    browser driver
     * @param accountNo account number
     * @param type      account type
     * @return one {@link FwCookie} per browser cookie, possibly empty
     */
    public List<FwCookie> getTempCookies(WebDriver driver, String accountNo, Integer type) {
        Set<Cookie> browserCookies = driver.manage().getCookies();
        return browserCookies.stream()
                .map(cookie -> FwCookie.toDb(cookie, accountNo, type))
                .collect(Collectors.toList());
    }

    /**
     * Deletes the stored cookies of the given account and account type.
     *
     * @param accountNo account number
     * @param type      account type
     */
    public void deleteCookies(String accountNo, Integer type) {
        cookieDao.deleteByAccountNoAndType(accountNo, type);
    }

    /**
     * Loads the stored cookies of the given account and account type.
     *
     * @param accountNo account number
     * @param type      account type
     * @return the stored cookies, or an empty list when the DAO returns {@code null}
     */
    public List<FwCookie> loadCookie(String accountNo, Integer type) {
        // orElseGet avoids allocating the fallback list when cookies were found.
        return Optional.ofNullable(cookieDao.findByAccountNoAndType(accountNo, type))
                .orElseGet(ArrayList::new);
    }

    /**
     * Collects every {@code Network.responseReceived} event from the driver's
     * performance log.
     *
     * <p>Entries whose message is empty or lacks a {@code message} object are skipped.
     *
     * @param driver browser driver
     * @return the parsed events; empty when the performance log type is unavailable
     */
    public List<ResponseReceived> processHttpTransferData(WebDriver driver) {
        List<ResponseReceived> received = new ArrayList<>();
        Logs logs = driver.manage().logs();
        if (!logs.getAvailableLogTypes().contains(LogType.PERFORMANCE)) {
            return received;
        }
        LogEntries entries = logs.get(LogType.PERFORMANCE);
        entries.forEach(entry -> {
            // parseObject yields null for an empty message, so guard before dereferencing.
            JSONObject envelope = JSON.parseObject(entry.getMessage());
            JSONObject message = envelope == null ? null : envelope.getJSONObject("message");
            if (message == null) {
                return;
            }
            if (NETWORK_RESPONSE_RECEIVED.equals(message.getString("method"))) {
                received.add(JSON.parseObject(message.getString("params"), ResponseReceived.class));
            }
        });
        return received;
    }

    /**
     * Fetches the body of a logged response through ChromeDriver's
     * {@code goog/cdp/execute} endpoint ({@code Network.getResponseBody}).
     *
     * <p>The request is only made when the response URL is not a static asset and
     * contains {@code dataUrl}. The entity of the returned response is fully
     * buffered in memory, so it stays readable after the underlying connection and
     * HTTP client have been closed.
     *
     * @param driver           browser driver; must be a {@link RemoteWebDriver}
     * @param responseReceived the logged response event
     * @param dataUrl          fragment the response URL must contain
     * @return the CDP response, or {@code null} when the URL does not qualify, the
     *         event carries no URL, or the request fails with an I/O error
     */
    public HttpResponse getHttpResponse(WebDriver driver, ResponseReceived responseReceived, String dataUrl) {
        JSONObject responseMeta = JSONObject.parseObject(responseReceived.getResponse());
        String baseUrl = responseMeta == null ? null : responseMeta.getString("url");
        if (baseUrl == null || isStaticFile(baseUrl) || !baseUrl.contains(dataUrl)) {
            return null;
        }

        String endpoint = String.format("http://localhost:%s/session/%s/goog/cdp/execute",
                SELENIUM_PORT, ((RemoteWebDriver) driver).getSessionId());
        HttpPost httpPost = new HttpPost(endpoint);

        JSONObject command = new JSONObject();
        command.put("cmd", "Network.getResponseBody");
        JSONObject params = new JSONObject();
        params.put("requestId", responseReceived.getRequestId());
        command.put("params", params);

        RequestConfig requestConfig = RequestConfig.custom()
                .setSocketTimeout(CDP_TIMEOUT_MILLIS)
                .setConnectTimeout(CDP_TIMEOUT_MILLIS)
                .build();
        try {
            httpPost.setEntity(new StringEntity(command.toString()));
            try (CloseableHttpClient httpClient = HttpClientBuilder.create()
                    .setDefaultRequestConfig(requestConfig)
                    .build();
                 CloseableHttpResponse cdpResponse = httpClient.execute(httpPost)) {
                // Buffer the body before the connection is released so callers can
                // still read it from the returned (already closed) response.
                HttpEntity entity = cdpResponse.getEntity();
                if (entity != null) {
                    cdpResponse.setEntity(new BufferedHttpEntity(entity));
                }
                return cdpResponse;
            }
        } catch (IOException e) {
            log.error("Failed to fetch response body via CDP, requestId={}, url={}",
                    responseReceived.getRequestId(), baseUrl, e);
            return null;
        }
    }

    /** Returns whether the URL ends with one of the static-asset suffixes. */
    private static boolean isStaticFile(String url) {
        return STATIC_FILE_SUFFIXES.stream().anyMatch(url::endsWith);
    }

    /**
     * Looks up the account record reported for the given day.
     *
     * @param accountNo  account number
     * @param type       account type
     * @param reportDate report date
     * @return fan count and account name of the first matching record, or
     *         {@code null} when none exists
     */
    public ReportAccountDto getHasFoundAccountMsg(String accountNo, Integer type, Date reportDate) {
        final List<Account> reported = accountDao.getHasReportDate(accountNo, type, reportDate);
        if (CollectionUtils.isEmpty(reported)) {
            return null;
        }
        final Account account = reported.get(0);
        return ReportAccountDto.builder()
                .fansCnt(account.getFansCnt())
                .accountName(account.getAccountName())
                .build();
    }

    /**
     * Looks up the video records reported for the given day.
     *
     * @param accountNo  account number
     * @param type       account type
     * @param reportDate report date
     * @return the matching records, or {@code null} when there are none
     */
    public List<VideoPool> getHasFoundVideo(String accountNo, Integer type, Date reportDate) {
        final List<VideoPool> reported = videoPoolDao.getHasReportDate(accountNo, type, reportDate);
        // Null-safe emptiness check, consistent with getHasFoundAccountMsg.
        return CollectionUtils.isEmpty(reported) ? null : reported;
    }

    /**
     * Looks up the live-stream records reported for the given day.
     *
     * @param accountNo  account number
     * @param type       account type
     * @param reportDate report date
     * @return the matching records, or {@code null} when there are none
     */
    public List<LivePool> getHasFoundLive(String accountNo, Integer type, Date reportDate) {
        final List<LivePool> reported = livePoolDao.getHasReportData(accountNo, type, reportDate);
        // Null-safe emptiness check, consistent with getHasFoundAccountMsg.
        return CollectionUtils.isEmpty(reported) ? null : reported;
    }

    /**
     * Returns the first account the DAO's random lookup yields for the given type.
     *
     * @param type account type
     * @return an account of that type, or {@code null} when the lookup yields none
     */
    public Account getRandomUserByType(Integer type) {
        List<Account> candidates = accountDao.findRandomByAndType(type);
        if (CollectionUtils.isEmpty(candidates)) {
            log.warn("No account found for type {}", type);
            return null;
        }
        return candidates.get(0);
    }

    /**
     * Returns the live streams that have no playback information.
     *
     * @param type              account type
     * @param durationThreshold duration threshold passed through to the DAO
     * @return whatever the DAO returns for these arguments
     */
    public List<LivePool> getWithoutPlaybackLive(Integer type, Double durationThreshold) {
        return livePoolDao.getWithoutPlaybackLive(type, durationThreshold);
    }
}