Commit 03520b5e20460c3d6ec1523a9842b5d354aa8968
1 parent 0a02549c
2022年11月1日17:53:55 快手视频数据分页, 缓存接口NS_sig信息
Showing 7 changed files with 145 additions and 38 deletions
src/main/java/cn/fw/freya/FreyaApplication.java
@@ -40,7 +40,7 @@ public class FreyaApplication { | @@ -40,7 +40,7 @@ public class FreyaApplication { | ||
40 | 6, | 40 | 6, |
41 | 30, | 41 | 30, |
42 | TimeUnit.SECONDS, | 42 | TimeUnit.SECONDS, |
43 | - new LinkedBlockingQueue<>(1000), | 43 | + new LinkedBlockingQueue<>(100000), |
44 | new ThreadFactoryBuilder().setNamePrefix("wmy-thread-pool-").build(), | 44 | new ThreadFactoryBuilder().setNamePrefix("wmy-thread-pool-").build(), |
45 | new ThreadPoolExecutor.DiscardPolicy()); | 45 | new ThreadPoolExecutor.DiscardPolicy()); |
46 | } | 46 | } |
src/main/java/cn/fw/freya/controller/KSController.java
@@ -81,7 +81,7 @@ public class KSController { | @@ -81,7 +81,7 @@ public class KSController { | ||
81 | */ | 81 | */ |
82 | @GetMapping("/getNS_sig3") | 82 | @GetMapping("/getNS_sig3") |
83 | public String getNS_sig3(String accountNo, Integer type, boolean retryGet) { | 83 | public String getNS_sig3(String accountNo, Integer type, boolean retryGet) { |
84 | - return kuaishouCrawl.getNS_sig3(accountNo, type, retryGet); | 84 | + return kuaishouCrawl.getNS_sig3(accountNo, type, 1, retryGet); |
85 | } | 85 | } |
86 | 86 | ||
87 | /** | 87 | /** |
src/main/java/cn/fw/freya/service/crawl/impl/BilibiliCrawl.java
@@ -254,7 +254,7 @@ public class BilibiliCrawl implements CrawlStrategy { | @@ -254,7 +254,7 @@ public class BilibiliCrawl implements CrawlStrategy { | ||
254 | ); | 254 | ); |
255 | String res = RequestUtil.get(config);// 发送GET请求 | 255 | String res = RequestUtil.get(config);// 发送GET请求 |
256 | this.waitFor(0.5); | 256 | this.waitFor(0.5); |
257 | - final JSONObject response = JSONObject.parseObject(res); | 257 | + final JSONObject response = JSON.parseObject(res); |
258 | if (this.verifyCookies(response)) { | 258 | if (this.verifyCookies(response)) { |
259 | return null; | 259 | return null; |
260 | } | 260 | } |
@@ -288,7 +288,7 @@ public class BilibiliCrawl implements CrawlStrategy { | @@ -288,7 +288,7 @@ public class BilibiliCrawl implements CrawlStrategy { | ||
288 | ); | 288 | ); |
289 | String res = RequestUtil.get(config);// 发送GET请求 | 289 | String res = RequestUtil.get(config);// 发送GET请求 |
290 | this.waitFor(0.1); | 290 | this.waitFor(0.1); |
291 | - final JSONObject response = JSONObject.parseObject(res); | 291 | + final JSONObject response = JSON.parseObject(res); |
292 | if (this.verifyCookies(response)) { | 292 | if (this.verifyCookies(response)) { |
293 | return null; | 293 | return null; |
294 | } | 294 | } |
@@ -333,7 +333,7 @@ public class BilibiliCrawl implements CrawlStrategy { | @@ -333,7 +333,7 @@ public class BilibiliCrawl implements CrawlStrategy { | ||
333 | ); | 333 | ); |
334 | String res = RequestUtil.get(config);// 发送GET请求 | 334 | String res = RequestUtil.get(config);// 发送GET请求 |
335 | log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); | 335 | log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); |
336 | - final JSONObject response = JSONObject.parseObject(res); | 336 | + final JSONObject response = JSON.parseObject(res); |
337 | if (this.verifyCookies(response)) { | 337 | if (this.verifyCookies(response)) { |
338 | return null; | 338 | return null; |
339 | } | 339 | } |
@@ -438,7 +438,7 @@ public class BilibiliCrawl implements CrawlStrategy { | @@ -438,7 +438,7 @@ public class BilibiliCrawl implements CrawlStrategy { | ||
438 | .build() | 438 | .build() |
439 | ); | 439 | ); |
440 | String res = RequestUtil.get(config);// 发送GET请求 | 440 | String res = RequestUtil.get(config);// 发送GET请求 |
441 | - final JSONObject response = JSONObject.parseObject(res); | 441 | + final JSONObject response = JSON.parseObject(res); |
442 | if (this.verifyCookies(response)) { | 442 | if (this.verifyCookies(response)) { |
443 | return null; | 443 | return null; |
444 | } | 444 | } |
@@ -534,7 +534,7 @@ public class BilibiliCrawl implements CrawlStrategy { | @@ -534,7 +534,7 @@ public class BilibiliCrawl implements CrawlStrategy { | ||
534 | .build() | 534 | .build() |
535 | ); | 535 | ); |
536 | String res = RequestUtil.get(config); | 536 | String res = RequestUtil.get(config); |
537 | - final JSONObject response = JSONObject.parseObject(res); | 537 | + final JSONObject response = JSON.parseObject(res); |
538 | if (this.verifyCookies(response)) { | 538 | if (this.verifyCookies(response)) { |
539 | return null; | 539 | return null; |
540 | } | 540 | } |
src/main/java/cn/fw/freya/service/crawl/impl/Common.java
@@ -215,7 +215,7 @@ public class Common { | @@ -215,7 +215,7 @@ public class Common { | ||
215 | */ | 215 | */ |
216 | public HttpResponse getHttpResponse(WebDriver driver, ResponseReceived responseReceived, String dataUrl) { | 216 | public HttpResponse getHttpResponse(WebDriver driver, ResponseReceived responseReceived, String dataUrl) { |
217 | HttpResponse response = null; | 217 | HttpResponse response = null; |
218 | - String baseUrl = JSONObject.parseObject(responseReceived.getResponse()).getString("url"); | 218 | + String baseUrl = JSON.parseObject(responseReceived.getResponse()).getString("url"); |
219 | boolean notStaticFiles = !baseUrl.endsWith(".png") | 219 | boolean notStaticFiles = !baseUrl.endsWith(".png") |
220 | && !baseUrl.endsWith(".jpg") | 220 | && !baseUrl.endsWith(".jpg") |
221 | && !baseUrl.endsWith(".css") | 221 | && !baseUrl.endsWith(".css") |
src/main/java/cn/fw/freya/service/crawl/impl/DongCheDiCrawl.java
@@ -152,7 +152,7 @@ public class DongCheDiCrawl implements CrawlStrategy { | @@ -152,7 +152,7 @@ public class DongCheDiCrawl implements CrawlStrategy { | ||
152 | .build() | 152 | .build() |
153 | ); | 153 | ); |
154 | String res = RequestUtil.get(config); | 154 | String res = RequestUtil.get(config); |
155 | - final JSONObject response = JSONObject.parseObject(res); | 155 | + final JSONObject response = JSON.parseObject(res); |
156 | if (this.verifyCookies(response)) { | 156 | if (this.verifyCookies(response)) { |
157 | return null; | 157 | return null; |
158 | } | 158 | } |
@@ -283,7 +283,7 @@ public class DongCheDiCrawl implements CrawlStrategy { | @@ -283,7 +283,7 @@ public class DongCheDiCrawl implements CrawlStrategy { | ||
283 | log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); | 283 | log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); |
284 | JSONObject response = new JSONObject(); | 284 | JSONObject response = new JSONObject(); |
285 | try { | 285 | try { |
286 | - response = JSONObject.parseObject(res); | 286 | + response = JSON.parseObject(res); |
287 | } catch (Exception e) { | 287 | } catch (Exception e) { |
288 | log.info(LocalDate.now() + " 暂未找到账户号为:" + accountNo + "的懂车帝直播数据"); | 288 | log.info(LocalDate.now() + " 暂未找到账户号为:" + accountNo + "的懂车帝直播数据"); |
289 | final LivePool nullLive = LivePool.builder() | 289 | final LivePool nullLive = LivePool.builder() |
@@ -413,7 +413,7 @@ public class DongCheDiCrawl implements CrawlStrategy { | @@ -413,7 +413,7 @@ public class DongCheDiCrawl implements CrawlStrategy { | ||
413 | .build() | 413 | .build() |
414 | ); | 414 | ); |
415 | String res1 = RequestUtil.get(config1); | 415 | String res1 = RequestUtil.get(config1); |
416 | - final JSONObject response1 = JSONObject.parseObject(res1); | 416 | + final JSONObject response1 = JSON.parseObject(res1); |
417 | if (this.verifyCookies(response1)) { | 417 | if (this.verifyCookies(response1)) { |
418 | return null; | 418 | return null; |
419 | } | 419 | } |
@@ -480,7 +480,7 @@ public class DongCheDiCrawl implements CrawlStrategy { | @@ -480,7 +480,7 @@ public class DongCheDiCrawl implements CrawlStrategy { | ||
480 | .build() | 480 | .build() |
481 | ); | 481 | ); |
482 | String res = RequestUtil.get(config); | 482 | String res = RequestUtil.get(config); |
483 | - final JSONObject response = JSONObject.parseObject(res); | 483 | + final JSONObject response = JSON.parseObject(res); |
484 | if (this.verifyCookies(response)) { | 484 | if (this.verifyCookies(response)) { |
485 | return null; | 485 | return null; |
486 | } | 486 | } |
@@ -520,7 +520,7 @@ public class DongCheDiCrawl implements CrawlStrategy { | @@ -520,7 +520,7 @@ public class DongCheDiCrawl implements CrawlStrategy { | ||
520 | .build() | 520 | .build() |
521 | ); | 521 | ); |
522 | String res = RequestUtil.get(config); | 522 | String res = RequestUtil.get(config); |
523 | - final JSONObject response = JSONObject.parseObject(res); | 523 | + final JSONObject response = JSON.parseObject(res); |
524 | if (this.verifyCookies(response)) { | 524 | if (this.verifyCookies(response)) { |
525 | return null; | 525 | return null; |
526 | } | 526 | } |
src/main/java/cn/fw/freya/service/crawl/impl/KuaiShouCrawl.java
@@ -102,7 +102,14 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -102,7 +102,14 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
102 | final List<Account> accountList = accountDao.getAllKSAccount(); | 102 | final List<Account> accountList = accountDao.getAllKSAccount(); |
103 | accountList.forEach(item -> | 103 | accountList.forEach(item -> |
104 | Arrays.stream(DataTypeEnum.values()).forEach(item1 -> | 104 | Arrays.stream(DataTypeEnum.values()).forEach(item1 -> |
105 | - threadPoolExecutor.execute(() -> this.task(item.getAccountNo(), item1.getValue())) | 105 | + threadPoolExecutor.execute(() -> { |
106 | + Integer dataType = item1.getValue(); | ||
107 | + if (Objects.equals(dataType, 2)) { | ||
108 | + this.task(item.getAccountNo(), dataType, 1); | ||
109 | + } else { | ||
110 | + this.task(item.getAccountNo(), dataType, null); | ||
111 | + } | ||
112 | + }) | ||
106 | ) | 113 | ) |
107 | ); | 114 | ); |
108 | return true; | 115 | return true; |
@@ -114,9 +121,11 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -114,9 +121,11 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
114 | * @param accountNo 账户号 | 121 | * @param accountNo 账户号 |
115 | * @param dataType 数据类型 | 122 | * @param dataType 数据类型 |
116 | */ | 123 | */ |
117 | - public void task(String accountNo, Integer dataType) { | 124 | + public void task(String accountNo, Integer dataType, Integer page) { |
118 | String key = accountNo + "#" + dataType; | 125 | String key = accountNo + "#" + dataType; |
119 | - final String ns_sig3 = this.getNS_sig3(accountNo, dataType, true); | 126 | + if (Objects.nonNull(page)) |
127 | + key += "#" + page; | ||
128 | + String ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true); | ||
120 | if (Objects.nonNull(ns_sig3)) | 129 | if (Objects.nonNull(ns_sig3)) |
121 | sig3Map.put(key, ns_sig3); | 130 | sig3Map.put(key, ns_sig3); |
122 | else | 131 | else |
@@ -248,6 +257,53 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -248,6 +257,53 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
248 | } | 257 | } |
249 | 258 | ||
250 | /** | 259 | /** |
260 | + * 分页获取快手视频数据 | ||
261 | + * | ||
262 | + * @param accountNo 账号 | ||
263 | + * @param page 第几页 | ||
264 | + * @return 快手返回data | ||
265 | + * @throws IOException 异常 | ||
266 | + */ | ||
267 | + private JSONObject getVideoDataByPage(String accountNo, Integer page) throws IOException { | ||
268 | + HttpCookies cookies = HttpCookies.custom(); | ||
269 | + CookieStore cookieStore = new BasicCookieStore(); | ||
270 | + cookies.setCookieStore(cookieStore); | ||
271 | + final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.VIDEO.getValue(), page, false); | ||
272 | + if (Objects.isNull(ns_sig3)) | ||
273 | + return null; | ||
274 | + Map<String, Object> params = new LinkedHashMap<>(); | ||
275 | + params.put("count", 10); | ||
276 | + params.put("page", page); | ||
277 | + //params.put("total", 9007199000000000L + new Random().nextInt(999999999)); | ||
278 | + params.put("kuaishou.web.cp.api_ph", this.getWebApiPh(accountNo)); | ||
279 | + HttpConfig config = HttpConfig.custom() | ||
280 | + .url("https://cp.kuaishou.com/rest/cp/creator/pc/analysis/photo/list?__NS_sig3=" + ns_sig3) | ||
281 | + .context(cookies.getContext()) | ||
282 | + .json(JsonUtils.objectToJson(params)) | ||
283 | + .headers(HttpHeader | ||
284 | + .defaultHeader() | ||
285 | + .contentType("application/json") | ||
286 | + .host("cp.kuaishou.com") | ||
287 | + .cookie(this.getUserCookies(accountNo)) | ||
288 | + .build() | ||
289 | + ); | ||
290 | + String res = RequestUtil.post(config);// 发送POST请求 | ||
291 | + log.info(String.format("%s [%s]平台账户号为: %s的视频数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); | ||
292 | + final JSONObject response = JSON.parseObject(res); | ||
293 | + if (this.verifyCookies(response)) { | ||
294 | + return null; | ||
295 | + } | ||
296 | + if (!StringUtils.hasText(res)) { | ||
297 | + throw new BusinessException("调用快手[视频]接口失败"); | ||
298 | + } | ||
299 | + if (Objects.equals(response.getInteger("result"), 500002)) { | ||
300 | + threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.VIDEO.getValue(), page)); | ||
301 | + throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); | ||
302 | + } | ||
303 | + return Optional.ofNullable(response.getJSONObject("data")).orElse(new JSONObject()); | ||
304 | + } | ||
305 | + | ||
306 | + /** | ||
251 | * 获取所有视频作品信息 | 307 | * 获取所有视频作品信息 |
252 | * | 308 | * |
253 | * @param accountNo 账户号 | 309 | * @param accountNo 账户号 |
@@ -260,7 +316,21 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -260,7 +316,21 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
260 | if (Objects.nonNull(hasFoundVideo)) { | 316 | if (Objects.nonNull(hasFoundVideo)) { |
261 | return hasFoundVideo; | 317 | return hasFoundVideo; |
262 | } | 318 | } |
263 | - HttpCookies cookies = HttpCookies.custom(); | 319 | + Date previousDay = DateUtil.getPreviousDay(new Date()); |
320 | + int page = 1; | ||
321 | + Integer total; | ||
322 | + JSONArray videoJsonArray = new JSONArray(); | ||
323 | + do { | ||
324 | + JSONObject obj = this.getVideoDataByPage(accountNo, page); | ||
325 | + total = Objects.requireNonNull(obj).getInteger("totalCount"); | ||
326 | + videoJsonArray.addAll(Optional.ofNullable(obj.getJSONArray("photoList")).orElse(new JSONArray())); | ||
327 | + page++; | ||
328 | + JSONArray photoList = obj.getJSONArray("photoList"); | ||
329 | + String publishTime = JSON.parseObject(JSON.toJSONString(photoList.get(photoList.size() - 1))).getString("publishTime"); | ||
330 | + if (Objects.requireNonNull(DateUtil.parse(publishTime)).compareTo(DateUtil.getMonthFirstDay(DateUtil.getThisDayMinTime(previousDay))) < 0) | ||
331 | + break; | ||
332 | + } while (total > 10 * (page - 1)); | ||
333 | + /*HttpCookies cookies = HttpCookies.custom(); | ||
264 | CookieStore cookieStore = new BasicCookieStore(); | 334 | CookieStore cookieStore = new BasicCookieStore(); |
265 | cookies.setCookieStore(cookieStore); | 335 | cookies.setCookieStore(cookieStore); |
266 | Date previousDay = DateUtil.getPreviousDay(new Date()); | 336 | Date previousDay = DateUtil.getPreviousDay(new Date()); |
@@ -285,7 +355,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -285,7 +355,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
285 | ); | 355 | ); |
286 | String res = RequestUtil.post(config);// 发送POST请求 | 356 | String res = RequestUtil.post(config);// 发送POST请求 |
287 | log.info(String.format("%s [%s]平台账户号为: %s的视频数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); | 357 | log.info(String.format("%s [%s]平台账户号为: %s的视频数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); |
288 | - final JSONObject response = JSONObject.parseObject(res); | 358 | + final JSONObject response = JSON.parseObject(res); |
289 | if (this.verifyCookies(response)) { | 359 | if (this.verifyCookies(response)) { |
290 | return null; | 360 | return null; |
291 | } | 361 | } |
@@ -297,6 +367,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -297,6 +367,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
297 | throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); | 367 | throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); |
298 | } | 368 | } |
299 | JSONArray videoJsonArray = Optional.ofNullable(Optional.ofNullable(response.getJSONObject("data")).orElse(new JSONObject()).getJSONArray("photoList")).orElse(new JSONArray()); | 369 | JSONArray videoJsonArray = Optional.ofNullable(Optional.ofNullable(response.getJSONObject("data")).orElse(new JSONObject()).getJSONArray("photoList")).orElse(new JSONArray()); |
370 | + */ | ||
300 | videoPoolDao.deleteByAccountNoAndDate(accountNo, previousDay, AccountTypeEnum.KS.getValue(), ResourceTypeEnum.VIDEO.getValue()); | 371 | videoPoolDao.deleteByAccountNoAndDate(accountNo, previousDay, AccountTypeEnum.KS.getValue(), ResourceTypeEnum.VIDEO.getValue()); |
301 | // 视频数据存库 | 372 | // 视频数据存库 |
302 | List<VideoPool> videoPoolList = new ArrayList<>(videoJsonArray.size()); | 373 | List<VideoPool> videoPoolList = new ArrayList<>(videoJsonArray.size()); |
@@ -354,7 +425,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -354,7 +425,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
354 | //} | 425 | //} |
355 | }); | 426 | }); |
356 | try { | 427 | try { |
357 | - if (videoPoolList.size() > 0) { | 428 | + if (!videoPoolList.isEmpty()) { |
358 | videoPoolDao.saveAll(videoPoolList);// 将收集到的视频信息保存 | 429 | videoPoolDao.saveAll(videoPoolList);// 将收集到的视频信息保存 |
359 | } else { | 430 | } else { |
360 | final VideoPool nullVideo = VideoPool.builder() | 431 | final VideoPool nullVideo = VideoPool.builder() |
@@ -394,7 +465,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -394,7 +465,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
394 | Date endTime = DateUtil.getThisDayMaxTime(previousDay); | 465 | Date endTime = DateUtil.getThisDayMaxTime(previousDay); |
395 | Date startTime = DateUtil.getThisDayMinTime(previousDay); | 466 | Date startTime = DateUtil.getThisDayMinTime(previousDay); |
396 | //Date startTime = DateUtil.getThisDayMinTime(new Date(previousDay.getTime() - 7 * 24 * 3600 * 1000L));// 补数据使用 | 467 | //Date startTime = DateUtil.getThisDayMinTime(new Date(previousDay.getTime() - 7 * 24 * 3600 * 1000L));// 补数据使用 |
397 | - final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), false); | 468 | + final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), null, false); |
398 | if (Objects.isNull(ns_sig3)) | 469 | if (Objects.isNull(ns_sig3)) |
399 | return null; | 470 | return null; |
400 | Map<String, Object> params = new LinkedHashMap<>(); | 471 | Map<String, Object> params = new LinkedHashMap<>(); |
@@ -418,7 +489,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -418,7 +489,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
418 | ); | 489 | ); |
419 | String res = RequestUtil.post(config);// 发送POST请求 | 490 | String res = RequestUtil.post(config);// 发送POST请求 |
420 | log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); | 491 | log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); |
421 | - final JSONObject response = JSONObject.parseObject(res); | 492 | + final JSONObject response = JSON.parseObject(res); |
422 | if (this.verifyCookies(response)) { | 493 | if (this.verifyCookies(response)) { |
423 | return null; | 494 | return null; |
424 | } | 495 | } |
@@ -426,7 +497,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -426,7 +497,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
426 | throw new BusinessException("调用快手[视频]接口失败"); | 497 | throw new BusinessException("调用快手[视频]接口失败"); |
427 | } | 498 | } |
428 | if (Objects.equals(response.getInteger("result"), 500002)) { | 499 | if (Objects.equals(response.getInteger("result"), 500002)) { |
429 | - threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.LIVE.getValue())); | 500 | + threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.LIVE.getValue(), null)); |
430 | throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); | 501 | throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); |
431 | } | 502 | } |
432 | JSONObject dataJSONObject = response.getJSONObject("data"); | 503 | JSONObject dataJSONObject = response.getJSONObject("data"); |
@@ -612,7 +683,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -612,7 +683,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
612 | if (!StringUtils.hasText(res)) { | 683 | if (!StringUtils.hasText(res)) { |
613 | return objects; | 684 | return objects; |
614 | } | 685 | } |
615 | - JSONObject resObj = JSONObject.parseObject(res); | 686 | + JSONObject resObj = JSON.parseObject(res); |
616 | try { | 687 | try { |
617 | return resObj.getJSONObject("data").getJSONObject("playbackFeeds").getJSONArray("list"); | 688 | return resObj.getJSONObject("data").getJSONObject("playbackFeeds").getJSONArray("list"); |
618 | } catch (Exception e) { | 689 | } catch (Exception e) { |
@@ -638,7 +709,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -638,7 +709,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
638 | cookies.setCookieStore(cookieStore); | 709 | cookies.setCookieStore(cookieStore); |
639 | Map<String, Object> params = new HashMap<>(); | 710 | Map<String, Object> params = new HashMap<>(); |
640 | params.put("kuaishou.web.cp.api_ph", this.getWebApiPh(accountNo)); | 711 | params.put("kuaishou.web.cp.api_ph", this.getWebApiPh(accountNo)); |
641 | - final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), false); | 712 | + final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), null, false); |
642 | if (Objects.isNull(ns_sig3)) | 713 | if (Objects.isNull(ns_sig3)) |
643 | return null; | 714 | return null; |
644 | HttpConfig config = HttpConfig.custom() | 715 | HttpConfig config = HttpConfig.custom() |
@@ -654,7 +725,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -654,7 +725,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
654 | .build() | 725 | .build() |
655 | ); | 726 | ); |
656 | String res = RequestUtil.post(config);// 发送POST请求 | 727 | String res = RequestUtil.post(config);// 发送POST请求 |
657 | - final JSONObject response = JSONObject.parseObject(res); | 728 | + final JSONObject response = JSON.parseObject(res); |
658 | if (this.verifyCookies(response)) { | 729 | if (this.verifyCookies(response)) { |
659 | return null; | 730 | return null; |
660 | } | 731 | } |
@@ -662,7 +733,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -662,7 +733,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
662 | throw new BusinessException("调用快手[直播]接口失败"); | 733 | throw new BusinessException("调用快手[直播]接口失败"); |
663 | } | 734 | } |
664 | if (Objects.equals(response.getInteger("result"), 500002)) { | 735 | if (Objects.equals(response.getInteger("result"), 500002)) { |
665 | - threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.FANS.getValue())); | 736 | + threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.FANS.getValue(), null)); |
666 | throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); | 737 | throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); |
667 | } | 738 | } |
668 | JSONObject data = response.getJSONObject("data"); | 739 | JSONObject data = response.getJSONObject("data"); |
@@ -727,24 +798,29 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -727,24 +798,29 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
727 | * | 798 | * |
728 | * @param accountNo 快手账户号 | 799 | * @param accountNo 快手账户号 |
729 | * @param type 密钥类型(1:粉丝, 2:短视频, 3:直播) | 800 | * @param type 密钥类型(1:粉丝, 2:短视频, 3:直播) |
801 | + * @param page 数据页码(仅短视频分页时使用, 用于拼接sig3缓存key; 粉丝/直播传null) | |
730 | * @param retryGet 是否重新获取 | 802 | * @param retryGet 是否重新获取 |
731 | */ | 803 | */ |
732 | - public String getNS_sig3(String accountNo, Integer type, boolean retryGet) { | ||
733 | - final String key = accountNo + "#" + type; | 804 | + public String getNS_sig3(String accountNo, Integer type, Integer page, boolean retryGet) { |
805 | + String key = accountNo + "#" + type; | ||
734 | String NS_sig3; | 806 | String NS_sig3; |
735 | if (!retryGet) { | 807 | if (!retryGet) { |
808 | + if (Objects.nonNull(page)) | ||
809 | + key += "#" + page; | ||
736 | NS_sig3 = sig3Map.get(key); | 810 | NS_sig3 = sig3Map.get(key); |
737 | if (StringUtils.hasText(NS_sig3)) | 811 | if (StringUtils.hasText(NS_sig3)) |
738 | return NS_sig3; | 812 | return NS_sig3; |
739 | } | 813 | } |
814 | + boolean videoDimension = Objects.equals(type, DataTypeEnum.VIDEO.getValue()); | ||
815 | + boolean fansDimension = Objects.equals(type, DataTypeEnum.FANS.getValue()); | ||
740 | final String uuid = UUID.randomUUID().toString().replace("-", ""); | 816 | final String uuid = UUID.randomUUID().toString().replace("-", ""); |
741 | final WebDriver driver = this.getKSDriver(accountNo, uuid); | 817 | final WebDriver driver = this.getKSDriver(accountNo, uuid); |
742 | String targetUrl = null; | 818 | String targetUrl = null; |
743 | String dataUrl = null;// 数据接口地址 | 819 | String dataUrl = null;// 数据接口地址 |
744 | - if (Objects.equals(type, DataTypeEnum.FANS.getValue())) { | 820 | + if (fansDimension) { |
745 | targetUrl = "https://cp.kuaishou.com/profile"; | 821 | targetUrl = "https://cp.kuaishou.com/profile"; |
746 | dataUrl = "https://cp.kuaishou.com/rest/cp/creator/pc/home/infoV2"; | 822 | dataUrl = "https://cp.kuaishou.com/rest/cp/creator/pc/home/infoV2"; |
747 | - } else if (Objects.equals(type, DataTypeEnum.VIDEO.getValue())) { | 823 | + } else if (videoDimension) { |
748 | targetUrl = "https://cp.kuaishou.com/statistics/works"; | 824 | targetUrl = "https://cp.kuaishou.com/statistics/works"; |
749 | dataUrl = "https://cp.kuaishou.com/rest/cp/creator/pc/analysis/photo/list"; | 825 | dataUrl = "https://cp.kuaishou.com/rest/cp/creator/pc/analysis/photo/list"; |
750 | } else if (Objects.equals(type, DataTypeEnum.LIVE.getValue())) { | 826 | } else if (Objects.equals(type, DataTypeEnum.LIVE.getValue())) { |
@@ -758,7 +834,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -758,7 +834,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
758 | throw new BusinessException("跳转页面发生异常"); | 834 | throw new BusinessException("跳转页面发生异常"); |
759 | } | 835 | } |
760 | LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); | 836 | LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); |
761 | - if (Objects.equals(type, DataTypeEnum.FANS.getValue())) { | 837 | + if (fansDimension) { |
762 | driver.get("https://cp.kuaishou.com/article/manage/video"); | 838 | driver.get("https://cp.kuaishou.com/article/manage/video"); |
763 | LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); | 839 | LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); |
764 | if (Objects.equals(targetUrl, driver.getCurrentUrl())) { | 840 | if (Objects.equals(targetUrl, driver.getCurrentUrl())) { |
@@ -770,7 +846,29 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -770,7 +846,29 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
770 | this.exitBrowser(accountNo, uuid); | 846 | this.exitBrowser(accountNo, uuid); |
771 | return null; | 847 | return null; |
772 | } | 848 | } |
849 | + Integer maxPageNum = 0; | ||
850 | + if (videoDimension) { | ||
851 | + List<WebElement> pageLabels = new WebDriverWait(driver, 15, 300).until(driver1 -> | ||
852 | + driver1.findElements(By.xpath("//ul[@class='el-pager']/li")));// 获取分页页码标签元素列表 | ||
853 | + maxPageNum = Integer.valueOf(pageLabels.get(pageLabels.size() - 1).getText());// 最大页码 | ||
854 | + if (!CollectionUtils.isEmpty(pageLabels)) { | ||
855 | + for (WebElement item : pageLabels) { | ||
856 | + try { | ||
857 | + /*if (Objects.equals(item.getText(), "...")) { | ||
858 | + pageLabels = new WebDriverWait(driver, 15, 300).until(driver1 -> | ||
859 | + driver1.findElements(By.xpath("//ul[@class='el-pager']/li"))); | ||
860 | + item.click(); | ||
861 | + }*/ | ||
862 | + item.click(); | ||
863 | + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2)); | ||
864 | + } catch (Exception e) { | ||
865 | + // this.exitBrowser(accountNo, uuid); | ||
866 | + } | ||
867 | + } | ||
868 | + } | ||
869 | + } | ||
773 | try { | 870 | try { |
871 | + ArrayList<String> sigList = new ArrayList<>(); | ||
774 | final List<ResponseReceived> responseReceivedEvents = common.processHttpTransferData(driver); | 872 | final List<ResponseReceived> responseReceivedEvents = common.processHttpTransferData(driver); |
775 | for (ResponseReceived item : responseReceivedEvents) { | 873 | for (ResponseReceived item : responseReceivedEvents) { |
776 | String str = this.getDataUrl(item, dataUrl); | 874 | String str = this.getDataUrl(item, dataUrl); |
@@ -778,16 +876,25 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -778,16 +876,25 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
778 | this.exitBrowser(accountNo, uuid); | 876 | this.exitBrowser(accountNo, uuid); |
779 | String[] split = str.split("="); | 877 | String[] split = str.split("="); |
780 | NS_sig3 = split[1]; | 878 | NS_sig3 = split[1]; |
781 | - sig3Map.put(key, NS_sig3); | ||
782 | - return NS_sig3; | 879 | + if (videoDimension) { |
880 | + sigList.add(NS_sig3); | ||
881 | + } else { | ||
882 | + sig3Map.put(key, NS_sig3); | ||
883 | + return NS_sig3; | ||
884 | + } | ||
783 | } | 885 | } |
784 | } | 886 | } |
887 | + for (int i = 0; i < sigList.size(); i++) { | ||
888 | + sig3Map.put(accountNo + "#" + type + "#" + (i + 1), sigList.get(i)); | ||
889 | + if (sigList.size() < maxPageNum && i > 4) | ||
890 | + break; | ||
891 | + } | ||
785 | } catch (Exception e) { | 892 | } catch (Exception e) { |
786 | this.exitBrowser(accountNo, uuid); | 893 | this.exitBrowser(accountNo, uuid); |
787 | throw new BusinessException(e.getMessage()); | 894 | throw new BusinessException(e.getMessage()); |
788 | } | 895 | } |
789 | this.exitBrowser(accountNo, uuid); | 896 | this.exitBrowser(accountNo, uuid); |
790 | - return null; | 897 | + return sig3Map.get(key); |
791 | } | 898 | } |
792 | 899 | ||
793 | /** | 900 | /** |
@@ -798,7 +905,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -798,7 +905,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
798 | * @return | 905 | * @return |
799 | */ | 906 | */ |
800 | public String getDataUrl(ResponseReceived responseReceived, String dataUrl) { | 907 | public String getDataUrl(ResponseReceived responseReceived, String dataUrl) { |
801 | - String baseUrl = JSONObject.parseObject(responseReceived.getResponse()).getString("url"); | 908 | + String baseUrl = JSON.parseObject(responseReceived.getResponse()).getString("url"); |
802 | boolean notStaticFiles = !baseUrl.endsWith(".png") | 909 | boolean notStaticFiles = !baseUrl.endsWith(".png") |
803 | && !baseUrl.endsWith(".jpg") | 910 | && !baseUrl.endsWith(".jpg") |
804 | && !baseUrl.endsWith(".css") | 911 | && !baseUrl.endsWith(".css") |
src/main/java/cn/fw/freya/service/rpc/ReportRpcService.java
@@ -55,7 +55,7 @@ public class ReportRpcService { | @@ -55,7 +55,7 @@ public class ReportRpcService { | ||
55 | if (!StringUtils.hasText(res)) { | 55 | if (!StringUtils.hasText(res)) { |
56 | return false; | 56 | return false; |
57 | } | 57 | } |
58 | - JSONObject resObj = JSONObject.parseObject(res); | 58 | + JSONObject resObj = JSON.parseObject(res); |
59 | Boolean result = resObj.getBoolean("success"); | 59 | Boolean result = resObj.getBoolean("success"); |
60 | if (Boolean.FALSE.equals(result)) { | 60 | if (Boolean.FALSE.equals(result)) { |
61 | Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1); | 61 | Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1); |
@@ -112,7 +112,7 @@ public class ReportRpcService { | @@ -112,7 +112,7 @@ public class ReportRpcService { | ||
112 | if (!StringUtils.hasText(res)) { | 112 | if (!StringUtils.hasText(res)) { |
113 | return false; | 113 | return false; |
114 | } | 114 | } |
115 | - JSONObject resObj = JSONObject.parseObject(res); | 115 | + JSONObject resObj = JSON.parseObject(res); |
116 | Boolean result = resObj.getBoolean("success"); | 116 | Boolean result = resObj.getBoolean("success"); |
117 | if (Boolean.FALSE.equals(result)) { | 117 | if (Boolean.FALSE.equals(result)) { |
118 | Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1); | 118 | Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1); |
@@ -174,7 +174,7 @@ public class ReportRpcService { | @@ -174,7 +174,7 @@ public class ReportRpcService { | ||
174 | if (!StringUtils.hasText(res)) { | 174 | if (!StringUtils.hasText(res)) { |
175 | return false; | 175 | return false; |
176 | } | 176 | } |
177 | - JSONObject resObj = JSONObject.parseObject(res); | 177 | + JSONObject resObj = JSON.parseObject(res); |
178 | Boolean result = resObj.getBoolean("success"); | 178 | Boolean result = resObj.getBoolean("success"); |
179 | if (Boolean.FALSE.equals(result)) { | 179 | if (Boolean.FALSE.equals(result)) { |
180 | Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1); | 180 | Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1); |