Commit 03520b5e20460c3d6ec1523a9842b5d354aa8968

Authored by 王明元
1 parent 0a02549c

2022年11月1日17:53:55 快手视频数据分页, 缓存接口NS_sig信息

src/main/java/cn/fw/freya/FreyaApplication.java
@@ -40,7 +40,7 @@ public class FreyaApplication { @@ -40,7 +40,7 @@ public class FreyaApplication {
40 6, 40 6,
41 30, 41 30,
42 TimeUnit.SECONDS, 42 TimeUnit.SECONDS,
43 - new LinkedBlockingQueue<>(1000), 43 + new LinkedBlockingQueue<>(100000),
44 new ThreadFactoryBuilder().setNamePrefix("wmy-thread-pool-").build(), 44 new ThreadFactoryBuilder().setNamePrefix("wmy-thread-pool-").build(),
45 new ThreadPoolExecutor.DiscardPolicy()); 45 new ThreadPoolExecutor.DiscardPolicy());
46 } 46 }
src/main/java/cn/fw/freya/controller/KSController.java
@@ -81,7 +81,7 @@ public class KSController { @@ -81,7 +81,7 @@ public class KSController {
81 */ 81 */
82 @GetMapping("/getNS_sig3") 82 @GetMapping("/getNS_sig3")
83 public String getNS_sig3(String accountNo, Integer type, boolean retryGet) { 83 public String getNS_sig3(String accountNo, Integer type, boolean retryGet) {
84 - return kuaishouCrawl.getNS_sig3(accountNo, type, retryGet); 84 + return kuaishouCrawl.getNS_sig3(accountNo, type, 1, retryGet);
85 } 85 }
86 86
87 /** 87 /**
src/main/java/cn/fw/freya/service/crawl/impl/BilibiliCrawl.java
@@ -254,7 +254,7 @@ public class BilibiliCrawl implements CrawlStrategy { @@ -254,7 +254,7 @@ public class BilibiliCrawl implements CrawlStrategy {
254 ); 254 );
255 String res = RequestUtil.get(config);// 发送GET请求 255 String res = RequestUtil.get(config);// 发送GET请求
256 this.waitFor(0.5); 256 this.waitFor(0.5);
257 - final JSONObject response = JSONObject.parseObject(res); 257 + final JSONObject response = JSON.parseObject(res);
258 if (this.verifyCookies(response)) { 258 if (this.verifyCookies(response)) {
259 return null; 259 return null;
260 } 260 }
@@ -288,7 +288,7 @@ public class BilibiliCrawl implements CrawlStrategy { @@ -288,7 +288,7 @@ public class BilibiliCrawl implements CrawlStrategy {
288 ); 288 );
289 String res = RequestUtil.get(config);// 发送GET请求 289 String res = RequestUtil.get(config);// 发送GET请求
290 this.waitFor(0.1); 290 this.waitFor(0.1);
291 - final JSONObject response = JSONObject.parseObject(res); 291 + final JSONObject response = JSON.parseObject(res);
292 if (this.verifyCookies(response)) { 292 if (this.verifyCookies(response)) {
293 return null; 293 return null;
294 } 294 }
@@ -333,7 +333,7 @@ public class BilibiliCrawl implements CrawlStrategy { @@ -333,7 +333,7 @@ public class BilibiliCrawl implements CrawlStrategy {
333 ); 333 );
334 String res = RequestUtil.get(config);// 发送GET请求 334 String res = RequestUtil.get(config);// 发送GET请求
335 log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); 335 log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res));
336 - final JSONObject response = JSONObject.parseObject(res); 336 + final JSONObject response = JSON.parseObject(res);
337 if (this.verifyCookies(response)) { 337 if (this.verifyCookies(response)) {
338 return null; 338 return null;
339 } 339 }
@@ -438,7 +438,7 @@ public class BilibiliCrawl implements CrawlStrategy { @@ -438,7 +438,7 @@ public class BilibiliCrawl implements CrawlStrategy {
438 .build() 438 .build()
439 ); 439 );
440 String res = RequestUtil.get(config);// 发送GET请求 440 String res = RequestUtil.get(config);// 发送GET请求
441 - final JSONObject response = JSONObject.parseObject(res); 441 + final JSONObject response = JSON.parseObject(res);
442 if (this.verifyCookies(response)) { 442 if (this.verifyCookies(response)) {
443 return null; 443 return null;
444 } 444 }
@@ -534,7 +534,7 @@ public class BilibiliCrawl implements CrawlStrategy { @@ -534,7 +534,7 @@ public class BilibiliCrawl implements CrawlStrategy {
534 .build() 534 .build()
535 ); 535 );
536 String res = RequestUtil.get(config); 536 String res = RequestUtil.get(config);
537 - final JSONObject response = JSONObject.parseObject(res); 537 + final JSONObject response = JSON.parseObject(res);
538 if (this.verifyCookies(response)) { 538 if (this.verifyCookies(response)) {
539 return null; 539 return null;
540 } 540 }
src/main/java/cn/fw/freya/service/crawl/impl/Common.java
@@ -215,7 +215,7 @@ public class Common { @@ -215,7 +215,7 @@ public class Common {
215 */ 215 */
216 public HttpResponse getHttpResponse(WebDriver driver, ResponseReceived responseReceived, String dataUrl) { 216 public HttpResponse getHttpResponse(WebDriver driver, ResponseReceived responseReceived, String dataUrl) {
217 HttpResponse response = null; 217 HttpResponse response = null;
218 - String baseUrl = JSONObject.parseObject(responseReceived.getResponse()).getString("url"); 218 + String baseUrl = JSON.parseObject(responseReceived.getResponse()).getString("url");
219 boolean notStaticFiles = !baseUrl.endsWith(".png") 219 boolean notStaticFiles = !baseUrl.endsWith(".png")
220 && !baseUrl.endsWith(".jpg") 220 && !baseUrl.endsWith(".jpg")
221 && !baseUrl.endsWith(".css") 221 && !baseUrl.endsWith(".css")
src/main/java/cn/fw/freya/service/crawl/impl/DongCheDiCrawl.java
@@ -152,7 +152,7 @@ public class DongCheDiCrawl implements CrawlStrategy { @@ -152,7 +152,7 @@ public class DongCheDiCrawl implements CrawlStrategy {
152 .build() 152 .build()
153 ); 153 );
154 String res = RequestUtil.get(config); 154 String res = RequestUtil.get(config);
155 - final JSONObject response = JSONObject.parseObject(res); 155 + final JSONObject response = JSON.parseObject(res);
156 if (this.verifyCookies(response)) { 156 if (this.verifyCookies(response)) {
157 return null; 157 return null;
158 } 158 }
@@ -283,7 +283,7 @@ public class DongCheDiCrawl implements CrawlStrategy { @@ -283,7 +283,7 @@ public class DongCheDiCrawl implements CrawlStrategy {
283 log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); 283 log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res));
284 JSONObject response = new JSONObject(); 284 JSONObject response = new JSONObject();
285 try { 285 try {
286 - response = JSONObject.parseObject(res); 286 + response = JSON.parseObject(res);
287 } catch (Exception e) { 287 } catch (Exception e) {
288 log.info(LocalDate.now() + " 暂未找到账户号为:" + accountNo + "的懂车帝直播数据"); 288 log.info(LocalDate.now() + " 暂未找到账户号为:" + accountNo + "的懂车帝直播数据");
289 final LivePool nullLive = LivePool.builder() 289 final LivePool nullLive = LivePool.builder()
@@ -413,7 +413,7 @@ public class DongCheDiCrawl implements CrawlStrategy { @@ -413,7 +413,7 @@ public class DongCheDiCrawl implements CrawlStrategy {
413 .build() 413 .build()
414 ); 414 );
415 String res1 = RequestUtil.get(config1); 415 String res1 = RequestUtil.get(config1);
416 - final JSONObject response1 = JSONObject.parseObject(res1); 416 + final JSONObject response1 = JSON.parseObject(res1);
417 if (this.verifyCookies(response1)) { 417 if (this.verifyCookies(response1)) {
418 return null; 418 return null;
419 } 419 }
@@ -480,7 +480,7 @@ public class DongCheDiCrawl implements CrawlStrategy { @@ -480,7 +480,7 @@ public class DongCheDiCrawl implements CrawlStrategy {
480 .build() 480 .build()
481 ); 481 );
482 String res = RequestUtil.get(config); 482 String res = RequestUtil.get(config);
483 - final JSONObject response = JSONObject.parseObject(res); 483 + final JSONObject response = JSON.parseObject(res);
484 if (this.verifyCookies(response)) { 484 if (this.verifyCookies(response)) {
485 return null; 485 return null;
486 } 486 }
@@ -520,7 +520,7 @@ public class DongCheDiCrawl implements CrawlStrategy { @@ -520,7 +520,7 @@ public class DongCheDiCrawl implements CrawlStrategy {
520 .build() 520 .build()
521 ); 521 );
522 String res = RequestUtil.get(config); 522 String res = RequestUtil.get(config);
523 - final JSONObject response = JSONObject.parseObject(res); 523 + final JSONObject response = JSON.parseObject(res);
524 if (this.verifyCookies(response)) { 524 if (this.verifyCookies(response)) {
525 return null; 525 return null;
526 } 526 }
src/main/java/cn/fw/freya/service/crawl/impl/KuaiShouCrawl.java
@@ -102,7 +102,14 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -102,7 +102,14 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
102 final List<Account> accountList = accountDao.getAllKSAccount(); 102 final List<Account> accountList = accountDao.getAllKSAccount();
103 accountList.forEach(item -> 103 accountList.forEach(item ->
104 Arrays.stream(DataTypeEnum.values()).forEach(item1 -> 104 Arrays.stream(DataTypeEnum.values()).forEach(item1 ->
105 - threadPoolExecutor.execute(() -> this.task(item.getAccountNo(), item1.getValue())) 105 + threadPoolExecutor.execute(() -> {
  106 + Integer dataType = item1.getValue();
  107 + if (Objects.equals(dataType, 2)) {
  108 + this.task(item.getAccountNo(), dataType, 1);
  109 + } else {
  110 + this.task(item.getAccountNo(), dataType, null);
  111 + }
  112 + })
106 ) 113 )
107 ); 114 );
108 return true; 115 return true;
@@ -114,9 +121,11 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -114,9 +121,11 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
114 * @param accountNo 账户号 121 * @param accountNo 账户号
115 * @param dataType 数据类型 122 * @param dataType 数据类型
116 */ 123 */
117 - public void task(String accountNo, Integer dataType) { 124 + public void task(String accountNo, Integer dataType, Integer page) {
118 String key = accountNo + "#" + dataType; 125 String key = accountNo + "#" + dataType;
119 - final String ns_sig3 = this.getNS_sig3(accountNo, dataType, true); 126 + if (Objects.nonNull(page))
  127 + key += "#" + page;
  128 + String ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true);
120 if (Objects.nonNull(ns_sig3)) 129 if (Objects.nonNull(ns_sig3))
121 sig3Map.put(key, ns_sig3); 130 sig3Map.put(key, ns_sig3);
122 else 131 else
@@ -248,6 +257,53 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -248,6 +257,53 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
248 } 257 }
249 258
250 /** 259 /**
  260 + * 分页获取快手视频数据
  261 + *
  262 + * @param accountNo 账号
  263 + * @param page 第几页
  264 + * @return 快手返回data
  265 + * @throws IOException 异常
  266 + */
  267 + private JSONObject getVideoDataByPage(String accountNo, Integer page) throws IOException {
  268 + HttpCookies cookies = HttpCookies.custom();
  269 + CookieStore cookieStore = new BasicCookieStore();
  270 + cookies.setCookieStore(cookieStore);
  271 + final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.VIDEO.getValue(), page, false);
  272 + if (Objects.isNull(ns_sig3))
  273 + return null;
  274 + Map<String, Object> params = new LinkedHashMap<>();
  275 + params.put("count", 10);
  276 + params.put("page", page);
  277 + //params.put("total", 9007199000000000L + new Random().nextInt(999999999));
  278 + params.put("kuaishou.web.cp.api_ph", this.getWebApiPh(accountNo));
  279 + HttpConfig config = HttpConfig.custom()
  280 + .url("https://cp.kuaishou.com/rest/cp/creator/pc/analysis/photo/list?__NS_sig3=" + ns_sig3)
  281 + .context(cookies.getContext())
  282 + .json(JsonUtils.objectToJson(params))
  283 + .headers(HttpHeader
  284 + .defaultHeader()
  285 + .contentType("application/json")
  286 + .host("cp.kuaishou.com")
  287 + .cookie(this.getUserCookies(accountNo))
  288 + .build()
  289 + );
  290 + String res = RequestUtil.post(config);// 发送POST请求
  291 + log.info(String.format("%s [%s]平台账户号为: %s的视频数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res));
  292 + final JSONObject response = JSON.parseObject(res);
  293 + if (this.verifyCookies(response)) {
  294 + return null;
  295 + }
  296 + if (!StringUtils.hasText(res)) {
  297 + throw new BusinessException("调用快手[视频]接口失败");
  298 + }
  299 + if (Objects.equals(response.getInteger("result"), 500002)) {
  300 + threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.VIDEO.getValue(), page));
  301 + throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息");
  302 + }
  303 + return Optional.ofNullable(response.getJSONObject("data")).orElse(new JSONObject());
  304 + }
  305 +
  306 + /**
251 * 获取所有视频作品信息 307 * 获取所有视频作品信息
252 * 308 *
253 * @param accountNo 账户号 309 * @param accountNo 账户号
@@ -260,7 +316,21 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -260,7 +316,21 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
260 if (Objects.nonNull(hasFoundVideo)) { 316 if (Objects.nonNull(hasFoundVideo)) {
261 return hasFoundVideo; 317 return hasFoundVideo;
262 } 318 }
263 - HttpCookies cookies = HttpCookies.custom(); 319 + Date previousDay = DateUtil.getPreviousDay(new Date());
  320 + int page = 1;
  321 + Integer total;
  322 + JSONArray videoJsonArray = new JSONArray();
  323 + do {
  324 + JSONObject obj = this.getVideoDataByPage(accountNo, page);
  325 + total = Objects.requireNonNull(obj).getInteger("totalCount");
  326 + videoJsonArray.addAll(Optional.ofNullable(obj.getJSONArray("photoList")).orElse(new JSONArray()));
  327 + page++;
  328 + JSONArray photoList = obj.getJSONArray("photoList");
  329 + String publishTime = JSON.parseObject(JSON.toJSONString(photoList.get(photoList.size() - 1))).getString("publishTime");
  330 + if (Objects.requireNonNull(DateUtil.parse(publishTime)).compareTo(DateUtil.getMonthFirstDay(DateUtil.getThisDayMinTime(previousDay))) < 0)
  331 + break;
  332 + } while (total > 10 * (page - 1));
  333 + /*HttpCookies cookies = HttpCookies.custom();
264 CookieStore cookieStore = new BasicCookieStore(); 334 CookieStore cookieStore = new BasicCookieStore();
265 cookies.setCookieStore(cookieStore); 335 cookies.setCookieStore(cookieStore);
266 Date previousDay = DateUtil.getPreviousDay(new Date()); 336 Date previousDay = DateUtil.getPreviousDay(new Date());
@@ -285,7 +355,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -285,7 +355,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
285 ); 355 );
286 String res = RequestUtil.post(config);// 发送POST请求 356 String res = RequestUtil.post(config);// 发送POST请求
287 log.info(String.format("%s [%s]平台账户号为: %s的视频数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); 357 log.info(String.format("%s [%s]平台账户号为: %s的视频数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res));
288 - final JSONObject response = JSONObject.parseObject(res); 358 + final JSONObject response = JSON.parseObject(res);
289 if (this.verifyCookies(response)) { 359 if (this.verifyCookies(response)) {
290 return null; 360 return null;
291 } 361 }
@@ -297,6 +367,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -297,6 +367,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
297 throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); 367 throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息");
298 } 368 }
299 JSONArray videoJsonArray = Optional.ofNullable(Optional.ofNullable(response.getJSONObject("data")).orElse(new JSONObject()).getJSONArray("photoList")).orElse(new JSONArray()); 369 JSONArray videoJsonArray = Optional.ofNullable(Optional.ofNullable(response.getJSONObject("data")).orElse(new JSONObject()).getJSONArray("photoList")).orElse(new JSONArray());
  370 + */
300 videoPoolDao.deleteByAccountNoAndDate(accountNo, previousDay, AccountTypeEnum.KS.getValue(), ResourceTypeEnum.VIDEO.getValue()); 371 videoPoolDao.deleteByAccountNoAndDate(accountNo, previousDay, AccountTypeEnum.KS.getValue(), ResourceTypeEnum.VIDEO.getValue());
301 // 视频数据存库 372 // 视频数据存库
302 List<VideoPool> videoPoolList = new ArrayList<>(videoJsonArray.size()); 373 List<VideoPool> videoPoolList = new ArrayList<>(videoJsonArray.size());
@@ -354,7 +425,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -354,7 +425,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
354 //} 425 //}
355 }); 426 });
356 try { 427 try {
357 - if (videoPoolList.size() > 0) { 428 + if (!videoPoolList.isEmpty()) {
358 videoPoolDao.saveAll(videoPoolList);// 将收集到的视频信息保存 429 videoPoolDao.saveAll(videoPoolList);// 将收集到的视频信息保存
359 } else { 430 } else {
360 final VideoPool nullVideo = VideoPool.builder() 431 final VideoPool nullVideo = VideoPool.builder()
@@ -394,7 +465,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -394,7 +465,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
394 Date endTime = DateUtil.getThisDayMaxTime(previousDay); 465 Date endTime = DateUtil.getThisDayMaxTime(previousDay);
395 Date startTime = DateUtil.getThisDayMinTime(previousDay); 466 Date startTime = DateUtil.getThisDayMinTime(previousDay);
396 //Date startTime = DateUtil.getThisDayMinTime(new Date(previousDay.getTime() - 7 * 24 * 3600 * 1000L));// 补数据使用 467 //Date startTime = DateUtil.getThisDayMinTime(new Date(previousDay.getTime() - 7 * 24 * 3600 * 1000L));// 补数据使用
397 - final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), false); 468 + final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), null, false);
398 if (Objects.isNull(ns_sig3)) 469 if (Objects.isNull(ns_sig3))
399 return null; 470 return null;
400 Map<String, Object> params = new LinkedHashMap<>(); 471 Map<String, Object> params = new LinkedHashMap<>();
@@ -418,7 +489,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -418,7 +489,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
418 ); 489 );
419 String res = RequestUtil.post(config);// 发送POST请求 490 String res = RequestUtil.post(config);// 发送POST请求
420 log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); 491 log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res));
421 - final JSONObject response = JSONObject.parseObject(res); 492 + final JSONObject response = JSON.parseObject(res);
422 if (this.verifyCookies(response)) { 493 if (this.verifyCookies(response)) {
423 return null; 494 return null;
424 } 495 }
@@ -426,7 +497,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -426,7 +497,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
426 throw new BusinessException("调用快手[视频]接口失败"); 497 throw new BusinessException("调用快手[视频]接口失败");
427 } 498 }
428 if (Objects.equals(response.getInteger("result"), 500002)) { 499 if (Objects.equals(response.getInteger("result"), 500002)) {
429 - threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.LIVE.getValue())); 500 + threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.LIVE.getValue(), null));
430 throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); 501 throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息");
431 } 502 }
432 JSONObject dataJSONObject = response.getJSONObject("data"); 503 JSONObject dataJSONObject = response.getJSONObject("data");
@@ -612,7 +683,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -612,7 +683,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
612 if (!StringUtils.hasText(res)) { 683 if (!StringUtils.hasText(res)) {
613 return objects; 684 return objects;
614 } 685 }
615 - JSONObject resObj = JSONObject.parseObject(res); 686 + JSONObject resObj = JSON.parseObject(res);
616 try { 687 try {
617 return resObj.getJSONObject("data").getJSONObject("playbackFeeds").getJSONArray("list"); 688 return resObj.getJSONObject("data").getJSONObject("playbackFeeds").getJSONArray("list");
618 } catch (Exception e) { 689 } catch (Exception e) {
@@ -638,7 +709,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -638,7 +709,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
638 cookies.setCookieStore(cookieStore); 709 cookies.setCookieStore(cookieStore);
639 Map<String, Object> params = new HashMap<>(); 710 Map<String, Object> params = new HashMap<>();
640 params.put("kuaishou.web.cp.api_ph", this.getWebApiPh(accountNo)); 711 params.put("kuaishou.web.cp.api_ph", this.getWebApiPh(accountNo));
641 - final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), false); 712 + final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), null, false);
642 if (Objects.isNull(ns_sig3)) 713 if (Objects.isNull(ns_sig3))
643 return null; 714 return null;
644 HttpConfig config = HttpConfig.custom() 715 HttpConfig config = HttpConfig.custom()
@@ -654,7 +725,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -654,7 +725,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
654 .build() 725 .build()
655 ); 726 );
656 String res = RequestUtil.post(config);// 发送POST请求 727 String res = RequestUtil.post(config);// 发送POST请求
657 - final JSONObject response = JSONObject.parseObject(res); 728 + final JSONObject response = JSON.parseObject(res);
658 if (this.verifyCookies(response)) { 729 if (this.verifyCookies(response)) {
659 return null; 730 return null;
660 } 731 }
@@ -662,7 +733,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -662,7 +733,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
662 throw new BusinessException("调用快手[直播]接口失败"); 733 throw new BusinessException("调用快手[直播]接口失败");
663 } 734 }
664 if (Objects.equals(response.getInteger("result"), 500002)) { 735 if (Objects.equals(response.getInteger("result"), 500002)) {
665 - threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.FANS.getValue())); 736 + threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.FANS.getValue(), null));
666 throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); 737 throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息");
667 } 738 }
668 JSONObject data = response.getJSONObject("data"); 739 JSONObject data = response.getJSONObject("data");
@@ -727,24 +798,29 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -727,24 +798,29 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
727 * 798 *
728 * @param accountNo 快手账户号 799 * @param accountNo 快手账户号
729 * @param type 密钥类型(1:粉丝, 2:短视频, 3:直播) 800 * @param type 密钥类型(1:粉丝, 2:短视频, 3:直播)
  801 + * @param page 数据页码, 非空时会拼入缓存key(不分页的维度传null)
730 * @param retryGet 是否重新获取 802 * @param retryGet 是否重新获取
731 */ 803 */
732 - public String getNS_sig3(String accountNo, Integer type, boolean retryGet) {  
733 - final String key = accountNo + "#" + type; 804 + public String getNS_sig3(String accountNo, Integer type, Integer page, boolean retryGet) {
  805 + String key = accountNo + "#" + type;
734 String NS_sig3; 806 String NS_sig3;
735 if (!retryGet) { 807 if (!retryGet) {
  808 + if (Objects.nonNull(page))
  809 + key += "#" + page;
736 NS_sig3 = sig3Map.get(key); 810 NS_sig3 = sig3Map.get(key);
737 if (StringUtils.hasText(NS_sig3)) 811 if (StringUtils.hasText(NS_sig3))
738 return NS_sig3; 812 return NS_sig3;
739 } 813 }
  814 + boolean videoDimension = Objects.equals(type, DataTypeEnum.VIDEO.getValue());
  815 + boolean fansDimension = Objects.equals(type, DataTypeEnum.FANS.getValue());
740 final String uuid = UUID.randomUUID().toString().replace("-", ""); 816 final String uuid = UUID.randomUUID().toString().replace("-", "");
741 final WebDriver driver = this.getKSDriver(accountNo, uuid); 817 final WebDriver driver = this.getKSDriver(accountNo, uuid);
742 String targetUrl = null; 818 String targetUrl = null;
743 String dataUrl = null;// 数据接口地址 819 String dataUrl = null;// 数据接口地址
744 - if (Objects.equals(type, DataTypeEnum.FANS.getValue())) { 820 + if (fansDimension) {
745 targetUrl = "https://cp.kuaishou.com/profile"; 821 targetUrl = "https://cp.kuaishou.com/profile";
746 dataUrl = "https://cp.kuaishou.com/rest/cp/creator/pc/home/infoV2"; 822 dataUrl = "https://cp.kuaishou.com/rest/cp/creator/pc/home/infoV2";
747 - } else if (Objects.equals(type, DataTypeEnum.VIDEO.getValue())) { 823 + } else if (videoDimension) {
748 targetUrl = "https://cp.kuaishou.com/statistics/works"; 824 targetUrl = "https://cp.kuaishou.com/statistics/works";
749 dataUrl = "https://cp.kuaishou.com/rest/cp/creator/pc/analysis/photo/list"; 825 dataUrl = "https://cp.kuaishou.com/rest/cp/creator/pc/analysis/photo/list";
750 } else if (Objects.equals(type, DataTypeEnum.LIVE.getValue())) { 826 } else if (Objects.equals(type, DataTypeEnum.LIVE.getValue())) {
@@ -758,7 +834,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -758,7 +834,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
758 throw new BusinessException("跳转页面发生异常"); 834 throw new BusinessException("跳转页面发生异常");
759 } 835 }
760 LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); 836 LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5));
761 - if (Objects.equals(type, DataTypeEnum.FANS.getValue())) { 837 + if (fansDimension) {
762 driver.get("https://cp.kuaishou.com/article/manage/video"); 838 driver.get("https://cp.kuaishou.com/article/manage/video");
763 LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); 839 LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5));
764 if (Objects.equals(targetUrl, driver.getCurrentUrl())) { 840 if (Objects.equals(targetUrl, driver.getCurrentUrl())) {
@@ -770,7 +846,29 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -770,7 +846,29 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
770 this.exitBrowser(accountNo, uuid); 846 this.exitBrowser(accountNo, uuid);
771 return null; 847 return null;
772 } 848 }
  849 + Integer maxPageNum = 0;
  850 + if (videoDimension) {
  851 + List<WebElement> pageLabels = new WebDriverWait(driver, 15, 300).until(driver1 ->
  852 + driver1.findElements(By.xpath("//ul[@class='el-pager']/li")));// 获取分页页码标签元素列表
  853 + maxPageNum = Integer.valueOf(pageLabels.get(pageLabels.size() - 1).getText());// 最大页码
  854 + if (!CollectionUtils.isEmpty(pageLabels)) {
  855 + for (WebElement item : pageLabels) {
  856 + try {
  857 + /*if (Objects.equals(item.getText(), "...")) {
  858 + pageLabels = new WebDriverWait(driver, 15, 300).until(driver1 ->
  859 + driver1.findElements(By.xpath("//ul[@class='el-pager']/li")));
  860 + item.click();
  861 + }*/
  862 + item.click();
  863 + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2));
  864 + } catch (Exception e) {
  865 + // this.exitBrowser(accountNo, uuid);
  866 + }
  867 + }
  868 + }
  869 + }
773 try { 870 try {
  871 + ArrayList<String> sigList = new ArrayList<>();
774 final List<ResponseReceived> responseReceivedEvents = common.processHttpTransferData(driver); 872 final List<ResponseReceived> responseReceivedEvents = common.processHttpTransferData(driver);
775 for (ResponseReceived item : responseReceivedEvents) { 873 for (ResponseReceived item : responseReceivedEvents) {
776 String str = this.getDataUrl(item, dataUrl); 874 String str = this.getDataUrl(item, dataUrl);
@@ -778,16 +876,25 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -778,16 +876,25 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
778 this.exitBrowser(accountNo, uuid); 876 this.exitBrowser(accountNo, uuid);
779 String[] split = str.split("="); 877 String[] split = str.split("=");
780 NS_sig3 = split[1]; 878 NS_sig3 = split[1];
781 - sig3Map.put(key, NS_sig3);  
782 - return NS_sig3; 879 + if (videoDimension) {
  880 + sigList.add(NS_sig3);
  881 + } else {
  882 + sig3Map.put(key, NS_sig3);
  883 + return NS_sig3;
  884 + }
783 } 885 }
784 } 886 }
  887 + for (int i = 0; i < sigList.size(); i++) {
  888 + sig3Map.put(accountNo + "#" + type + "#" + (i + 1), sigList.get(i));
  889 + if (sigList.size() < maxPageNum && i > 4)
  890 + break;
  891 + }
785 } catch (Exception e) { 892 } catch (Exception e) {
786 this.exitBrowser(accountNo, uuid); 893 this.exitBrowser(accountNo, uuid);
787 throw new BusinessException(e.getMessage()); 894 throw new BusinessException(e.getMessage());
788 } 895 }
789 this.exitBrowser(accountNo, uuid); 896 this.exitBrowser(accountNo, uuid);
790 - return null; 897 + return sig3Map.get(key);
791 } 898 }
792 899
793 /** 900 /**
@@ -798,7 +905,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -798,7 +905,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
798 * @return 905 * @return
799 */ 906 */
800 public String getDataUrl(ResponseReceived responseReceived, String dataUrl) { 907 public String getDataUrl(ResponseReceived responseReceived, String dataUrl) {
801 - String baseUrl = JSONObject.parseObject(responseReceived.getResponse()).getString("url"); 908 + String baseUrl = JSON.parseObject(responseReceived.getResponse()).getString("url");
802 boolean notStaticFiles = !baseUrl.endsWith(".png") 909 boolean notStaticFiles = !baseUrl.endsWith(".png")
803 && !baseUrl.endsWith(".jpg") 910 && !baseUrl.endsWith(".jpg")
804 && !baseUrl.endsWith(".css") 911 && !baseUrl.endsWith(".css")
src/main/java/cn/fw/freya/service/rpc/ReportRpcService.java
@@ -55,7 +55,7 @@ public class ReportRpcService { @@ -55,7 +55,7 @@ public class ReportRpcService {
55 if (!StringUtils.hasText(res)) { 55 if (!StringUtils.hasText(res)) {
56 return false; 56 return false;
57 } 57 }
58 - JSONObject resObj = JSONObject.parseObject(res); 58 + JSONObject resObj = JSON.parseObject(res);
59 Boolean result = resObj.getBoolean("success"); 59 Boolean result = resObj.getBoolean("success");
60 if (Boolean.FALSE.equals(result)) { 60 if (Boolean.FALSE.equals(result)) {
61 Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1); 61 Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1);
@@ -112,7 +112,7 @@ public class ReportRpcService { @@ -112,7 +112,7 @@ public class ReportRpcService {
112 if (!StringUtils.hasText(res)) { 112 if (!StringUtils.hasText(res)) {
113 return false; 113 return false;
114 } 114 }
115 - JSONObject resObj = JSONObject.parseObject(res); 115 + JSONObject resObj = JSON.parseObject(res);
116 Boolean result = resObj.getBoolean("success"); 116 Boolean result = resObj.getBoolean("success");
117 if (Boolean.FALSE.equals(result)) { 117 if (Boolean.FALSE.equals(result)) {
118 Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1); 118 Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1);
@@ -174,7 +174,7 @@ public class ReportRpcService { @@ -174,7 +174,7 @@ public class ReportRpcService {
174 if (!StringUtils.hasText(res)) { 174 if (!StringUtils.hasText(res)) {
175 return false; 175 return false;
176 } 176 }
177 - JSONObject resObj = JSONObject.parseObject(res); 177 + JSONObject resObj = JSON.parseObject(res);
178 Boolean result = resObj.getBoolean("success"); 178 Boolean result = resObj.getBoolean("success");
179 if (Boolean.FALSE.equals(result)) { 179 if (Boolean.FALSE.equals(result)) {
180 Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1); 180 Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1);