Commit 03520b5e20460c3d6ec1523a9842b5d354aa8968
1 parent 0a02549c
2022年11月1日17:53:55 快手视频数据分页, 缓存接口NS_sig信息
Showing 7 changed files with 145 additions and 38 deletions
src/main/java/cn/fw/freya/FreyaApplication.java
... | ... | @@ -40,7 +40,7 @@ public class FreyaApplication { |
40 | 40 | 6, |
41 | 41 | 30, |
42 | 42 | TimeUnit.SECONDS, |
43 | - new LinkedBlockingQueue<>(1000), | |
43 | + new LinkedBlockingQueue<>(100000), | |
44 | 44 | new ThreadFactoryBuilder().setNamePrefix("wmy-thread-pool-").build(), |
45 | 45 | new ThreadPoolExecutor.DiscardPolicy()); |
46 | 46 | } | ... | ... |
src/main/java/cn/fw/freya/controller/KSController.java
... | ... | @@ -81,7 +81,7 @@ public class KSController { |
81 | 81 | */ |
82 | 82 | @GetMapping("/getNS_sig3") |
83 | 83 | public String getNS_sig3(String accountNo, Integer type, boolean retryGet) { |
84 | - return kuaishouCrawl.getNS_sig3(accountNo, type, retryGet); | |
84 | + return kuaishouCrawl.getNS_sig3(accountNo, type, 1, retryGet); | |
85 | 85 | } |
86 | 86 | |
87 | 87 | /** | ... | ... |
src/main/java/cn/fw/freya/service/crawl/impl/BilibiliCrawl.java
... | ... | @@ -254,7 +254,7 @@ public class BilibiliCrawl implements CrawlStrategy { |
254 | 254 | ); |
255 | 255 | String res = RequestUtil.get(config);// 发送GET请求 |
256 | 256 | this.waitFor(0.5); |
257 | - final JSONObject response = JSONObject.parseObject(res); | |
257 | + final JSONObject response = JSON.parseObject(res); | |
258 | 258 | if (this.verifyCookies(response)) { |
259 | 259 | return null; |
260 | 260 | } |
... | ... | @@ -288,7 +288,7 @@ public class BilibiliCrawl implements CrawlStrategy { |
288 | 288 | ); |
289 | 289 | String res = RequestUtil.get(config);// 发送GET请求 |
290 | 290 | this.waitFor(0.1); |
291 | - final JSONObject response = JSONObject.parseObject(res); | |
291 | + final JSONObject response = JSON.parseObject(res); | |
292 | 292 | if (this.verifyCookies(response)) { |
293 | 293 | return null; |
294 | 294 | } |
... | ... | @@ -333,7 +333,7 @@ public class BilibiliCrawl implements CrawlStrategy { |
333 | 333 | ); |
334 | 334 | String res = RequestUtil.get(config);// 发送GET请求 |
335 | 335 | log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); |
336 | - final JSONObject response = JSONObject.parseObject(res); | |
336 | + final JSONObject response = JSON.parseObject(res); | |
337 | 337 | if (this.verifyCookies(response)) { |
338 | 338 | return null; |
339 | 339 | } |
... | ... | @@ -438,7 +438,7 @@ public class BilibiliCrawl implements CrawlStrategy { |
438 | 438 | .build() |
439 | 439 | ); |
440 | 440 | String res = RequestUtil.get(config);// 发送GET请求 |
441 | - final JSONObject response = JSONObject.parseObject(res); | |
441 | + final JSONObject response = JSON.parseObject(res); | |
442 | 442 | if (this.verifyCookies(response)) { |
443 | 443 | return null; |
444 | 444 | } |
... | ... | @@ -534,7 +534,7 @@ public class BilibiliCrawl implements CrawlStrategy { |
534 | 534 | .build() |
535 | 535 | ); |
536 | 536 | String res = RequestUtil.get(config); |
537 | - final JSONObject response = JSONObject.parseObject(res); | |
537 | + final JSONObject response = JSON.parseObject(res); | |
538 | 538 | if (this.verifyCookies(response)) { |
539 | 539 | return null; |
540 | 540 | } | ... | ... |
src/main/java/cn/fw/freya/service/crawl/impl/Common.java
... | ... | @@ -215,7 +215,7 @@ public class Common { |
215 | 215 | */ |
216 | 216 | public HttpResponse getHttpResponse(WebDriver driver, ResponseReceived responseReceived, String dataUrl) { |
217 | 217 | HttpResponse response = null; |
218 | - String baseUrl = JSONObject.parseObject(responseReceived.getResponse()).getString("url"); | |
218 | + String baseUrl = JSON.parseObject(responseReceived.getResponse()).getString("url"); | |
219 | 219 | boolean notStaticFiles = !baseUrl.endsWith(".png") |
220 | 220 | && !baseUrl.endsWith(".jpg") |
221 | 221 | && !baseUrl.endsWith(".css") | ... | ... |
src/main/java/cn/fw/freya/service/crawl/impl/DongCheDiCrawl.java
... | ... | @@ -152,7 +152,7 @@ public class DongCheDiCrawl implements CrawlStrategy { |
152 | 152 | .build() |
153 | 153 | ); |
154 | 154 | String res = RequestUtil.get(config); |
155 | - final JSONObject response = JSONObject.parseObject(res); | |
155 | + final JSONObject response = JSON.parseObject(res); | |
156 | 156 | if (this.verifyCookies(response)) { |
157 | 157 | return null; |
158 | 158 | } |
... | ... | @@ -283,7 +283,7 @@ public class DongCheDiCrawl implements CrawlStrategy { |
283 | 283 | log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); |
284 | 284 | JSONObject response = new JSONObject(); |
285 | 285 | try { |
286 | - response = JSONObject.parseObject(res); | |
286 | + response = JSON.parseObject(res); | |
287 | 287 | } catch (Exception e) { |
288 | 288 | log.info(LocalDate.now() + " 暂未找到账户号为:" + accountNo + "的懂车帝直播数据"); |
289 | 289 | final LivePool nullLive = LivePool.builder() |
... | ... | @@ -413,7 +413,7 @@ public class DongCheDiCrawl implements CrawlStrategy { |
413 | 413 | .build() |
414 | 414 | ); |
415 | 415 | String res1 = RequestUtil.get(config1); |
416 | - final JSONObject response1 = JSONObject.parseObject(res1); | |
416 | + final JSONObject response1 = JSON.parseObject(res1); | |
417 | 417 | if (this.verifyCookies(response1)) { |
418 | 418 | return null; |
419 | 419 | } |
... | ... | @@ -480,7 +480,7 @@ public class DongCheDiCrawl implements CrawlStrategy { |
480 | 480 | .build() |
481 | 481 | ); |
482 | 482 | String res = RequestUtil.get(config); |
483 | - final JSONObject response = JSONObject.parseObject(res); | |
483 | + final JSONObject response = JSON.parseObject(res); | |
484 | 484 | if (this.verifyCookies(response)) { |
485 | 485 | return null; |
486 | 486 | } |
... | ... | @@ -520,7 +520,7 @@ public class DongCheDiCrawl implements CrawlStrategy { |
520 | 520 | .build() |
521 | 521 | ); |
522 | 522 | String res = RequestUtil.get(config); |
523 | - final JSONObject response = JSONObject.parseObject(res); | |
523 | + final JSONObject response = JSON.parseObject(res); | |
524 | 524 | if (this.verifyCookies(response)) { |
525 | 525 | return null; |
526 | 526 | } | ... | ... |
src/main/java/cn/fw/freya/service/crawl/impl/KuaiShouCrawl.java
... | ... | @@ -102,7 +102,14 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
102 | 102 | final List<Account> accountList = accountDao.getAllKSAccount(); |
103 | 103 | accountList.forEach(item -> |
104 | 104 | Arrays.stream(DataTypeEnum.values()).forEach(item1 -> |
105 | - threadPoolExecutor.execute(() -> this.task(item.getAccountNo(), item1.getValue())) | |
105 | + threadPoolExecutor.execute(() -> { | |
106 | + Integer dataType = item1.getValue(); | |
107 | + if (Objects.equals(dataType, 2)) { | |
108 | + this.task(item.getAccountNo(), dataType, 1); | |
109 | + } else { | |
110 | + this.task(item.getAccountNo(), dataType, null); | |
111 | + } | |
112 | + }) | |
106 | 113 | ) |
107 | 114 | ); |
108 | 115 | return true; |
... | ... | @@ -114,9 +121,11 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
114 | 121 | * @param accountNo 账户号 |
115 | 122 | * @param dataType 数据类型 |
116 | 123 | */ |
117 | - public void task(String accountNo, Integer dataType) { | |
124 | + public void task(String accountNo, Integer dataType, Integer page) { | |
118 | 125 | String key = accountNo + "#" + dataType; |
119 | - final String ns_sig3 = this.getNS_sig3(accountNo, dataType, true); | |
126 | + if (Objects.nonNull(page)) | |
127 | + key += "#" + page; | |
128 | + String ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true); | |
120 | 129 | if (Objects.nonNull(ns_sig3)) |
121 | 130 | sig3Map.put(key, ns_sig3); |
122 | 131 | else |
... | ... | @@ -248,6 +257,53 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
248 | 257 | } |
249 | 258 | |
250 | 259 | /** |
260 | + * 分页获取快手视频数据 | |
261 | + * | |
262 | + * @param accountNo 账号 | |
263 | + * @param page 第几页 | |
264 | + * @return 快手返回data | |
265 | + * @throws IOException 异常 | |
266 | + */ | |
267 | + private JSONObject getVideoDataByPage(String accountNo, Integer page) throws IOException { | |
268 | + HttpCookies cookies = HttpCookies.custom(); | |
269 | + CookieStore cookieStore = new BasicCookieStore(); | |
270 | + cookies.setCookieStore(cookieStore); | |
271 | + final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.VIDEO.getValue(), page, false); | |
272 | + if (Objects.isNull(ns_sig3)) | |
273 | + return null; | |
274 | + Map<String, Object> params = new LinkedHashMap<>(); | |
275 | + params.put("count", 10); | |
276 | + params.put("page", page); | |
277 | + //params.put("total", 9007199000000000L + new Random().nextInt(999999999)); | |
278 | + params.put("kuaishou.web.cp.api_ph", this.getWebApiPh(accountNo)); | |
279 | + HttpConfig config = HttpConfig.custom() | |
280 | + .url("https://cp.kuaishou.com/rest/cp/creator/pc/analysis/photo/list?__NS_sig3=" + ns_sig3) | |
281 | + .context(cookies.getContext()) | |
282 | + .json(JsonUtils.objectToJson(params)) | |
283 | + .headers(HttpHeader | |
284 | + .defaultHeader() | |
285 | + .contentType("application/json") | |
286 | + .host("cp.kuaishou.com") | |
287 | + .cookie(this.getUserCookies(accountNo)) | |
288 | + .build() | |
289 | + ); | |
290 | + String res = RequestUtil.post(config);// 发送POST请求 | |
291 | + log.info(String.format("%s [%s]平台账户号为: %s的视频数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); | |
292 | + final JSONObject response = JSON.parseObject(res); | |
293 | + if (this.verifyCookies(response)) { | |
294 | + return null; | |
295 | + } | |
296 | + if (!StringUtils.hasText(res)) { | |
297 | + throw new BusinessException("调用快手[视频]接口失败"); | |
298 | + } | |
299 | + if (Objects.equals(response.getInteger("result"), 500002)) { | |
300 | + threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.VIDEO.getValue(), page)); | |
301 | + throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); | |
302 | + } | |
303 | + return Optional.ofNullable(response.getJSONObject("data")).orElse(new JSONObject()); | |
304 | + } | |
305 | + | |
306 | + /** | |
251 | 307 | * 获取所有视频作品信息 |
252 | 308 | * |
253 | 309 | * @param accountNo 账户号 |
... | ... | @@ -260,7 +316,21 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
260 | 316 | if (Objects.nonNull(hasFoundVideo)) { |
261 | 317 | return hasFoundVideo; |
262 | 318 | } |
263 | - HttpCookies cookies = HttpCookies.custom(); | |
319 | + Date previousDay = DateUtil.getPreviousDay(new Date()); | |
320 | + int page = 1; | |
321 | + Integer total; | |
322 | + JSONArray videoJsonArray = new JSONArray(); | |
323 | + do { | |
324 | + JSONObject obj = this.getVideoDataByPage(accountNo, page); | |
325 | + total = Objects.requireNonNull(obj).getInteger("totalCount"); | |
326 | + videoJsonArray.addAll(Optional.ofNullable(obj.getJSONArray("photoList")).orElse(new JSONArray())); | |
327 | + page++; | |
328 | + JSONArray photoList = obj.getJSONArray("photoList"); | |
329 | + String publishTime = JSON.parseObject(JSON.toJSONString(photoList.get(photoList.size() - 1))).getString("publishTime"); | |
330 | + if (Objects.requireNonNull(DateUtil.parse(publishTime)).compareTo(DateUtil.getMonthFirstDay(DateUtil.getThisDayMinTime(previousDay))) < 0) | |
331 | + break; | |
332 | + } while (total > 10 * (page - 1)); | |
333 | + /*HttpCookies cookies = HttpCookies.custom(); | |
264 | 334 | CookieStore cookieStore = new BasicCookieStore(); |
265 | 335 | cookies.setCookieStore(cookieStore); |
266 | 336 | Date previousDay = DateUtil.getPreviousDay(new Date()); |
... | ... | @@ -285,7 +355,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
285 | 355 | ); |
286 | 356 | String res = RequestUtil.post(config);// 发送POST请求 |
287 | 357 | log.info(String.format("%s [%s]平台账户号为: %s的视频数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); |
288 | - final JSONObject response = JSONObject.parseObject(res); | |
358 | + final JSONObject response = JSON.parseObject(res); | |
289 | 359 | if (this.verifyCookies(response)) { |
290 | 360 | return null; |
291 | 361 | } |
... | ... | @@ -297,6 +367,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
297 | 367 | throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); |
298 | 368 | } |
299 | 369 | JSONArray videoJsonArray = Optional.ofNullable(Optional.ofNullable(response.getJSONObject("data")).orElse(new JSONObject()).getJSONArray("photoList")).orElse(new JSONArray()); |
370 | + */ | |
300 | 371 | videoPoolDao.deleteByAccountNoAndDate(accountNo, previousDay, AccountTypeEnum.KS.getValue(), ResourceTypeEnum.VIDEO.getValue()); |
301 | 372 | // 视频数据存库 |
302 | 373 | List<VideoPool> videoPoolList = new ArrayList<>(videoJsonArray.size()); |
... | ... | @@ -354,7 +425,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
354 | 425 | //} |
355 | 426 | }); |
356 | 427 | try { |
357 | - if (videoPoolList.size() > 0) { | |
428 | + if (!videoPoolList.isEmpty()) { | |
358 | 429 | videoPoolDao.saveAll(videoPoolList);// 将收集到的视频信息保存 |
359 | 430 | } else { |
360 | 431 | final VideoPool nullVideo = VideoPool.builder() |
... | ... | @@ -394,7 +465,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
394 | 465 | Date endTime = DateUtil.getThisDayMaxTime(previousDay); |
395 | 466 | Date startTime = DateUtil.getThisDayMinTime(previousDay); |
396 | 467 | //Date startTime = DateUtil.getThisDayMinTime(new Date(previousDay.getTime() - 7 * 24 * 3600 * 1000L));// 补数据使用 |
397 | - final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), false); | |
468 | + final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), null, false); | |
398 | 469 | if (Objects.isNull(ns_sig3)) |
399 | 470 | return null; |
400 | 471 | Map<String, Object> params = new LinkedHashMap<>(); |
... | ... | @@ -418,7 +489,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
418 | 489 | ); |
419 | 490 | String res = RequestUtil.post(config);// 发送POST请求 |
420 | 491 | log.info(String.format("%s [%s]平台账户号为: %s的直播数据的原始数据为: %s", LocalDateTime.now(), this.getType().getName(), accountNo, res)); |
421 | - final JSONObject response = JSONObject.parseObject(res); | |
492 | + final JSONObject response = JSON.parseObject(res); | |
422 | 493 | if (this.verifyCookies(response)) { |
423 | 494 | return null; |
424 | 495 | } |
... | ... | @@ -426,7 +497,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
426 | 497 | throw new BusinessException("调用快手[视频]接口失败"); |
427 | 498 | } |
428 | 499 | if (Objects.equals(response.getInteger("result"), 500002)) { |
429 | - threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.LIVE.getValue())); | |
500 | + threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.LIVE.getValue(), null)); | |
430 | 501 | throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); |
431 | 502 | } |
432 | 503 | JSONObject dataJSONObject = response.getJSONObject("data"); |
... | ... | @@ -612,7 +683,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
612 | 683 | if (!StringUtils.hasText(res)) { |
613 | 684 | return objects; |
614 | 685 | } |
615 | - JSONObject resObj = JSONObject.parseObject(res); | |
686 | + JSONObject resObj = JSON.parseObject(res); | |
616 | 687 | try { |
617 | 688 | return resObj.getJSONObject("data").getJSONObject("playbackFeeds").getJSONArray("list"); |
618 | 689 | } catch (Exception e) { |
... | ... | @@ -638,7 +709,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
638 | 709 | cookies.setCookieStore(cookieStore); |
639 | 710 | Map<String, Object> params = new HashMap<>(); |
640 | 711 | params.put("kuaishou.web.cp.api_ph", this.getWebApiPh(accountNo)); |
641 | - final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), false); | |
712 | + final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), null, false); | |
642 | 713 | if (Objects.isNull(ns_sig3)) |
643 | 714 | return null; |
644 | 715 | HttpConfig config = HttpConfig.custom() |
... | ... | @@ -654,7 +725,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
654 | 725 | .build() |
655 | 726 | ); |
656 | 727 | String res = RequestUtil.post(config);// 发送POST请求 |
657 | - final JSONObject response = JSONObject.parseObject(res); | |
728 | + final JSONObject response = JSON.parseObject(res); | |
658 | 729 | if (this.verifyCookies(response)) { |
659 | 730 | return null; |
660 | 731 | } |
... | ... | @@ -662,7 +733,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
662 | 733 | throw new BusinessException("调用快手[直播]接口失败"); |
663 | 734 | } |
664 | 735 | if (Objects.equals(response.getInteger("result"), 500002)) { |
665 | - threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.FANS.getValue())); | |
736 | + threadPoolExecutor.execute(() -> this.task(accountNo, DataTypeEnum.FANS.getValue(), null)); | |
666 | 737 | throw new BusinessException("获取数据失败, 尝试重新获取sig3签名信息"); |
667 | 738 | } |
668 | 739 | JSONObject data = response.getJSONObject("data"); |
... | ... | @@ -727,24 +798,29 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
727 | 798 | * |
728 | 799 | * @param accountNo 快手账户号 |
729 | 800 | * @param type 密钥类型(1:粉丝, 2:短视频, 3:直播) |
801 | + * @param page page number of the data (used for paginated video data; null for fans/live) | |
730 | 802 | * @param retryGet 是否重新获取 |
731 | 803 | */ |
732 | - public String getNS_sig3(String accountNo, Integer type, boolean retryGet) { | |
733 | - final String key = accountNo + "#" + type; | |
804 | + public String getNS_sig3(String accountNo, Integer type, Integer page, boolean retryGet) { | |
805 | + String key = accountNo + "#" + type; | |
734 | 806 | String NS_sig3; |
735 | 807 | if (!retryGet) { |
808 | + if (Objects.nonNull(page)) | |
809 | + key += "#" + page; | |
736 | 810 | NS_sig3 = sig3Map.get(key); |
737 | 811 | if (StringUtils.hasText(NS_sig3)) |
738 | 812 | return NS_sig3; |
739 | 813 | } |
814 | + boolean videoDimension = Objects.equals(type, DataTypeEnum.VIDEO.getValue()); | |
815 | + boolean fansDimension = Objects.equals(type, DataTypeEnum.FANS.getValue()); | |
740 | 816 | final String uuid = UUID.randomUUID().toString().replace("-", ""); |
741 | 817 | final WebDriver driver = this.getKSDriver(accountNo, uuid); |
742 | 818 | String targetUrl = null; |
743 | 819 | String dataUrl = null;// 数据接口地址 |
744 | - if (Objects.equals(type, DataTypeEnum.FANS.getValue())) { | |
820 | + if (fansDimension) { | |
745 | 821 | targetUrl = "https://cp.kuaishou.com/profile"; |
746 | 822 | dataUrl = "https://cp.kuaishou.com/rest/cp/creator/pc/home/infoV2"; |
747 | - } else if (Objects.equals(type, DataTypeEnum.VIDEO.getValue())) { | |
823 | + } else if (videoDimension) { | |
748 | 824 | targetUrl = "https://cp.kuaishou.com/statistics/works"; |
749 | 825 | dataUrl = "https://cp.kuaishou.com/rest/cp/creator/pc/analysis/photo/list"; |
750 | 826 | } else if (Objects.equals(type, DataTypeEnum.LIVE.getValue())) { |
... | ... | @@ -758,7 +834,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
758 | 834 | throw new BusinessException("跳转页面发生异常"); |
759 | 835 | } |
760 | 836 | LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); |
761 | - if (Objects.equals(type, DataTypeEnum.FANS.getValue())) { | |
837 | + if (fansDimension) { | |
762 | 838 | driver.get("https://cp.kuaishou.com/article/manage/video"); |
763 | 839 | LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); |
764 | 840 | if (Objects.equals(targetUrl, driver.getCurrentUrl())) { |
... | ... | @@ -770,7 +846,29 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
770 | 846 | this.exitBrowser(accountNo, uuid); |
771 | 847 | return null; |
772 | 848 | } |
849 | + Integer maxPageNum = 0; | |
850 | + if (videoDimension) { | |
851 | + List<WebElement> pageLabels = new WebDriverWait(driver, 15, 300).until(driver1 -> | |
852 | + driver1.findElements(By.xpath("//ul[@class='el-pager']/li")));// 获取分页页码标签元素列表 | |
853 | + maxPageNum = Integer.valueOf(pageLabels.get(pageLabels.size() - 1).getText());// 最大页码 | |
854 | + if (!CollectionUtils.isEmpty(pageLabels)) { | |
855 | + for (WebElement item : pageLabels) { | |
856 | + try { | |
857 | + /*if (Objects.equals(item.getText(), "...")) { | |
858 | + pageLabels = new WebDriverWait(driver, 15, 300).until(driver1 -> | |
859 | + driver1.findElements(By.xpath("//ul[@class='el-pager']/li"))); | |
860 | + item.click(); | |
861 | + }*/ | |
862 | + item.click(); | |
863 | + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2)); | |
864 | + } catch (Exception e) { | |
865 | + // this.exitBrowser(accountNo, uuid); | |
866 | + } | |
867 | + } | |
868 | + } | |
869 | + } | |
773 | 870 | try { |
871 | + ArrayList<String> sigList = new ArrayList<>(); | |
774 | 872 | final List<ResponseReceived> responseReceivedEvents = common.processHttpTransferData(driver); |
775 | 873 | for (ResponseReceived item : responseReceivedEvents) { |
776 | 874 | String str = this.getDataUrl(item, dataUrl); |
... | ... | @@ -778,16 +876,25 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
778 | 876 | this.exitBrowser(accountNo, uuid); |
779 | 877 | String[] split = str.split("="); |
780 | 878 | NS_sig3 = split[1]; |
781 | - sig3Map.put(key, NS_sig3); | |
782 | - return NS_sig3; | |
879 | + if (videoDimension) { | |
880 | + sigList.add(NS_sig3); | |
881 | + } else { | |
882 | + sig3Map.put(key, NS_sig3); | |
883 | + return NS_sig3; | |
884 | + } | |
783 | 885 | } |
784 | 886 | } |
887 | + for (int i = 0; i < sigList.size(); i++) { | |
888 | + sig3Map.put(accountNo + "#" + type + "#" + (i + 1), sigList.get(i)); | |
889 | + if (sigList.size() < maxPageNum && i > 4) | |
890 | + break; | |
891 | + } | |
785 | 892 | } catch (Exception e) { |
786 | 893 | this.exitBrowser(accountNo, uuid); |
787 | 894 | throw new BusinessException(e.getMessage()); |
788 | 895 | } |
789 | 896 | this.exitBrowser(accountNo, uuid); |
790 | - return null; | |
897 | + return sig3Map.get(key); | |
791 | 898 | } |
792 | 899 | |
793 | 900 | /** |
... | ... | @@ -798,7 +905,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
798 | 905 | * @return |
799 | 906 | */ |
800 | 907 | public String getDataUrl(ResponseReceived responseReceived, String dataUrl) { |
801 | - String baseUrl = JSONObject.parseObject(responseReceived.getResponse()).getString("url"); | |
908 | + String baseUrl = JSON.parseObject(responseReceived.getResponse()).getString("url"); | |
802 | 909 | boolean notStaticFiles = !baseUrl.endsWith(".png") |
803 | 910 | && !baseUrl.endsWith(".jpg") |
804 | 911 | && !baseUrl.endsWith(".css") | ... | ... |
src/main/java/cn/fw/freya/service/rpc/ReportRpcService.java
... | ... | @@ -55,7 +55,7 @@ public class ReportRpcService { |
55 | 55 | if (!StringUtils.hasText(res)) { |
56 | 56 | return false; |
57 | 57 | } |
58 | - JSONObject resObj = JSONObject.parseObject(res); | |
58 | + JSONObject resObj = JSON.parseObject(res); | |
59 | 59 | Boolean result = resObj.getBoolean("success"); |
60 | 60 | if (Boolean.FALSE.equals(result)) { |
61 | 61 | Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1); |
... | ... | @@ -112,7 +112,7 @@ public class ReportRpcService { |
112 | 112 | if (!StringUtils.hasText(res)) { |
113 | 113 | return false; |
114 | 114 | } |
115 | - JSONObject resObj = JSONObject.parseObject(res); | |
115 | + JSONObject resObj = JSON.parseObject(res); | |
116 | 116 | Boolean result = resObj.getBoolean("success"); |
117 | 117 | if (Boolean.FALSE.equals(result)) { |
118 | 118 | Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1); |
... | ... | @@ -174,7 +174,7 @@ public class ReportRpcService { |
174 | 174 | if (!StringUtils.hasText(res)) { |
175 | 175 | return false; |
176 | 176 | } |
177 | - JSONObject resObj = JSONObject.parseObject(res); | |
177 | + JSONObject resObj = JSON.parseObject(res); | |
178 | 178 | Boolean result = resObj.getBoolean("success"); |
179 | 179 | if (Boolean.FALSE.equals(result)) { |
180 | 180 | Integer status = Optional.ofNullable(resObj.getInteger("status")).orElse(-1); | ... | ... |