Commit 8649c6990fb302703cbbec869fec86fd88ec8199

Authored by 王明元
1 parent 4dad2096

2022年11月3日17:21:23 调整获取签名策略

src/main/java/cn/fw/freya/service/crawl/impl/KuaiShouCrawl.java
... ... @@ -85,6 +85,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
85 85 private final String playbackBaseUrl = "https://live.kuaishou.com/playback/";
86 86 private final ConcurrentHashMap<String, String> sig3Map = new ConcurrentHashMap<>();
87 87 private final AccountRpcService accountRpcService;
  88 + private static final String RE_TRY_GET_SIG_FLAG = "reTry";
88 89  
89 90 @Resource(name = "wmyThreadPool")
90 91 private ThreadPoolExecutor threadPoolExecutor;
... ... @@ -125,7 +126,12 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
125 126 String key = accountNo + "#" + dataType;
126 127 if (Objects.nonNull(page))
127 128 key += "#" + page;
128   - String ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true);
  129 + String ns_sig3;
  130 + int tryTimes = 0;
  131 + do {
  132 + ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true);
  133 + tryTimes++;
  134 + } while (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG) && tryTimes > 5);
129 135 if (Objects.nonNull(ns_sig3))
130 136 sig3Map.put(key, ns_sig3);
131 137 else
... ... @@ -271,6 +277,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
271 277 final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.VIDEO.getValue(), page, false);
272 278 if (Objects.isNull(ns_sig3))
273 279 return null;
  280 + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG))
  281 + throw new BusinessException("获取签名失败, 等待下一次重新获取签名");
274 282 Map<String, Object> params = new LinkedHashMap<>();
275 283 params.put("count", 10);
276 284 params.put("page", page);
... ... @@ -470,6 +478,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
470 478 final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), null, false);
471 479 if (Objects.isNull(ns_sig3))
472 480 return null;
  481 + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG))
  482 + throw new BusinessException("获取签名失败, 等待下一次重新获取签名");
473 483 Map<String, Object> params = new LinkedHashMap<>();
474 484 params.put("memberId", this.getUserId(accountNo));
475 485 params.put("endTime", endTime.getTime());
... ... @@ -714,6 +724,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
714 724 final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), null, false);
715 725 if (Objects.isNull(ns_sig3))
716 726 return null;
  727 + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG))
  728 + throw new BusinessException("获取签名失败, 等待下一次重新获取签名");
717 729 HttpConfig config = HttpConfig.custom()
718 730 .url("https://cp.kuaishou.com/rest/cp/creator/pc/home/infoV2?__NS_sig3=" + ns_sig3)
719 731 .context(cookies.getContext())
... ... @@ -839,32 +851,38 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
839 851 if (fansDimension) {
840 852 driver.get("https://cp.kuaishou.com/article/manage/video");
841 853 LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5));
842   - if (Objects.equals(targetUrl, driver.getCurrentUrl())) {
  854 + }
  855 + try {
  856 + new WebDriverWait(driver, 15, 300).until(driver1 ->
  857 + driver1.findElement(By.xpath("//span[@class='publish-button__text']")));// '发布视频'按钮
  858 + } catch (Exception e) {
  859 + if (!new WebDriverWait(driver, 3, 300).until(driver1 ->
  860 + driver1.findElements(By.xpath("//a[@class='login']"))).isEmpty()) {
843 861 this.exitBrowser(accountNo, uuid);
844 862 return null;
845 863 }
846   - }
847   - if (Objects.equals("https://cp.kuaishou.com/profile", driver.getCurrentUrl())) {// 页面未跳转到视频/直播数据页面
848 864 this.exitBrowser(accountNo, uuid);
849   - return null;
  865 + return RE_TRY_GET_SIG_FLAG;
850 866 }
851 867 int maxPageNum = 0;
852 868 if (videoDimension) {
853 869 List<WebElement> pageLabels = this.findPageLabels(driver);// 获取分页页码标签元素列表
854   - Map<Integer, WebElement> labelMap = this.processPageElement(pageLabels);// 将标签元素处理成K(页码), V(标签element)
  870 + Map<Integer, WebElement> labelMap;// K(页码), V(标签element)
855 871 if (!CollectionUtils.isEmpty(pageLabels)) {
  872 + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(4));
  873 + labelMap = this.processPageElement(pageLabels);// 将标签元素处理成K(页码), V(标签element)
856 874 maxPageNum = Integer.parseInt(pageLabels.get(pageLabels.size() - 1).getText());// 最大页码
857 875 for (int i = 0; i < maxPageNum; i++) {
858 876 try {
859 877 WebElement pageLabel = this.findPageLabelFromMap(labelMap, i + 1);
860 878 if (Objects.isNull(pageLabel)) {
861   - LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(1));
  879 + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2));
862 880 pageLabels = this.findPageLabels(driver);
863 881 labelMap = this.processPageElement(pageLabels);
864 882 pageLabel = this.findPageLabelFromMap(labelMap, i + 1);
865 883 }
866 884 pageLabel.click();
867   - LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2));
  885 + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(4));
868 886 } catch (Exception e) {
869 887 log.info("异常发生, 信息为: {}", e.getMessage(), e);
870 888 }
... ... @@ -896,6 +914,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
896 914 throw new BusinessException(e.getMessage());
897 915 }
898 916 this.exitBrowser(accountNo, uuid);
  917 + log.info("key: {}, value: {}", key, sig3Map.get(key));
899 918 return sig3Map.get(key);
900 919 }
901 920  
... ...