Commit 8649c6990fb302703cbbec869fec86fd88ec8199

Authored by 王明元
1 parent 4dad2096

2022年11月3日17:21:23 调整获取签名策略

src/main/java/cn/fw/freya/service/crawl/impl/KuaiShouCrawl.java
@@ -85,6 +85,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -85,6 +85,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
85 private final String playbackBaseUrl = "https://live.kuaishou.com/playback/"; 85 private final String playbackBaseUrl = "https://live.kuaishou.com/playback/";
86 private final ConcurrentHashMap<String, String> sig3Map = new ConcurrentHashMap<>(); 86 private final ConcurrentHashMap<String, String> sig3Map = new ConcurrentHashMap<>();
87 private final AccountRpcService accountRpcService; 87 private final AccountRpcService accountRpcService;
  88 + private static final String RE_TRY_GET_SIG_FLAG = "reTry";
88 89
89 @Resource(name = "wmyThreadPool") 90 @Resource(name = "wmyThreadPool")
90 private ThreadPoolExecutor threadPoolExecutor; 91 private ThreadPoolExecutor threadPoolExecutor;
@@ -125,7 +126,12 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -125,7 +126,12 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
125 String key = accountNo + "#" + dataType; 126 String key = accountNo + "#" + dataType;
126 if (Objects.nonNull(page)) 127 if (Objects.nonNull(page))
127 key += "#" + page; 128 key += "#" + page;
128 - String ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true); 129 + String ns_sig3;
  130 + int tryTimes = 0;// number of signature attempts made so far
  131 + do {
  132 + ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true);
  133 + tryTimes++;
  134 + } while (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG) && tryTimes < 5);// retry only while the retry sentinel is returned, at most 5 attempts in total (the previous "> 5" was never true after the first attempt, so no retry ever happened)
129 if (Objects.nonNull(ns_sig3)) 135 if (Objects.nonNull(ns_sig3))
130 sig3Map.put(key, ns_sig3); 136 sig3Map.put(key, ns_sig3);
131 else 137 else
@@ -271,6 +277,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -271,6 +277,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
271 final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.VIDEO.getValue(), page, false); 277 final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.VIDEO.getValue(), page, false);
272 if (Objects.isNull(ns_sig3)) 278 if (Objects.isNull(ns_sig3))
273 return null; 279 return null;
  280 + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG))
  281 + throw new BusinessException("获取签名失败, 等待下一次重新获取签名");
274 Map<String, Object> params = new LinkedHashMap<>(); 282 Map<String, Object> params = new LinkedHashMap<>();
275 params.put("count", 10); 283 params.put("count", 10);
276 params.put("page", page); 284 params.put("page", page);
@@ -470,6 +478,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -470,6 +478,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
470 final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), null, false); 478 final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), null, false);
471 if (Objects.isNull(ns_sig3)) 479 if (Objects.isNull(ns_sig3))
472 return null; 480 return null;
  481 + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG))
  482 + throw new BusinessException("获取签名失败, 等待下一次重新获取签名");
473 Map<String, Object> params = new LinkedHashMap<>(); 483 Map<String, Object> params = new LinkedHashMap<>();
474 params.put("memberId", this.getUserId(accountNo)); 484 params.put("memberId", this.getUserId(accountNo));
475 params.put("endTime", endTime.getTime()); 485 params.put("endTime", endTime.getTime());
@@ -714,6 +724,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -714,6 +724,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
714 final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), null, false); 724 final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), null, false);
715 if (Objects.isNull(ns_sig3)) 725 if (Objects.isNull(ns_sig3))
716 return null; 726 return null;
  727 + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG))
  728 + throw new BusinessException("获取签名失败, 等待下一次重新获取签名");
717 HttpConfig config = HttpConfig.custom() 729 HttpConfig config = HttpConfig.custom()
718 .url("https://cp.kuaishou.com/rest/cp/creator/pc/home/infoV2?__NS_sig3=" + ns_sig3) 730 .url("https://cp.kuaishou.com/rest/cp/creator/pc/home/infoV2?__NS_sig3=" + ns_sig3)
719 .context(cookies.getContext()) 731 .context(cookies.getContext())
@@ -839,32 +851,38 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -839,32 +851,38 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
839 if (fansDimension) { 851 if (fansDimension) {
840 driver.get("https://cp.kuaishou.com/article/manage/video"); 852 driver.get("https://cp.kuaishou.com/article/manage/video");
841 LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); 853 LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5));
842 - if (Objects.equals(targetUrl, driver.getCurrentUrl())) { 854 + }
  855 + try {
  856 + new WebDriverWait(driver, 15, 300).until(driver1 ->
  857 + driver1.findElement(By.xpath("//span[@class='publish-button__text']")));// '发布视频'按钮
  858 + } catch (Exception e) {
  859 + if (!new WebDriverWait(driver, 3, 300).until(driver1 ->
  860 + driver1.findElements(By.xpath("//a[@class='login']"))).isEmpty()) {
843 this.exitBrowser(accountNo, uuid); 861 this.exitBrowser(accountNo, uuid);
844 return null; 862 return null;
845 } 863 }
846 - }  
847 - if (Objects.equals("https://cp.kuaishou.com/profile", driver.getCurrentUrl())) {// 页面未跳转到视频/直播数据页面  
848 this.exitBrowser(accountNo, uuid); 864 this.exitBrowser(accountNo, uuid);
849 - return null; 865 + return RE_TRY_GET_SIG_FLAG;
850 } 866 }
851 int maxPageNum = 0; 867 int maxPageNum = 0;
852 if (videoDimension) { 868 if (videoDimension) {
853 List<WebElement> pageLabels = this.findPageLabels(driver);// 获取分页页码标签元素列表 869 List<WebElement> pageLabels = this.findPageLabels(driver);// 获取分页页码标签元素列表
854 - Map<Integer, WebElement> labelMap = this.processPageElement(pageLabels);// 将标签元素处理成K(页码), V(标签element) 870 + Map<Integer, WebElement> labelMap;// K(页码), V(标签element)
855 if (!CollectionUtils.isEmpty(pageLabels)) { 871 if (!CollectionUtils.isEmpty(pageLabels)) {
  872 + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(4));
  873 + labelMap = this.processPageElement(pageLabels);// 将标签元素处理成K(页码), V(标签element)
856 maxPageNum = Integer.parseInt(pageLabels.get(pageLabels.size() - 1).getText());// 最大页码 874 maxPageNum = Integer.parseInt(pageLabels.get(pageLabels.size() - 1).getText());// 最大页码
857 for (int i = 0; i < maxPageNum; i++) { 875 for (int i = 0; i < maxPageNum; i++) {
858 try { 876 try {
859 WebElement pageLabel = this.findPageLabelFromMap(labelMap, i + 1); 877 WebElement pageLabel = this.findPageLabelFromMap(labelMap, i + 1);
860 if (Objects.isNull(pageLabel)) { 878 if (Objects.isNull(pageLabel)) {
861 - LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(1)); 879 + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2));
862 pageLabels = this.findPageLabels(driver); 880 pageLabels = this.findPageLabels(driver);
863 labelMap = this.processPageElement(pageLabels); 881 labelMap = this.processPageElement(pageLabels);
864 pageLabel = this.findPageLabelFromMap(labelMap, i + 1); 882 pageLabel = this.findPageLabelFromMap(labelMap, i + 1);
865 } 883 }
866 pageLabel.click(); 884 pageLabel.click();
867 - LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2)); 885 + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(4));
868 } catch (Exception e) { 886 } catch (Exception e) {
869 log.info("异常发生, 信息为: {}", e.getMessage(), e); 887 log.info("异常发生, 信息为: {}", e.getMessage(), e);
870 } 888 }
@@ -896,6 +914,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { @@ -896,6 +914,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle {
896 throw new BusinessException(e.getMessage()); 914 throw new BusinessException(e.getMessage());
897 } 915 }
898 this.exitBrowser(accountNo, uuid); 916 this.exitBrowser(accountNo, uuid);
  917 + log.info("key: {}, value: {}", key, sig3Map.get(key));
899 return sig3Map.get(key); 918 return sig3Map.get(key);
900 } 919 }
901 920