Commit 8649c6990fb302703cbbec869fec86fd88ec8199
1 parent
4dad2096
2022年11月3日17:21:23 调整获取签名策略
Showing
1 changed file
with
27 additions
and
8 deletions
src/main/java/cn/fw/freya/service/crawl/impl/KuaiShouCrawl.java
... | ... | @@ -85,6 +85,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
85 | 85 | private final String playbackBaseUrl = "https://live.kuaishou.com/playback/"; |
86 | 86 | private final ConcurrentHashMap<String, String> sig3Map = new ConcurrentHashMap<>(); |
87 | 87 | private final AccountRpcService accountRpcService; |
88 | + private static final String RE_TRY_GET_SIG_FLAG = "reTry"; | |
88 | 89 | |
89 | 90 | @Resource(name = "wmyThreadPool") |
90 | 91 | private ThreadPoolExecutor threadPoolExecutor; |
... | ... | @@ -125,7 +126,12 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
125 | 126 | String key = accountNo + "#" + dataType; |
126 | 127 | if (Objects.nonNull(page)) |
127 | 128 | key += "#" + page; |
128 | - String ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true); | |
129 | + String ns_sig3;// result of getNS_sig3; compared against RE_TRY_GET_SIG_FLAG below | |
130 | + int tryTimes = 0;// number of attempts made so far | |
131 | + do { | |
132 | + ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true); | |
133 | + tryTimes++; | |
134 | + } while (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG) && tryTimes < 5);// fix: was `tryTimes > 5`, which is false after the first pass so no retry ever ran; now repeats while the retry flag is returned, at most 5 attempts | |
129 | 135 | if (Objects.nonNull(ns_sig3)) |
130 | 136 | sig3Map.put(key, ns_sig3); |
131 | 137 | else |
... | ... | @@ -271,6 +277,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
271 | 277 | final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.VIDEO.getValue(), page, false); |
272 | 278 | if (Objects.isNull(ns_sig3)) |
273 | 279 | return null; |
280 | + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG)) | |
281 | + throw new BusinessException("获取签名失败, 等待下一次重新获取签名"); | |
274 | 282 | Map<String, Object> params = new LinkedHashMap<>(); |
275 | 283 | params.put("count", 10); |
276 | 284 | params.put("page", page); |
... | ... | @@ -470,6 +478,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
470 | 478 | final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), null, false); |
471 | 479 | if (Objects.isNull(ns_sig3)) |
472 | 480 | return null; |
481 | + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG)) | |
482 | + throw new BusinessException("获取签名失败, 等待下一次重新获取签名"); | |
473 | 483 | Map<String, Object> params = new LinkedHashMap<>(); |
474 | 484 | params.put("memberId", this.getUserId(accountNo)); |
475 | 485 | params.put("endTime", endTime.getTime()); |
... | ... | @@ -714,6 +724,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
714 | 724 | final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), null, false); |
715 | 725 | if (Objects.isNull(ns_sig3)) |
716 | 726 | return null; |
727 | + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG)) | |
728 | + throw new BusinessException("获取签名失败, 等待下一次重新获取签名"); | |
717 | 729 | HttpConfig config = HttpConfig.custom() |
718 | 730 | .url("https://cp.kuaishou.com/rest/cp/creator/pc/home/infoV2?__NS_sig3=" + ns_sig3) |
719 | 731 | .context(cookies.getContext()) |
... | ... | @@ -839,32 +851,38 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
839 | 851 | if (fansDimension) { |
840 | 852 | driver.get("https://cp.kuaishou.com/article/manage/video"); |
841 | 853 | LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); |
842 | - if (Objects.equals(targetUrl, driver.getCurrentUrl())) { | |
854 | + } | |
855 | + try { | |
856 | + new WebDriverWait(driver, 15, 300).until(driver1 -> | |
857 | + driver1.findElement(By.xpath("//span[@class='publish-button__text']")));// '发布视频'按钮 | |
858 | + } catch (Exception e) { | |
859 | + if (!new WebDriverWait(driver, 3, 300).until(driver1 -> | |
860 | + driver1.findElements(By.xpath("//a[@class='login']"))).isEmpty()) { | |
843 | 861 | this.exitBrowser(accountNo, uuid); |
844 | 862 | return null; |
845 | 863 | } |
846 | - } | |
847 | - if (Objects.equals("https://cp.kuaishou.com/profile", driver.getCurrentUrl())) {// 页面未跳转到视频/直播数据页面 | |
848 | 864 | this.exitBrowser(accountNo, uuid); |
849 | - return null; | |
865 | + return RE_TRY_GET_SIG_FLAG; | |
850 | 866 | } |
851 | 867 | int maxPageNum = 0; |
852 | 868 | if (videoDimension) { |
853 | 869 | List<WebElement> pageLabels = this.findPageLabels(driver);// 获取分页页码标签元素列表 |
854 | - Map<Integer, WebElement> labelMap = this.processPageElement(pageLabels);// 将标签元素处理成K(页码), V(标签element) | |
870 | + Map<Integer, WebElement> labelMap;// K(页码), V(标签element) | |
855 | 871 | if (!CollectionUtils.isEmpty(pageLabels)) { |
872 | + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(4)); | |
873 | + labelMap = this.processPageElement(pageLabels);// 将标签元素处理成K(页码), V(标签element) | |
856 | 874 | maxPageNum = Integer.parseInt(pageLabels.get(pageLabels.size() - 1).getText());// 最大页码 |
857 | 875 | for (int i = 0; i < maxPageNum; i++) { |
858 | 876 | try { |
859 | 877 | WebElement pageLabel = this.findPageLabelFromMap(labelMap, i + 1); |
860 | 878 | if (Objects.isNull(pageLabel)) { |
861 | - LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(1)); | |
879 | + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2)); | |
862 | 880 | pageLabels = this.findPageLabels(driver); |
863 | 881 | labelMap = this.processPageElement(pageLabels); |
864 | 882 | pageLabel = this.findPageLabelFromMap(labelMap, i + 1); |
865 | 883 | } |
866 | 884 | pageLabel.click(); |
867 | - LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2)); | |
885 | + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(4)); | |
868 | 886 | } catch (Exception e) { |
869 | 887 | log.info("异常发生, 信息为: {}", e.getMessage(), e); |
870 | 888 | } |
... | ... | @@ -896,6 +914,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { |
896 | 914 | throw new BusinessException(e.getMessage()); |
897 | 915 | } |
898 | 916 | this.exitBrowser(accountNo, uuid); |
917 | + log.info("key: {}, value: {}", key, sig3Map.get(key)); | |
899 | 918 | return sig3Map.get(key); |
900 | 919 | } |
901 | 920 | ... | ... |