Commit 8649c6990fb302703cbbec869fec86fd88ec8199
1 parent 4dad2096
2022年11月3日17:21:23 调整获取签名策略
Showing 1 changed file with 27 additions and 8 deletions
src/main/java/cn/fw/freya/service/crawl/impl/KuaiShouCrawl.java
@@ -85,6 +85,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -85,6 +85,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
85 | private final String playbackBaseUrl = "https://live.kuaishou.com/playback/"; | 85 | private final String playbackBaseUrl = "https://live.kuaishou.com/playback/"; |
86 | private final ConcurrentHashMap<String, String> sig3Map = new ConcurrentHashMap<>(); | 86 | private final ConcurrentHashMap<String, String> sig3Map = new ConcurrentHashMap<>(); |
87 | private final AccountRpcService accountRpcService; | 87 | private final AccountRpcService accountRpcService; |
88 | + private static final String RE_TRY_GET_SIG_FLAG = "reTry"; | ||
88 | 89 | ||
89 | @Resource(name = "wmyThreadPool") | 90 | @Resource(name = "wmyThreadPool") |
90 | private ThreadPoolExecutor threadPoolExecutor; | 91 | private ThreadPoolExecutor threadPoolExecutor; |
@@ -125,7 +126,12 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -125,7 +126,12 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
125 | String key = accountNo + "#" + dataType; | 126 | String key = accountNo + "#" + dataType; |
126 | if (Objects.nonNull(page)) | 127 | if (Objects.nonNull(page)) |
127 | key += "#" + page; | 128 | key += "#" + page; |
128 | - String ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true); | 129 | + String ns_sig3; |
130 | + int tryTimes = 0; | ||
131 | + do { | ||
132 | + ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true); | ||
133 | + tryTimes++; | ||
134 | + } while (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG) && tryTimes > 5); | ||
129 | if (Objects.nonNull(ns_sig3)) | 135 | if (Objects.nonNull(ns_sig3)) |
130 | sig3Map.put(key, ns_sig3); | 136 | sig3Map.put(key, ns_sig3); |
131 | else | 137 | else |
@@ -271,6 +277,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -271,6 +277,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
271 | final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.VIDEO.getValue(), page, false); | 277 | final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.VIDEO.getValue(), page, false); |
272 | if (Objects.isNull(ns_sig3)) | 278 | if (Objects.isNull(ns_sig3)) |
273 | return null; | 279 | return null; |
280 | + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG)) | ||
281 | + throw new BusinessException("获取签名失败, 等待下一次重新获取签名"); | ||
274 | Map<String, Object> params = new LinkedHashMap<>(); | 282 | Map<String, Object> params = new LinkedHashMap<>(); |
275 | params.put("count", 10); | 283 | params.put("count", 10); |
276 | params.put("page", page); | 284 | params.put("page", page); |
@@ -470,6 +478,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -470,6 +478,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
470 | final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), null, false); | 478 | final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), null, false); |
471 | if (Objects.isNull(ns_sig3)) | 479 | if (Objects.isNull(ns_sig3)) |
472 | return null; | 480 | return null; |
481 | + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG)) | ||
482 | + throw new BusinessException("获取签名失败, 等待下一次重新获取签名"); | ||
473 | Map<String, Object> params = new LinkedHashMap<>(); | 483 | Map<String, Object> params = new LinkedHashMap<>(); |
474 | params.put("memberId", this.getUserId(accountNo)); | 484 | params.put("memberId", this.getUserId(accountNo)); |
475 | params.put("endTime", endTime.getTime()); | 485 | params.put("endTime", endTime.getTime()); |
@@ -714,6 +724,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -714,6 +724,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
714 | final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), null, false); | 724 | final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), null, false); |
715 | if (Objects.isNull(ns_sig3)) | 725 | if (Objects.isNull(ns_sig3)) |
716 | return null; | 726 | return null; |
727 | + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG)) | ||
728 | + throw new BusinessException("获取签名失败, 等待下一次重新获取签名"); | ||
717 | HttpConfig config = HttpConfig.custom() | 729 | HttpConfig config = HttpConfig.custom() |
718 | .url("https://cp.kuaishou.com/rest/cp/creator/pc/home/infoV2?__NS_sig3=" + ns_sig3) | 730 | .url("https://cp.kuaishou.com/rest/cp/creator/pc/home/infoV2?__NS_sig3=" + ns_sig3) |
719 | .context(cookies.getContext()) | 731 | .context(cookies.getContext()) |
@@ -839,32 +851,38 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -839,32 +851,38 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
839 | if (fansDimension) { | 851 | if (fansDimension) { |
840 | driver.get("https://cp.kuaishou.com/article/manage/video"); | 852 | driver.get("https://cp.kuaishou.com/article/manage/video"); |
841 | LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); | 853 | LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); |
842 | - if (Objects.equals(targetUrl, driver.getCurrentUrl())) { | 854 | + } |
855 | + try { | ||
856 | + new WebDriverWait(driver, 15, 300).until(driver1 -> | ||
857 | + driver1.findElement(By.xpath("//span[@class='publish-button__text']")));// '发布视频'按钮 | ||
858 | + } catch (Exception e) { | ||
859 | + if (!new WebDriverWait(driver, 3, 300).until(driver1 -> | ||
860 | + driver1.findElements(By.xpath("//a[@class='login']"))).isEmpty()) { | ||
843 | this.exitBrowser(accountNo, uuid); | 861 | this.exitBrowser(accountNo, uuid); |
844 | return null; | 862 | return null; |
845 | } | 863 | } |
846 | - } | ||
847 | - if (Objects.equals("https://cp.kuaishou.com/profile", driver.getCurrentUrl())) {// 页面未跳转到视频/直播数据页面 | ||
848 | this.exitBrowser(accountNo, uuid); | 864 | this.exitBrowser(accountNo, uuid); |
849 | - return null; | 865 | + return RE_TRY_GET_SIG_FLAG; |
850 | } | 866 | } |
851 | int maxPageNum = 0; | 867 | int maxPageNum = 0; |
852 | if (videoDimension) { | 868 | if (videoDimension) { |
853 | List<WebElement> pageLabels = this.findPageLabels(driver);// 获取分页页码标签元素列表 | 869 | List<WebElement> pageLabels = this.findPageLabels(driver);// 获取分页页码标签元素列表 |
854 | - Map<Integer, WebElement> labelMap = this.processPageElement(pageLabels);// 将标签元素处理成K(页码), V(标签element) | 870 | + Map<Integer, WebElement> labelMap;// K(页码), V(标签element) |
855 | if (!CollectionUtils.isEmpty(pageLabels)) { | 871 | if (!CollectionUtils.isEmpty(pageLabels)) { |
872 | + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(4)); | ||
873 | + labelMap = this.processPageElement(pageLabels);// 将标签元素处理成K(页码), V(标签element) | ||
856 | maxPageNum = Integer.parseInt(pageLabels.get(pageLabels.size() - 1).getText());// 最大页码 | 874 | maxPageNum = Integer.parseInt(pageLabels.get(pageLabels.size() - 1).getText());// 最大页码 |
857 | for (int i = 0; i < maxPageNum; i++) { | 875 | for (int i = 0; i < maxPageNum; i++) { |
858 | try { | 876 | try { |
859 | WebElement pageLabel = this.findPageLabelFromMap(labelMap, i + 1); | 877 | WebElement pageLabel = this.findPageLabelFromMap(labelMap, i + 1); |
860 | if (Objects.isNull(pageLabel)) { | 878 | if (Objects.isNull(pageLabel)) { |
861 | - LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(1)); | 879 | + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2)); |
862 | pageLabels = this.findPageLabels(driver); | 880 | pageLabels = this.findPageLabels(driver); |
863 | labelMap = this.processPageElement(pageLabels); | 881 | labelMap = this.processPageElement(pageLabels); |
864 | pageLabel = this.findPageLabelFromMap(labelMap, i + 1); | 882 | pageLabel = this.findPageLabelFromMap(labelMap, i + 1); |
865 | } | 883 | } |
866 | pageLabel.click(); | 884 | pageLabel.click(); |
867 | - LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2)); | 885 | + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(4)); |
868 | } catch (Exception e) { | 886 | } catch (Exception e) { |
869 | log.info("异常发生, 信息为: {}", e.getMessage(), e); | 887 | log.info("异常发生, 信息为: {}", e.getMessage(), e); |
870 | } | 888 | } |
@@ -896,6 +914,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | @@ -896,6 +914,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { | ||
896 | throw new BusinessException(e.getMessage()); | 914 | throw new BusinessException(e.getMessage()); |
897 | } | 915 | } |
898 | this.exitBrowser(accountNo, uuid); | 916 | this.exitBrowser(accountNo, uuid); |
917 | + log.info("key: {}, value: {}", key, sig3Map.get(key)); | ||
899 | return sig3Map.get(key); | 918 | return sig3Map.get(key); |
900 | } | 919 | } |
901 | 920 |