From 8649c6990fb302703cbbec869fec86fd88ec8199 Mon Sep 17 00:00:00 2001 From: 王明元 <97082371@qq.com> Date: Thu, 3 Nov 2022 17:21:41 +0800 Subject: [PATCH] 2022年11月3日17:21:23 调整获取签名策略 --- src/main/java/cn/fw/freya/service/crawl/impl/KuaiShouCrawl.java | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/src/main/java/cn/fw/freya/service/crawl/impl/KuaiShouCrawl.java b/src/main/java/cn/fw/freya/service/crawl/impl/KuaiShouCrawl.java index 855c23d..276c748 100644 --- a/src/main/java/cn/fw/freya/service/crawl/impl/KuaiShouCrawl.java +++ b/src/main/java/cn/fw/freya/service/crawl/impl/KuaiShouCrawl.java @@ -85,6 +85,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { private final String playbackBaseUrl = "https://live.kuaishou.com/playback/"; private final ConcurrentHashMap sig3Map = new ConcurrentHashMap<>(); private final AccountRpcService accountRpcService; + private static final String RE_TRY_GET_SIG_FLAG = "reTry"; @Resource(name = "wmyThreadPool") private ThreadPoolExecutor threadPoolExecutor; @@ -125,7 +126,12 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { String key = accountNo + "#" + dataType; if (Objects.nonNull(page)) key += "#" + page; - String ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true); + String ns_sig3; + int tryTimes = 0; + do { + ns_sig3 = this.getNS_sig3(accountNo, dataType, page, true); + tryTimes++; + } while (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG) && tryTimes < 5); /* keep retrying while the retry flag comes back, but make at most 5 attempts in total */ if (Objects.nonNull(ns_sig3)) sig3Map.put(key, ns_sig3); else @@ -271,6 +277,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.VIDEO.getValue(), page, false); if (Objects.isNull(ns_sig3)) return null; + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG)) + throw new BusinessException("获取签名失败, 等待下一次重新获取签名"); Map params = new LinkedHashMap<>(); params.put("count", 10); params.put("page", 
page); @@ -470,6 +478,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.LIVE.getValue(), null, false); if (Objects.isNull(ns_sig3)) return null; + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG)) + throw new BusinessException("获取签名失败, 等待下一次重新获取签名"); Map params = new LinkedHashMap<>(); params.put("memberId", this.getUserId(accountNo)); params.put("endTime", endTime.getTime()); @@ -714,6 +724,8 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { final String ns_sig3 = this.getNS_sig3(accountNo, DataTypeEnum.FANS.getValue(), null, false); if (Objects.isNull(ns_sig3)) return null; + if (Objects.equals(ns_sig3, RE_TRY_GET_SIG_FLAG)) + throw new BusinessException("获取签名失败, 等待下一次重新获取签名"); HttpConfig config = HttpConfig.custom() .url("https://cp.kuaishou.com/rest/cp/creator/pc/home/infoV2?__NS_sig3=" + ns_sig3) .context(cookies.getContext()) @@ -839,32 +851,38 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { if (fansDimension) { driver.get("https://cp.kuaishou.com/article/manage/video"); LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(5)); - if (Objects.equals(targetUrl, driver.getCurrentUrl())) { + } + try { + new WebDriverWait(driver, 15, 300).until(driver1 -> + driver1.findElement(By.xpath("//span[@class='publish-button__text']")));// the '发布视频' (publish video) button + } catch (Exception e) { + if (!new WebDriverWait(driver, 3, 300).until(driver1 -> + driver1.findElements(By.xpath("//a[@class='login']"))).isEmpty()) { this.exitBrowser(accountNo, uuid); return null; } - } - if (Objects.equals("https://cp.kuaishou.com/profile", driver.getCurrentUrl())) {// 页面未跳转到视频/直播数据页面 this.exitBrowser(accountNo, uuid); - return null; + return RE_TRY_GET_SIG_FLAG; } int maxPageNum = 0; if (videoDimension) { List pageLabels = this.findPageLabels(driver);// 获取分页页码标签元素列表 - Map labelMap = this.processPageElement(pageLabels);// 将标签元素处理成K(页码), V(标签element) + Map labelMap;// K (page number), 
V (label element) if (!CollectionUtils.isEmpty(pageLabels)) { + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(4)); + labelMap = this.processPageElement(pageLabels);// turn the label elements into K (page number), V (label element) maxPageNum = Integer.parseInt(pageLabels.get(pageLabels.size() - 1).getText());// 最大页码 for (int i = 0; i < maxPageNum; i++) { try { WebElement pageLabel = this.findPageLabelFromMap(labelMap, i + 1); if (Objects.isNull(pageLabel)) { - LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(1)); + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2)); pageLabels = this.findPageLabels(driver); labelMap = this.processPageElement(pageLabels); pageLabel = this.findPageLabelFromMap(labelMap, i + 1); } pageLabel.click(); - LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(2)); + LockSupport.parkNanos(TimeUnit.SECONDS.toNanos(4)); } catch (Exception e) { log.info("异常发生, 信息为: {}", e.getMessage(), e); } @@ -896,6 +914,7 @@ public class KuaiShouCrawl implements CrawlStrategy, SmartLifecycle { throw new BusinessException(e.getMessage()); } this.exitBrowser(accountNo, uuid); + log.info("key: {}, value: {}", key, sig3Map.get(key)); return sig3Map.get(key); } -- libgit2 0.22.2