// Common.java 14 KB
package cn.fw.freya.service.crawl.impl;

import cn.fw.freya.config.SeleniumProperties;
import cn.fw.freya.dao.AccountDao;
import cn.fw.freya.dao.CookieDao;
import cn.fw.freya.dao.LivePoolDao;
import cn.fw.freya.dao.VideoPoolDao;
import cn.fw.freya.model.data.Account;
import cn.fw.freya.model.data.FwCookie;
import cn.fw.freya.model.data.ResponseReceived;
import cn.fw.freya.model.data.pool.LivePool;
import cn.fw.freya.model.data.pool.VideoPool;
import cn.fw.freya.model.dto.rpc.ReportAccountDto;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpResponse;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.BufferedHttpEntity;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeDriverService;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.logging.LogEntries;
import org.openqa.selenium.logging.LogType;
import org.openqa.selenium.logging.LoggingPreferences;
import org.openqa.selenium.logging.Logs;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.RemoteWebDriver;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.CollectionUtils;

import java.io.IOException;
import java.util.*;
import java.util.logging.Level;
import java.util.stream.Collectors;

/**
 * @author unknown
 * @version 1.0
 * @date 2021/12/12 1:21
 * @Description
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class Common {

    public static final String NETWORK_RESPONSE_RECEIVED = "Network.responseReceived";
    public static final int SELENIUM_PORT = 9005;
    private final SeleniumProperties seleniumProperties;
    private final CookieDao cookieDao;
    private final AccountDao accountDao;
    private final VideoPoolDao videoPoolDao;
    private final LivePoolDao livePoolDao;
    private final String playbackBaseUrl = "https://live.kuaishou.com/playback/";

    /**
     * 创建浏览器驱动
     *
     * @return
     */
    public ChromeDriver createDriver() {
        /**
         * options.addArguments('--disable-infobars');// 禁止策略化
         * options.addArguments('--no-sandbox');// 解决DevToolsActivePort文件不存在的报错
         * options.addArguments('window-size=1920x3000');// 指定浏览器分辨率
         * options.addArguments('--disable-gpu');// 谷歌文档提到需要加上这个属性来规避bug
         * options.addArguments('--incognito');// 隐身模式(无痕模式)
         * options.addArguments('--disable-javascript');// 禁用javascript
         * options.addArguments('--start-maximized');// 最大化运行(全屏窗口),不设置,取元素会报错
         * options.addArguments('--hide-scrollbars');// 隐藏滚动条, 应对一些特殊页面
         * options.addArguments('blink-settings=imagesEnabled=false');// 不加载图片, 提升速度
         * options.addArguments('--headless');// 浏览器不提供可视化页面. linux下如果系统不支持可视化不加这条会启动失败
         * options.binary_location = r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe";// 手动指定使用的浏览器位置
         * options.addArguments('lang=en_US');// 设置语言
         * options.addArguments('User-Agent=Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36')
         * options.addArguments('--headless');// 浏览器不提供可视化页面
         *
         * Map<String, Object> prefs = new HashMap<>();
         * prefs.put("credentials_enable_service", false);
         * prefs.put("profile.password_manager_enabled", false);
         * options.setExperimentalOption("prefs", prefs);// 屏蔽'保存密码'提示框
         */
        ChromeOptions options = new ChromeOptions();
        if (!seleniumProperties.isDebug()) {
            options.addArguments("--disable-popup-blocking", "--lang=zh-CN");// 禁用阻止弹出窗口
            options.setHeadless(true);// 设置无头模式
            options.addArguments("--no-sandbox");// 启动无沙盒模式运行
            options.addArguments("--disable-infobars");// 禁用信息栏
            options.addArguments("--disable-dev-shm-usage");
            options.addArguments("--disable-gpu");// 禁用GPU加速
            options.addArguments("--window-size=2560,1024");// 指定浏览器分辨率
            options.addArguments("--disable-extensions");// 禁用扩展
            options.addArguments("--no-default-browser-check");// 禁用默认浏览器检查
            //options.addArguments("--disable-javascript");// 禁用JavaScript
        }
        Map<String, Object> prefs = new HashMap<>();
        prefs.put("credentials_enable_service", false);
        prefs.put("profile.password_manager_enabled", false);
        options.setExperimentalOption("prefs", prefs);// 禁用保存密码提示框
        // set performance logger
        // this sends Network.enable to chromedriver
        LoggingPreferences logPrefs = new LoggingPreferences();
        logPrefs.enable(LogType.PERFORMANCE, Level.ALL);// 设置日志级别
        options.setCapability(CapabilityType.LOGGING_PREFS, logPrefs);
        options.setExperimentalOption("w3c", false);
        //options.addArguments("blink-settings=imagesEnabled=false");// 禁止加载图片
        HashMap<Object, Object> map = new HashMap<>();
        //map.put("implicit", 10 * 1000);// 元素查找最多等待10秒
        map.put("pageLoad", 30 * 1000);// 页面加载最多等待30秒
        map.put("script", 30 * 1000);// 页面脚本执行最多等待30秒
        options.setCapability("timeouts", map);
        return new ChromeDriver(new ChromeDriverService.Builder()
                .usingPort(SELENIUM_PORT)
                .build(), options);
    }

    /**
     * 保存浏览器cookies
     *
     * @param accountNo 账户号
     */
    @Transactional(rollbackFor = Exception.class)
    public void saveCookie(WebDriver driver, String accountNo, Integer type) {
        List<FwCookie> cookieList = this.getTempCookies(driver, accountNo, type);
        if (CollectionUtils.isEmpty(cookieList)) {
            return;
        }
        this.deleteCookies(accountNo, type);
        cookieDao.saveAll(cookieList);
        //driver.quit();
    }

    /**
     * 获取浏览器cookies
     *
     * @param driver    驱动
     * @param accountNo 账号
     * @param type      账号类型
     * @return
     */
    public List<FwCookie> getTempCookies(WebDriver driver, String accountNo, Integer type) {
        Set<Cookie> cookies = driver.manage().getCookies();
        return cookies.stream()
                .map(cookie -> FwCookie.toDb(cookie, accountNo, type))
                .collect(Collectors.toList());
    }

    /**
     * 删除指定类型指定账号的cookies
     *
     * @param accountNo 账户号
     * @param type      账户类型
     */
    public void deleteCookies(String accountNo, Integer type) {
        cookieDao.deleteByAccountNoAndType(accountNo, type);
    }

    /**
     * 根据账户号, 找到cookie
     *
     * @param accountNo 账户号
     * @return
     */
    public List<FwCookie> loadCookie(String accountNo, Integer type) {
        return Optional.ofNullable(cookieDao.findByAccountNoAndType(accountNo, type))
                .orElse(new ArrayList<>());
    }

    /**
     * 处理收到的响应
     *
     * @param driver
     * @return
     */
    public List<ResponseReceived> processHttpTransferData(WebDriver driver) {
        List<ResponseReceived> responseReceived = new ArrayList<>();
        Logs logs = driver.manage().logs();// 获取用于获取不同类型日志的日志接口
        Set<String> availableLogTypes = logs.getAvailableLogTypes();// 查询可用的日志类型
        if (availableLogTypes.contains(LogType.PERFORMANCE)) {
            LogEntries logEntries = logs.get(LogType.PERFORMANCE);// 获取'performance'类型的日志条目
            logEntries.forEach(item -> {
                JSONObject jsonObj = Optional.ofNullable(JSON.parseObject(item.getMessage())
                        .getJSONObject("message"))
                        .orElse(new JSONObject());
                String method = jsonObj.getString("method");
                if (NETWORK_RESPONSE_RECEIVED.equals(method)) {// 日志中的条目中获取到的方法为'Network.responseReceived'
                    ResponseReceived response = JSON.parseObject(jsonObj.getString("params"), ResponseReceived.class);
                    responseReceived.add(response);
                }
            });
        }
        return responseReceived;
    }

    /**
     * 获取日志中的数据
     *
     * @param driver           浏览器驱动
     * @param responseReceived 收到的响应
     * @param dataUrl          数据接口地址
     * @return
     */
    public HttpResponse getHttpResponse(WebDriver driver, ResponseReceived responseReceived, String dataUrl) {
        HttpResponse response = null;
        String baseUrl = JSONObject.parseObject(responseReceived.getResponse()).getString("url");
        boolean notStaticFiles = !baseUrl.endsWith(".png")
                && !baseUrl.endsWith(".jpg")
                && !baseUrl.endsWith(".css")
                && !baseUrl.endsWith(".ico")
                && !baseUrl.endsWith(".js")
                && !baseUrl.endsWith(".gif");
        if (notStaticFiles && baseUrl.contains(dataUrl)) {
            // 使用上面开发的接口获取返回数据
            try {
                // CHROME_DRIVER_PORT chromeDriver提供的端口
                String url = String.format("http://localhost:%s/session/%s/goog/cdp/execute",
                        SELENIUM_PORT, ((RemoteWebDriver) driver).getSessionId());
                HttpPost httpPost = new HttpPost(url);
                JSONObject object = new JSONObject();
                object.put("cmd", "Network.getResponseBody");
                final JSONObject params = new JSONObject();
                params.put("requestId", responseReceived.getRequestId());
                object.put("params", params);
                httpPost.setEntity(new StringEntity(object.toString()));
                RequestConfig requestConfig = RequestConfig
                        .custom()
                        .setSocketTimeout(50000)
                        .setConnectTimeout(50000)
                        .build();
                CloseableHttpClient httpClient = HttpClientBuilder.create()
                        .setDefaultRequestConfig(requestConfig).build();
                response = httpClient.execute(httpPost);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return response;
    }

    /**
     * 查询指定天是否有账户粉丝数据
     *
     * @param accountNo  账户号
     * @param type       账户类型
     * @param reportDate 指定日期
     * @return
     */
    public ReportAccountDto getHasFoundAccountMsg(String accountNo, Integer type, Date reportDate) {
        final List<Account> hasReportDate = accountDao.getHasReportDate(accountNo, type, reportDate);
        if (!CollectionUtils.isEmpty(hasReportDate)) {
            final Account account = hasReportDate.get(0);
            return ReportAccountDto.builder()
                    .fansCnt(account.getFansCnt())
                    .accountName(account.getAccountName())
                    .build();
        }
        return null;
    }

    /**
     * 查询指定天是否有视频数据
     *
     * @param accountNo  账户号
     * @param type       账户类型
     * @param reportDate 指定日期
     * @return
     */
    public List<VideoPool> getHasFoundVideo(String accountNo, Integer type, Date reportDate) {
        final List<VideoPool> hasReportData = videoPoolDao.getHasReportDate(accountNo, type, reportDate);
        if (hasReportData.size() > 0) {
            return hasReportData;
        }
        return null;
    }

    /**
     * 查询指定天是否有直播数据
     *
     * @param accountNo  账户号
     * @param type       账户类型
     * @param reportDate 指定日期
     * @return
     */
    public List<LivePool> getHasFoundLive(String accountNo, Integer type, Date reportDate) {
        final List<LivePool> hasReportData = livePoolDao.getHasReportData(accountNo, type, reportDate);
        if (hasReportData.size() > 0) {
            return hasReportData;
        }
        return null;
    }

    /**
     * 根据用户类型随机获取一个用户
     *
     * @param type 账户类型
     * @return
     */
    public Account getRandomUserByType(Integer type) {
        return accountDao.findRandomByAndType(type).get(0);
    }

    /**
     * 获取直播无回放信息的直播
     *
     * @param type              账户类型
     * @param durationThreshold 时长阈值
     * @return
     */
    public List<LivePool> getWithoutPlaybackLive(Integer type, Double durationThreshold) {
        return livePoolDao.getWithoutPlaybackLive(type, durationThreshold);
    }

    /**
     * 当账号登录成功, 检查今天同步了的账号列表中是否包含该账号
     *
     * @param accountNo 账号
     * @param type      账号类型
     * @return 结果
     */
    public void whenLoginCheckAccountExist(String accountNo, Integer type) {
        Account account = accountDao.findByAccountNoAndType(accountNo, type);
        if (Objects.nonNull(account))
            return;
        accountDao.save(Account.builder()
                .cookiesStatus(true)
                .accountNo(accountNo)
                .type(type)
                .fansCnt(null)
                .accountName(null)
                .reportDate(null)
                .done(false)
                .build()
        );// 不存在, 则保存该账号
    }

}