From 7ff08265218415c258c3d40e3ff3c9143b3c0b82 Mon Sep 17 00:00:00 2001 From: liudongqi Date: Thu, 5 Sep 2024 15:13:42 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app.js | 2 +- enums/AreaNameEnum.js | 2 - package-lock.json | 1 + package.json | 1 + public/hubei/index.html | 3 +- routes/index.js | 226 ++++++++++++++++++++++++++------------- test/downloadFileTest.js | 24 +++++ utils/JsUtil.js | 63 +++++++++++ 8 files changed, 243 insertions(+), 79 deletions(-) create mode 100644 test/downloadFileTest.js create mode 100644 utils/JsUtil.js diff --git a/app.js b/app.js index 920159c..8f7583f 100644 --- a/app.js +++ b/app.js @@ -1,6 +1,6 @@ const express = require('express'); // const bodyParser = require('body-parser'); -let IpUtil = require('./utils/IpUtil'); +const IpUtil = require('./utils/IpUtil'); const rs = require("./routes"); // 读取环境变量 diff --git a/enums/AreaNameEnum.js b/enums/AreaNameEnum.js index 6ae2746..e9d28ec 100644 --- a/enums/AreaNameEnum.js +++ b/enums/AreaNameEnum.js @@ -5,12 +5,10 @@ const AreaNameEnum = Object.freeze({ YUN_NAN: Object.freeze({ AREA_CODE: '5300', AREA_NAME: 'yunnan', - JS_FILE: path.resolve(__dirname, '../public/yunnan/MO5zzCMcub4d.b4c45da.js'), }), HU_BEI: Object.freeze({ AREA_CODE: '4200', AREA_NAME: 'hubei', - JS_FILE: path.resolve(__dirname, '../public/hubei/5PXGXoOF7eGJ.ed63b8f.js'), }), getByAreaCode(areaCode) { diff --git a/package-lock.json b/package-lock.json index 4fbc8b5..12a0e5c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "body-parser": "^1.20.2", "express": "^4.19.2", + "node-fetch": "^2.7.0", "sdenv": "^0.2.2", "sdenv-extend": "^1.3.1", "sdenv-jsdom": "^1.1.0" diff --git a/package.json b/package.json index 27361c0..1405c7f 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "dependencies": { "body-parser": "^1.20.2", "express": "^4.19.2", + "node-fetch": "^2.7.0", "sdenv": "^0.2.2", "sdenv-extend": "^1.3.1", "sdenv-jsdom": "^1.1.0" diff --git a/public/hubei/index.html b/public/hubei/index.html index eccfe6c..b76ba83 100644 --- a/public/hubei/index.html +++ b/public/hubei/index.html @@ -4,4 +4,5 @@ window.location.href="./ie.html" }, 1000) -
\ No newline at end of file +
+ \ No newline at end of file diff --git a/routes/index.js b/routes/index.js index c24d585..80ad0fe 100644 --- a/routes/index.js +++ b/routes/index.js @@ -3,11 +3,12 @@ const router = express.Router(); const {jsdomFromText, browser} = require("sdenv"); const {Script} = require("node:vm"); -const fs = require("node:fs"); const crypto = require("node:crypto") const AreaNameEnum = require('../enums/AreaNameEnum'); const Store = require("../utils/Store"); +const JsUtil = require('../utils/JsUtil'); let store = new Store(); +let sleep = ms => new Promise(resolve => setTimeout(resolve, ms)); router.post('/rsCookie', async (req, res) => { @@ -21,7 +22,6 @@ router.post('/rsCookie', async (req, res) => { let cookie = req.body['cookieBase64']; let userAgent = req.body['userAgentBase64']; console.log(`${uuid};接收到 ${areaName} 请求:${url}`) - // fs.writeFileSync(`./back/${uuid}.url`, `${url}\n ${userAgent}\n ${cookie} \n`); if (url == null || url === '') { return res.status(500).send('error url') } @@ -29,28 +29,11 @@ router.post('/rsCookie', async (req, res) => { return res.status(500).send('error html') } - let jsText; - let loadHtmlJs; - if (jsStr == null || jsStr === "") { - let jsPath = AreaNameEnum.getByAreaName(areaName).JS_FILE - if (jsPath == null) { - console.error('未找到js文件') - return res.send('未找到js文件') - } - jsText = fs.readFileSync(jsPath).toString('utf8'); - loadHtmlJs = true; - } else { - jsText = Buffer.from(jsStr, 'base64').toString('utf-8') - loadHtmlJs = false - } - - - let cookies = await handle(url, + let cookies = await handle(url, uuid, areaName, Buffer.from(htmlStr, 'base64').toString('utf-8'), - jsText, cookie != null && cookie !== "" ? Buffer.from(cookie, 'base64').toString('utf-8') : null, userAgent != null && userAgent !== "" ? Buffer.from(userAgent, 'base64').toString('utf-8') : null, - uuid, loadHtmlJs) + ) console.log(`${uuid};返回cookie ---->`, cookies.split('; ')) @@ -64,24 +47,6 @@ router.post('/rsCookie', async (req, res) => { }) -function loadJs(window, jsText) { - // 加载js - let js = ''; - // 加载 页面上的js - const allScript = window.document.querySelectorAll('script[r="m"]'); - for (let i = 0; i < allScript.length; i++) { - const script = allScript[i]; - let attr = script.textContent; - if (attr) { - js += attr - } else { - js += jsText - } - js += ";\n" - } - return js; -} - function CookieStr2List(cookies) { let list = [] for (let cookie of cookies.trim().split("; ")) { @@ -90,7 +55,7 @@ function CookieStr2List(cookies) { return list } -async function handle(url, htmlStr, jsText, cookie, userAgent, uuid, loadHtmlJs) { +async function handle(url, uuid, areaName, htmlStr, cookie, userAgent) { // 获取 origin let baseUrl = new URL(url).origin; // 初始化 jsDom 和 cookieJar @@ -105,62 +70,173 @@ async function handle(url, htmlStr, jsText, cookie, userAgent, uuid, loadHtmlJs) if (cookie != null) { let cookieList = CookieStr2List(cookie); console.log(`${uuid};cookie 加载长度--->`, cookieList, baseUrl) - // fs.writeFileSync(`./back/${uuid}.cookie`, cookie) for (let i = 0; i < cookieList.length; i++) { cookieJar.setCookieSync(cookieList[i], baseUrl); } } - // 设置cookie的回掉 - const superSetCookie = cookieJar.setCookie; - cookieJar.setCookie = function (cookie, currentUrl, options, callback) { - console.log(`${uuid};正在设置 Cookie:`, cookie, currentUrl); - // store.set(uuid, cookie) - return superSetCookie.call(this, cookie, currentUrl, options, callback); - }; // 加载dom let dom = await jsDom(htmlStr); - // console.log('html 加载长度--->', dom.serialize()) console.log(`${uuid};html 加载长度--->`, dom.serialize().length) - // fs.writeFileSync(`./back/${uuid}.html`, dom.serialize()) window = dom.window + // 标志判断cookie是否生成 + window[uuid] = false + // 方案1 通过监听cookie 判断cookie是否生成 + cookieJar.delete + const superSetCookie = cookieJar.setCookie; + // 设置 setCookie 代理 + cookieJar.setCookie = function (cookie, currentUrl, options, callback) { + console.log(`${uuid};正在设置 Cookie:`, cookie, currentUrl); + let call = superSetCookie.call(this, cookie, currentUrl, options, callback); + // 设置标志可取标志 + if (cookie.includes('YqQ7a3SgknV8P')) { + window[uuid] = true + } + return call; + }; + // js执行成功后会跳转页面 会触发onbeforeunload钩子 window.onbeforeunload = async (url) => { - const cookies = cookieJar.getCookieStringSync(baseUrl); - console.debug(`${url} 生成cookie:`, cookies); - store.set(uuid, cookies) - // window.close(); + console.debug(`${url} 页面回调完成`); + window[uuid] = true } // 初始化浏览器 browser(window, 'chrome'); // 加载js - let js; - if (loadHtmlJs) { - js = loadJs(window, jsText); - } else { - js = jsText; - } + let js = await JsUtil.loadJs(window.document, areaName, cookie); console.log(`${uuid};js 加载长度--->`, js.length) - // fs.writeFileSync(`./back/${uuid}.js`, js) + // 执行 js let script = new Script(js); let internalVMContext = dom.getInternalVMContext(); - script.runInContext(internalVMContext); + script.runInContext(internalVMContext, {timeout: 1000}); + + + for (let i = 0; i < 10; i++) { + if (window[uuid]) { + break; + } + await sleep(100) + } + + let resCookie = cookieJar.getCookieStringSync(baseUrl); + window.close() + return resCookie; +} + +/** + * 方案1 + * 利用rs特性 js加载完成后会刷新页面 + * 利用 window.onbeforeunload 判断页面是否执行完毕 + * @param window + * @param cookieJar + * @param uuid + * @returns {Promise<*|null>} + */ +function scheme1_before(window, cookieJar, uuid) { + window.onbeforeunload = async (url) => { + let baseUrl = new URL(url).origin; + const cookies = cookieJar.getCookieStringSync(baseUrl); + console.debug(`${url} 页面回调生成cookie:`, cookies); + store.set(uuid, cookies) + // window.close(); + } +} + +/** + * 方案1 + * @param window + * @param cookieJar + * @param uuid + * @returns {Promise<*|null>} + */ +async function scheme1_after(window, cookieJar, uuid) { // 等待 onbeforeunload 钩子触发后的回掉 - // let val = await store.waitGetAndDelete(uuid, 100, 10) - - // bug 关闭后 部分参数是需要永久保存在内存中的 比如 cookie 实例 下次调用会报错 - // internalVMContext.close() - // window.close() - // dom = null - - // if (val != null) { - // return val; - // } - // console.log(`${uuid} 执行超时`) - return cookieJar.getCookieStringSync(baseUrl); - // throw new Error(`${uuid}执行超时`) + let val = await store.waitGetAndDelete(uuid, 100, 10) + if (val != null) { + return val; + } + return null; +} + + +/** + * 监听cookie出现指定cookie + * @param window + * @param cookieJar + * @param uuid + * @returns {Promise} + */ +function scheme2_before(window, cookieJar, uuid) { + // window[uuid] = false; + const superSetCookie = cookieJar.setCookie; + // 设置 setCookie 代理 + cookieJar.setCookie = function (cookie, currentUrl, options, callback) { + console.log(`${uuid};正在设置 Cookie:`, cookie, currentUrl); + return superSetCookie.call(this, cookie, currentUrl, options, callback); + // let cookieStringSync = super.getCookieStringSync(); + // console.log(cookieStringSync) + // if (cookie.includes(key)) { + // // 设置标志可取标志 + // window[uuid] = true + // } + // return call; + }; +} + +/** + * 方案2 + * @param window + * @param cookieJar + * @param key + * @param uuid + * @returns {Promise} + */ +async function scheme2_after(window, cookieJar, key, uuid) { + let sleep = ms => new Promise(resolve => setTimeout(resolve, ms)); + for (let i = 0; i < 10; i++) { + let cookieStringSync = cookieJar.getCookieStringSync(); + if (cookieStringSync.includes(key)) { + return cookieStringSync; + } + await sleep(100) + } + return null; +} + +/** + * 方案3 根据cookie现有的数量判断 + * @param window + * @param cookieJar + * @param baseUrl + * @param uuid + */ +function scheme3_before(window, cookieJar, baseUrl, uuid) { + const initCookie = cookieJar.getCookieStringSync(baseUrl); + window[uuid + 'CookieSize'] = initCookie != null ? initCookie.trim().split("; ").length : 0; +} + +/** + * 方案3 + * @param window + * @param cookieJar + * @param baseUrl + * @param uuid + * @returns {Promise} + */ +async function scheme3_after(window, cookieJar, baseUrl, uuid) { + let initCookieLength = window[uuid + 'CookieSize'] + let sleep = ms => new Promise(resolve => setTimeout(resolve, ms)); + for (let i = 0; i < 10; i++) { + let cookieStringSync = cookieJar.getCookieStringSync(baseUrl); + let cookies = cookieStringSync != null ? cookieStringSync.trim().split("; ").length : 0; + if (cookies > initCookieLength) { + return cookieStringSync; + } + await sleep(100) + } + return null; } diff --git a/test/downloadFileTest.js b/test/downloadFileTest.js new file mode 100644 index 0000000..ac8be4c --- /dev/null +++ b/test/downloadFileTest.js @@ -0,0 +1,24 @@ +const fs = require('fs'); + +// const fetch = require('node-fetch'); // Install this via npm + + +async function downloadFile(url, path) { + try { + const response = await fetch(url); + if (!response.ok){ + throw new Error(`HTTP error! Status: ${response.status}`); + } + const arrayBuffer = await response.arrayBuffer(); + const buffer = Buffer.from(arrayBuffer); + fs.writeFileSync(path, buffer); + console.log('File downloaded and saved successfully.'); + } catch (error) { + console.error('Error downloading the file:', error); + } +} + +const downloadUrl = 'https://app.yunnan.chinatax.gov.cn/GVv7ud1ebech/MO5zzCMcub4d.b4c45da.js'; // Replace with your file URL +const filePath = './downloadedFile.js'; // Desired file path + +downloadFile(downloadUrl, filePath); diff --git a/utils/JsUtil.js b/utils/JsUtil.js new file mode 100644 index 0000000..4e8c122 --- /dev/null +++ b/utils/JsUtil.js @@ -0,0 +1,63 @@ +let fs = require('node:fs'); +const path = require("node:path"); + + +class JsUtil { + // 加载 js 文本 + static async loadJs(document, areaName, cookie) { + let js = ''; + // 加载 页面上的js + const allScript = document.querySelectorAll('script[r="m"]'); + for (let i = 0; i < allScript.length; i++) { + const script = allScript[i]; + let attr = script.textContent; + if (attr) { + js += attr + } else { + //获取script @src 属性 + let jsSrc = script.src; + let url = new URL(jsSrc); + let fileName = areaName + url.pathname.replaceAll('/', '.'); + let filePath = path.resolve(__dirname, `../public/static/js/`); + let existDir = fs.existsSync(filePath); + if (!existDir) { + fs.mkdirSync(filePath, {recursive: true}); + } + let file =path.resolve(filePath, fileName); + let existFile = fs.existsSync(file); + let jsText; + if (existFile) { + let buffer = fs.readFileSync(file); + jsText = buffer.toString('utf8') + } else { + // 文件下载 + jsText = await this.downloadJs(jsSrc, filePath, fileName, cookie) + } + js += jsText + } + js += ";\n" + } + return js; + } + + + static async downloadJs(downloadUrl, filePath, fileName) { + let url = new URL(downloadUrl); + const response = await fetch(url); + if (!response.ok) { + throw new Error(`HTTP error! Status: ${response.status}`); + } + const arrayBuffer = await response.arrayBuffer(); + const buffer = Buffer.from(arrayBuffer); + let file = path.resolve(filePath, fileName); + fs.writeFile(file, buffer, {encoding: 'utf8'}, (err) => { + if (err){ + console.error(`文件${file}写入失败 ---> ${err}`) + } + }) + console.log('File downloaded and saved successfully.'); + return buffer.toString('utf8') + } +} + +module.exports = JsUtil \ No newline at end of file