diff --git a/ElectronJS/EasySpider_en.crx b/ElectronJS/EasySpider_en.crx index 7d1a530d..362585f8 100644 Binary files a/ElectronJS/EasySpider_en.crx and b/ElectronJS/EasySpider_en.crx differ diff --git a/ElectronJS/EasySpider_zh.crx b/ElectronJS/EasySpider_zh.crx index 17b1921f..b6573418 100644 Binary files a/ElectronJS/EasySpider_zh.crx and b/ElectronJS/EasySpider_zh.crx differ diff --git a/ElectronJS/main.js b/ElectronJS/main.js index f62e5d0b..0b0c343f 100644 --- a/ElectronJS/main.js +++ b/ElectronJS/main.js @@ -333,7 +333,17 @@ async function beginInvoke(msg, ws) { try{ flowchart_window.openDevTools(); } catch { - + console.log("open devtools error"); + } + } else if (msg.type == 7) { + // 获得当前页面Cookies + try{ + let cookies = await driver.manage().getCookies(); + console.log("Cookies: ", cookies); + let cookiesText = cookies.map(cookie => `${cookie.name}=${cookie.value}`).join('\n'); + socket_flowchart.send(JSON.stringify({"type": "GetCookies", "message": cookiesText})); + } catch { + console.log("Cannot get Cookies"); } } } diff --git a/ElectronJS/src/taskGrid/FlowChart_CN.html b/ElectronJS/src/taskGrid/FlowChart_CN.html index cbfe6650..9f0594cc 100644 --- a/ElectronJS/src/taskGrid/FlowChart_CN.html +++ b/ElectronJS/src/taskGrid/FlowChart_CN.html @@ -116,6 +116,23 @@ + +

+ +

+
+
+ +

+ 点击获取当前页面Cookie +

+ +
+
+
diff --git a/ElectronJS/src/taskGrid/FlowChart_CN.js b/ElectronJS/src/taskGrid/FlowChart_CN.js index 16868512..d4ba72e7 100644 --- a/ElectronJS/src/taskGrid/FlowChart_CN.js +++ b/ElectronJS/src/taskGrid/FlowChart_CN.js @@ -87,6 +87,15 @@ let app = new Vue({ }, }, methods: { + getCookies: function() { //获取cookies + let command = new WebSocket("ws://localhost:"+getUrlParam("wsport")) + command.onopen = function() { + let message = { + type: 7, //消息类型,0代表连接操作 + }; + this.send(JSON.stringify(message)); + }; + }, changeXPaths: function (XPaths){ let result = ""; for (let i = 0; i < XPaths.length; i++) { @@ -588,7 +597,7 @@ document.onkeydown = function(e) { location.reload(); } else if (currKey == 123) { console.log("打开devtools") - let command = new WebSocket("ws://localhost:8084") + let command = new WebSocket("ws://localhost:"+getUrlParam("wsport")) command.onopen = function() { let message = { type: 6, //消息类型,0代表连接操作 @@ -604,4 +613,4 @@ function inputDelete(e) { e.stopPropagation(); //输入框按delete应该正常运行 //Electron中如果有alert或者confirm,执行后会卡死输入框,所以最好不要用 } -} \ No newline at end of file +} diff --git a/ElectronJS/src/taskGrid/logic.js b/ElectronJS/src/taskGrid/logic.js index b8bd79f7..74bd5902 100644 --- a/ElectronJS/src/taskGrid/logic.js +++ b/ElectronJS/src/taskGrid/logic.js @@ -66,7 +66,7 @@ function handleAddElement(msg) { addElement(1, msg); } else if (msg["type"] == "singleClick") { addElement(2, msg); - } else if (msg["type"] == "InputText") { + } else if (msg["type"] == "inputText") { addElement(4, msg); } else if (msg["type"] == "changeOption"){ addElement(6, msg); diff --git a/ElectronJS/src/taskGrid/logic_CN.js b/ElectronJS/src/taskGrid/logic_CN.js index e3c47dd2..6c1816db 100644 --- a/ElectronJS/src/taskGrid/logic_CN.js +++ b/ElectronJS/src/taskGrid/logic_CN.js @@ -65,7 +65,7 @@ function handleAddElement(msg) { addElement(1, msg); } else if (msg["type"] == "singleClick") { addElement(2, msg); - } else if (msg["type"] == "InputText") { + } else if (msg["type"] == "inputText") { addElement(4, msg); } else if (msg["type"] == "changeOption"){ addElement(6, msg); @@ -96,6 +96,14 @@ function handleAddElement(msg) { addElement(8, msg); addElement(3, msg); notifyParameterNum(msg["parameters"].length); //通知浏览器端参数的个数变化 + } else if(msg["type"] == "GetCookies"){ + for(let node of nodeList){ + if(node["option"] == 1){ + node["parameters"]["cookies"] = msg["message"]; + $("#pageCookies").val(msg["message"]); + break; + } + } } } @@ -156,6 +164,7 @@ function addParameters(t) { t["parameters"]["scrollType"] = 0; //滚动类型,0不滚动,1向下滚动1屏,2滚动到底部 t["parameters"]["scrollCount"] = 1; //滚动次数 t["parameters"]["scrollWaitTime"] = 1; //滚动后等待时间 + t["parameters"]["cookies"] = ""; //cookies } else if (t.option == 2) { //点击元素 t["parameters"]["scrollType"] = 0; //滚动类型,0不滚动,1向下滚动1屏,2滚动到底部 t["parameters"]["scrollCount"] = 1; //滚动次数 @@ -254,8 +263,7 @@ function modifyParameters(t, para) { } } -//点击确定按钮时的处理 -$("#confirm").mousedown(function() { +function updateUI() { refresh(false); app.$data.nowArrow["num"]++; //改变元素的值,通知画图,重新对锚点画图 let tnodes = document.getElementsByClassName("clk"); @@ -268,7 +276,10 @@ $("#confirm").mousedown(function() { break; } } -}); +} + +//点击确定按钮时的处理 +$("#confirm").mousedown(updateUI); //获取url中的参数 function getUrlParam(name) { diff --git a/ElectronJS/tasks/141.json b/ElectronJS/tasks/141.json new file mode 100644 index 00000000..bfc89201 --- /dev/null +++ b/ElectronJS/tasks/141.json @@ -0,0 +1 @@ +{"id":141,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/5/2023, 11:13:03 PM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"/手机/数码"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":"ipLoc-djd=53283-53456-0-0; areaId=53283; mba_sid=16885699655799419528717331774.0; __jdc=122270672; 3AB9D23F7A4B3C9B=3EHIRO46HKXUNNSA7AFBSLZLZ6ICUQG3NUT5VTWFZFUBTRI5ZUXV6XM5CYB5VWCYC6YLWOOIUQAUILWMLGTQWCRMBA; __jdb=122270672.1.16885699655751091362768|1.1688569965; mba_muid=16885699655751091362768; __jdv=122270672%7Clocalhost%3A8074%7C-%7Creferral%7C-%7C1688569965576; __jda=122270672.16885699655751091362768.1688569965.1688569965.1688569965.1"}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG LeftSide_menu_hover__OCHiO']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"],"exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"wm0dfnp2tjeljpuyon0","iframe":false,"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/ElectronJS/tasks/142.json b/ElectronJS/tasks/142.json new file mode 100644 index 00000000..5bcd87f2 --- /dev/null +++ b/ElectronJS/tasks/142.json @@ -0,0 +1 @@ +{"id":142,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/6/2023, 3:38:35 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"/手机/数码"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":"test=123\nipLoc-djd=53283-53456-0-0\nareaId=53283\nmba_sid=16885856346417163685425076773.0\n__jdc=122270672\n__jdb=122270672.1.16885856346381587112207|1.1688585634\nmba_muid=16885856346381587112207\n__jdv=122270672%7Clocalhost%3A8074%7C-%7Creferral%7C-%7C1688585634639\n__jda=122270672.16885856346381587112207.1688585634.1688585634.1688585634.1"}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"p2h2i1dva8ljq4aje2","iframe":false,"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]} \ No newline at end of file diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json index 504b2db2..136962bf 100644 --- a/ExecuteStage/.vscode/launch.json +++ b/ExecuteStage/.vscode/launch.json @@ -10,7 +10,7 @@ "program": "${file}", "console": "integratedTerminal", "justMyCode": true, - "args": ["--id", "[1]", "--read_type", "remote", "--headless", "0"] + "args": ["--id", "[3]", "--read_type", "remote", "--headless", "0"] // "args": ["--id", "[2]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] // "args": ["--id", "[44]", "--headless", "0", "--user_data", "1"] } diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index 87515f5b..8c3e6399 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -114,6 +114,11 @@ def preprocess(self): iframe = node["parameters"]["iframe"] except: node["parameters"]["iframe"] = False + if node["option"] == 1: # 打开网页操作 + try: + cookies = node["parameters"]["cookies"] + except: + node["parameters"]["cookies"] = "" if node["option"] == 3: # 提取数据操作 paras = node["parameters"]["paras"] for para in paras: @@ -705,6 +710,14 @@ def openPage(self, para, loopValue): self.browser.set_page_load_timeout(maxWaitTime) # 加载页面最大超时时间 self.browser.set_script_timeout(maxWaitTime) self.browser.get(url) + if para["cookies"] != "": + self.browser.delete_all_cookies() # 清除所有已有cookie + cookies = para["cookies"].split('\n') + for cookie in cookies: + name, value = cookie.split('=', 1) + cookie_dict = {'name': name, 'value': value} + # 加载 cookie + self.browser.add_cookie(cookie_dict) self.Log('Loading page: ' + url) self.recordLog('Loading page: ' + url) except TimeoutException: @@ -1001,7 +1014,6 @@ def get_content(self, p, element): return content # 提取数据事件 - def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0): pageHTML = etree.HTML(self.browser.page_source) if loopElement != "": # 只在数据在循环中提取时才需要获取循环元素 diff --git a/ExecuteStage/utils.py b/ExecuteStage/utils.py index f6b34b8e..895939a0 100644 --- a/ExecuteStage/utils.py +++ b/ExecuteStage/utils.py @@ -71,6 +71,7 @@ def isnull(s): return len(s) != 0 + class Time: def __init__(self, type1=""): self.t = int(round(time.time() * 1000)) diff --git a/Extension/manifest_v3/src/content-scripts/messageInteraction.js b/Extension/manifest_v3/src/content-scripts/messageInteraction.js index f161910c..2d10282e 100644 --- a/Extension/manifest_v3/src/content-scripts/messageInteraction.js +++ b/Extension/manifest_v3/src/content-scripts/messageInteraction.js @@ -31,7 +31,7 @@ global.ws.onopen = function() { export function input(value) { let message = { - "type": "InputText", + "type": "inputText", "history": history.length, //记录history的长度 "tabIndex": -1, "xpath": readXPath(global.nodeList[0]["node"], 0),