Skip to content

Commit

Permalink
V0.3.2
Browse files (browse the repository at this point in the history)
  • Loading branch information
naibo committed Jun 1, 2023
1 parent 527a004 commit 0a5d159
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 3 deletions.
1 change: 1 addition & 0 deletions ElectronJS/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ async function runBrowser(lang = "en", user_data_folder = '') {
}
options.addExtensions(path.join(__dirname, "XPathHelper.crx"));
options.setChromeBinaryPath(chromeBinaryPath);
options.add
if (user_data_folder != "") {
let dir = path.join(task_server.getDir(), user_data_folder);
console.log(dir);
Expand Down
1 change: 1 addition & 0 deletions ElectronJS/tasks/96.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":96,"name":"[2305.07067] SigRec: Automatic Recovery of Function Signatures in Smart Contracts","url":"https://arxiv.org/abs/2305.07067","links":"https://arxiv.org/pdf/2008.03554.pdf","create_time":"6/2/2023, 1:00:27 AM","version":"0.3.2","containJudge":false,"desc":"https://arxiv.org/abs/2305.07067","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://arxiv.org/pdf/2008.03554.pdf","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://arxiv.org/pdf/2008.03554.pdf"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://arxiv.org/abs/2305.07067","links":"https://arxiv.org/pdf/2008.03554.pdf","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"download-pdf\")]","wait":2,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[2]/main[1]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[1]/a[1]","//a[contains(., 'PDF')]","//A[@class='abs-button download-pdf']"]}}]}
1 change: 1 addition & 0 deletions ElectronJS/tasks/97.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":97,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"6/2/2023, 1:34:59 AM","version":"0.3.2","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":3,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"string","exampleValue":"Field[\"参数1_链接文本\"]123Field[\"参数2_链接地Field[\"参数1_链接文本\"]","value":"Field[\"参数1_链接文本\"]123Field[\"参数2_链接地Field[\"参数1_链接文本\"]"}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"string","exampleValue":"手机"},{"id":1,"name":"参数2_链接地址","desc":"","type":"string","exampleValue":"https://shouji.jd.com/"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,4,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":6,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":1,"contentType":0,"relative":false,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., 
'手机')]"],"exampleValues":[{"num":0,"value":"手机"}],"unique_index":"yaeqxdsd9dlidernop","default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":4,"index":3,"parentId":0,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","wait":10,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"Field[\"参数1_链接文本\"]123Field[\"参数2_链接地Field[\"参数1_链接文本\"]","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text defcolor']"]}},{"id":3,"index":4,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[5],"isInLoop":false,"position":2,"parameters":{"history":3,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., 
'手机')]"]}},{"id":5,"index":5,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":3,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":1,"contentType":0,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"手机"},{"num":1,"value":"数码"},{"num":2,"value":"家用电器"},{"num":3,"value":"电脑"},{"num":4,"value":"办公"},{"num":5,"value":"家纺"},{"num":6,"value":"家居"},{"num":7,"value":"厨具"},{"num":8,"value":"家具"},{"num":9,"value":"家装"},{"num":10,"value":"灯具"},{"num":11,"value":"工业品"},{"num":12,"value":"内衣"},{"num":13,"value":"男装"},{"num":14,"value":"女装"},{"num":15,"value":"童装"},{"num":16,"value":"箱包"},{"num":17,"value":"钟表"},{"num":18,"value":"珠宝"},{"num":19,"value":"女鞋"},{"num":20,"value":"运动"},{"num":21,"value":"户外"},{"num":22,"value":"男鞋"},{"num":23,"value":"汽车用品"},{"num":24,"value":"车载电器"},{"num":25,"value":"母婴"},{"num":26,"value":"洗护喂养"},{"num":27,"value":"玩具乐器"},{"num":28,"value":"宠物生活"},{"num":29,"value":"家庭清洁"},{"num":30,"value":"个人护理"},{"num":31,"value":"计生情趣"},{"num":32,"value":"图书"},{"num":33,"value":"童书"},{"num":34,"value":"文学"}],"unique_index":"jsqj8nvtjfplidevrwo","default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://shouji.jd.com/"},{"num":1,"value":"https://shuma.jd.com/"},{"num":2,"value":"https://search.jd.com/Search?keyword=%E5%B0%8F%E5%AE%B6%E7%94%B5&enc=utf-8&wq=%E5%B0%8F%E5%AE%B6%E7%94%B5&pvid=261a350161304c979fa0e7ce95c05671"},{"num":3,"value":"https://diannao.jd.com/"},{"num":4,"value":"https://bg.jd.com/"},{"num":5,"value":"https://channel.jd.com/jf.html"},{"num":6,"value":"https://channel.jd.com/home.html"},{"num":7,"value":
"https://channel.jd.com/kitchenware.html"},{"num":8,"value":"https://channel.jd.com/furniture.html"},{"num":9,"value":"https://jzjc.jd.com/"},{"num":10,"value":"https://channel.jd.com/9855-9856.html"},{"num":11,"value":"https://pro.jd.com/mall/active/2u2DR1dUiK34csAE3DqmcG8aXvUK/index.html"},{"num":12,"value":"https://channel.jd.com/underwear.html"},{"num":13,"value":"https://channel.jd.com/1315-1342.html"},{"num":14,"value":"https://channel.jd.com/women.html"},{"num":15,"value":"https://list.jd.com/list.html?cat=1319,11842"},{"num":16,"value":"https://channel.jd.com/bag.html"},{"num":17,"value":"https://channel.jd.com/watch.html"},{"num":18,"value":"https://channel.jd.com/jewellery.html"},{"num":19,"value":"https://channel.jd.com/womensshoes.html"},{"num":20,"value":"https://phat.jd.com/10-109.html"},{"num":21,"value":"https://channel.jd.com/outdoor.html"},{"num":22,"value":"https://channel.jd.com/mensshoes.html"},{"num":23,"value":"https://che.jd.com/"},{"num":24,"value":"https://list.jd.com/list.html?cat=6728,6740&page=1&delivery_glb=1&stock=1&sort=sort_totalsales15_desc&trans=1&JL=4_7_0#J_main"},{"num":25,"value":"https://search.jd.com/Search?keyword=%E6%AF%8D%E5%A9%B4&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%AF%8D%E5%A9%B4&stock=1&gp=2&click=1"},{"num":26,"value":"https://channel.jd.com/feed.html"},{"num":27,"value":"https://toy.jd.com/"},{"num":28,"value":"https://channel.jd.com/pet.html"},{"num":29,"value":"https://channel.jd.com/beauty.html"},{"num":30,"value":"https://lady.jd.com/"},{"num":31,"value":"https://channel.jd.com/9192-9196.html"},{"num":32,"value":"https://book.jd.com/"},{"num":33,"value":"https://book.jd.com/children.html"},{"num":34,"value":"https://channel.jd.com/p_wenxuezongheguan.html"}],"unique_index":"jsqj8nvtjfplidevrwo","default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
2 changes: 1 addition & 1 deletion ExecuteStage/.vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"console": "integratedTerminal",
"justMyCode": true,
// "args": ["--id", "38", "--read_type", "local", "--headless", "1"]
"args": ["--id", "[5]", "--headless", "0", "--user_data", "0"]
"args": ["--id", "[80]", "--headless", "0", "--user_data", "0"]
}
]
}
28 changes: 26 additions & 2 deletions ExecuteStage/easyspider_executestage.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,10 +702,18 @@ def inputInfo(self, para, loopValue):
textbox.send_keys(Keys.SHIFT, Keys.END)
# Send the DELETE key
textbox.send_keys(Keys.DELETE)
value = ""
if para["useLoop"]:
textbox.send_keys(loopValue)
value = loopValue
else:
textbox.send_keys(para["value"])
value = para["value"]
pattern = r'Field\["([^"]+)"\]' # 将value中的Field[""]替换为outputParameters中的键值
try:
replaced_text = re.sub(pattern, lambda match: self.outputParameters.get(match.group(1), ''), value)
except:
replaced_text = value
value = replaced_text
textbox.send_keys(value)
self.execute_code(2, para["afterJS"], para["afterJSWaitTime"], textbox) # 执行后置js
# global bodyText # 每次执行点击,输入元素和打开网页操作后,需要更新bodyText
self.bodyText = self.browser.find_element(By.CSS_SELECTOR, "body").text
Expand Down Expand Up @@ -1089,6 +1097,22 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
option.add_argument(
"--disable-blink-features=AutomationControlled") # TMALL 反扒
options.add_argument("--disable-blink-features=AutomationControlled") # TMALL 反扒
options.add_experimental_option("prefs", {
"download.default_directory": "Data/", # 设置文件下载路径
"download.prompt_for_download": False, # 禁止下载提示框
"plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}],
"download.directory_upgrade": True,
"download.extensions_to_open": "applications/pdf",
"plugins.always_open_pdf_externally": True # 总是在外部程序中打开PDF
})
option.add_experimental_option("prefs", {
"download.default_directory": "Data/", # 设置文件下载路径
"download.prompt_for_download": False, # 禁止下载提示框
"plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}],
"download.directory_upgrade": True,
"download.extensions_to_open": "applications/pdf",
"plugins.always_open_pdf_externally": True # 总是在外部程序中打开PDF
})
print(options)
threads = []
for i in c.id:
Expand Down

0 comments on commit 0a5d159

Please sign in to comment.