deliter 203 Posted March 16, 2017 Report Share Posted March 16, 2017 Hey all, here's a quick example bot. You can use this script to apply UBot's scraping tools to HTTP GETs, ExBrowser, etc. http://screencast-o-matic.com/watch/cbeDoT6glj The example code below just uses Ayman's HTTP Post plugin to scrape 7 pages of YouTube video results, and I use UBot's scraper to scrape the video titles. Basically, put your own scraping code below the comment and you're good to go. The one below is for the new UBot; the post below that is for the old UBot. You can use UBot's scraping tools (scrape attribute, exists, etc.) with HTTP GETs through this command, so there is no need for regex: just go to a page, use the UBot scraping tools, then put the scrape code, logic, etc. inside the container at the bottom. If you want the browser to switch pages while this code is running, add a "wait for browser event" (DOM ready) to the command. For this to work you will need to download my CSS plugin; I've just added an Encode URI function to it for this purpose, because the UBot one will not work. Link below. **Update: I've edited the code, as sometimes the UBot scraper will also scrape the actual page the browser is on (for example, if your scrape is just a wildcard). So simply use your UBot scraping code to scrape HTTP GET documents, but add the following at the end:
AND data-ubot="true" This will ensure only the http doc is scraped not the page, the update does make it run a bit slower but hardly noticeable, the below code is to scrape youtube titles from their results page, tagname "a" with a class of yt etc, but also notice, after getting that scrape code, I add in AND data-ubot="true" $scrape attribute(<((tagname="a" AND class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link ") AND data-ubot="true") ** https://1drv.ms/u/s!AgO9AudYbciJgP8ZAkIdZ6ICeNRCWQ add list to list(%href,$list from text("https://www.youtube.com/results?q=soccer&sp=SBTqAwA%253D https://www.youtube.com/results?q=soccer&sp=SCjqAwA%253D https://www.youtube.com/results?q=soccer&sp=SDzqAwA%253D https://www.youtube.com/results?q=soccer&sp=SFDqAwA%253D https://www.youtube.com/results?q=soccer&sp=SGTqAwA%253D https://www.youtube.com/results?q=soccer&sp=SHjqAwA%253D",$new line),"Delete","Global") set(#position,0,"Global") loop($list total(%href)) { set(#get,$plugin function("HTTP post.dll", "$http get", $list item(%href,#position), "", "", "", ""),"Global") scrapePage(#get) increment(#position) } define scrapePage(#doc) { set(#doc,$plugin function("DeliterCSS.dll", "$Deliter URL Encode", #doc),"Local") run javascript("try\{ document.body.removeChild(ubotEl) \} catch(err)\{console.log(err)\} var mydoc = decodeURIComponent(\"{#doc}\") var parser = new DOMParser(); var mydoc = parser.parseFromString(mydoc, \"text/html\"); mydoc.body.style.display=\"none\" var thedoc = mydoc.querySelectorAll(\"*\") for(var x =0;x < thedoc.length;x++)\{ thedoc[x].dataset.ubot = \"true\" \} thedoc=\"\" var ubotEl = document.createElement(\"html\") ubotEl.innerHTML=mydoc.body.outerHTML document.body.appendChild(ubotEl)") comment("add your ubot scrape codes and logic below, for handling the page") add list to list(%titles,$scrape attribute(<((tagname="a" AND class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link ") AND 
data-ubot="true")>,"innertext"),"Delete","Global") } 4 Quote Link to post Share on other sites
deliter 203 Posted March 16, 2017 Author Report Share Posted March 16, 2017 Here is a version for the Ubot 4 add list to list(%href,$list from text("https://www.youtube.com/results?q=soccer&sp=SBTqAwA%253D https://www.youtube.com/results?q=soccer&sp=SCjqAwA%253D https://www.youtube.com/results?q=soccer&sp=SDzqAwA%253D https://www.youtube.com/results?q=soccer&sp=SFDqAwA%253D https://www.youtube.com/results?q=soccer&sp=SGTqAwA%253D https://www.youtube.com/results?q=soccer&sp=SHjqAwA%253D",$new line),"Delete","Global") set(#position,0,"Global") loop($list total(%href)) { set(#get,$plugin function("HTTP post.dll", "$http get", $list item(%href,#position), "", "", "", ""),"Global") scrapePage(#get) increment(#position) } define scrapePage(#doc) { set(#doc,$plugin function("DeliterCSS.dll", "$Deliter URL Encode", #doc),"Local") run javascript("/* * DOMParser HTML extension * 2012-09-04 * * By Eli Grey, http://eligrey.com * Public domain. * NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. */ /*! 
@source https://gist.github.com/1129031 */ /*global document, DOMParser*/ (function(DOMParser) \{ \"use strict\"; var DOMParser_proto = DOMParser.prototype , real_parseFromString = DOMParser_proto.parseFromString ; // Firefox/Opera/IE throw errors on unsupported types try \{ // WebKit returns null on unsupported types if ((new DOMParser).parseFromString(\"\", \"text/html\")) \{ // text/html parsing is natively supported return; \} \} catch (ex) \{\} DOMParser_proto.parseFromString = function(markup, type) \{ if (/^\\s*text\\/html\\s*(?:;|$)/i.test(type)) \{ var doc = document.implementation.createHTMLDocument(\"\") ; if (markup.toLowerCase().indexOf(\'<!doctype\') > -1) \{ doc.documentElement.innerHTML = markup; \} else \{ doc.body.innerHTML = markup; \} return doc; \} else \{ return real_parseFromString.apply(this, arguments); \} \}; \}(DOMParser)); try\{ document.body.removeChild(ubotEl) \} catch(err)\{console.log(err)\} var mydoc = decodeURIComponent(\"{#doc}\") var parser = new DOMParser(); var mydoc = parser.parseFromString(mydoc, \"text/html\"); mydoc.body.style.display=\"none\" var thedoc = mydoc.querySelectorAll(\"*\") for(var x =0;x < thedoc.length;x++)\{ thedoc[x].dataset.ubot = \"true\" \} thedoc=\"\" var ubotEl = document.createElement(\"html\") ubotEl.innerHTML=mydoc.body.outerHTML document.body.appendChild(ubotEl)") comment("add your ubot scrape codes and logic below, for handling the page") add list to list(%titles,$scrape attribute(<((tagname="a" AND class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link ") AND data-ubot="true")>,"innertext"),"Delete","Global") } 2 Quote Link to post Share on other sites
deliter 203 Posted March 16, 2017 Author Report Share Posted March 16, 2017 Does this work on exbrowser? Quote Link to post Share on other sites
HelloInsomnia 1103 Posted March 16, 2017 Report Share Posted March 16, 2017 Does this work on exbrowser? Yes it does, I just switched the http part to use ExBrowser: add list to list(%href,$list from text("https://www.youtube.com/results?q=soccer&sp=SBTqAwA%253D https://www.youtube.com/results?q=soccer&sp=SCjqAwA%253D https://www.youtube.com/results?q=soccer&sp=SDzqAwA%253D https://www.youtube.com/results?q=soccer&sp=SFDqAwA%253D https://www.youtube.com/results?q=soccer&sp=SGTqAwA%253D https://www.youtube.com/results?q=soccer&sp=SHjqAwA%253D",$new line),"Delete","Global") set(#position,0,"Global") plugin command("ExBrowser.dll", "ExBrowser CleanUp") plugin command("ExBrowser.dll", "ExBrowser Launcher", "Chrome", "", "") loop($list total(%href)) { plugin command("ExBrowser.dll", "ExBrowser Navigate", $list item(%href,#position)) set(#get,$plugin function("ExBrowser.dll", "$ExBrowser Document Text"),"Global") scrapePage(#get) increment(#position) } define scrapePage(#doc) { set(#doc,$plugin function("DeliterCSS.dll", "$Deliter URL Encode", #doc),"Local") run javascript("try\{ document.body.removeChild(ubotEl) \} catch(err)\{console.log(err)\} var mydoc = decodeURIComponent(\"{#doc}\") var parser = new DOMParser(); var mydoc = parser.parseFromString(mydoc, \"text/html\"); mydoc.body.style.display=\"none\" var thedoc = mydoc.querySelectorAll(\"*\") for(var x =0;x < thedoc.length;x++)\{ thedoc[x].dataset.ubot = \"true\" \} thedoc=\"\" var ubotEl = document.createElement(\"html\") ubotEl.innerHTML=mydoc.body.outerHTML document.body.appendChild(ubotEl)") comment("add your ubot scrape codes and logic below, for handling the page") add list to list(%titles,$scrape attribute(<((tagname="a" AND class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link ") AND data-ubot="true")>,"innertext"),"Delete","Global") } 1 Quote Link to post Share on other sites
deliter 203 Posted March 16, 2017 Author Report Share Posted March 16, 2017 Yes it does, I just switched the http part to use ExBrowser: That's cool, I didn't know if you still had a UBot browser when running ExBrowser. Quote Link to post Share on other sites
HelloInsomnia 1103 Posted March 17, 2017 Report Share Posted March 17, 2017 That's cool, didn't know if you still had a ubot browser when running ExBrowser ExBrowser is basically like a remote control for Chrome/Firefox and PhantomJS. You can run the nodes in UBot just like any other nodes, but a new (and completely separate) instance of one of your browsers will open, and you can use it much like the UBot browser: it has most of the same functionality but is driven by XPath input. Nothing about UBot itself changes, though; the browser is still there. 1 Quote Link to post Share on other sites
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.