Jump to content
UBot Underground

Use Ubots Scraping Tools With Socket/ayman Http Gets


Recommended Posts

Hey all

 

Here's a quick example bot — you can use this script to apply Ubot's scraping tools to HTTP Gets, ExBrowser, etc.

 

http://screencast-o-matic.com/watch/cbeDoT6glj

 

The example code below just uses Ayman's HTTP Post plugin to fetch 7 pages of YouTube video results, and I use Ubot's scraper to scrape the video titles. Basically, put your own scraping code below the comment and you're good to go.

 

 

The one below is for the new Ubot, the post below that for the old ubot

 

You can use Ubots Scraping tools, like scrape attribute, exists etc, with HTTP Gets with this command, so no need for regex, just go to a page, use the ubot scraping tools, then put the scrape code,logic etc inside of the container at the bottom

 

If you want the browser to keep switching pages while this code is running, add a "wait for browser event - DOM ready" to the command.

 

For this to work you will need to download my CSS Plugin — I've just added an Encode URI function for this purpose (the built-in Ubot one will not work); link below.

 

 

**Update

 

I've edited the code, as sometimes the Ubot scraper will also scrape the actual page the browser is on (if your scrape selector is just a wildcard, etc.). So simply use your Ubot scraping code to scrape the HTTP Get documents, but add this at the end............ AND data-ubot="true"

 

This will ensure only the http doc is scraped not the page, the update does make it run a bit slower but hardly noticeable, the below code is to scrape youtube titles from their results page, tagname "a" with a class of yt etc, but also notice, after getting that scrape code, I add in AND data-ubot="true"

 

$scrape attribute(<((tagname="a" AND class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink      spf-link ") AND data-ubot="true")>,"innertext")

 

**

https://1drv.ms/u/s!AgO9AudYbciJgP8ZAkIdZ6ICeNRCWQ
comment("Build the list of YouTube search-result page URLs to fetch (soccer query, paged via the sp parameter)")
add list to list(%href,$list from text("https://www.youtube.com/results?q=soccer&sp=SBTqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SCjqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SDzqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SFDqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SGTqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SHjqAwA%253D",$new line),"Delete","Global")
set(#position,0,"Global")
comment("Fetch each URL as raw HTML with the Ayman HTTP Post plugin and hand it to scrapePage for Ubot-native scraping")
loop($list total(%href)) {
    set(#get,$plugin function("HTTP post.dll", "$http get", $list item(%href,#position), "", "", "", ""),"Global")
    scrapePage(#get)
    increment(#position)
}
define scrapePage(#doc) {
  comment("Takes raw HTML in #doc and injects it - hidden and tagged with data-ubot=true on every element - into the live browser page, so Ubots normal scraping commands can run against the HTTP document instead of the visible page")
  comment("URL-encode the HTML first so it survives being embedded inside the javascript string below - requires the Deliter CSS plugin encoder, the stock Ubot one will not work")
  set(#doc,$plugin function("DeliterCSS.dll", "$Deliter URL Encode", #doc),"Local")
    comment("The script removes the ubotEl container left over from the previous call, decodes and parses #doc with DOMParser, tags every parsed element with data-ubot=true, then appends the hidden result to the live page body")
    run javascript("try\{

document.body.removeChild(ubotEl)

\}

catch(err)\{console.log(err)\}
var mydoc = decodeURIComponent(\"{#doc}\")

var parser = new DOMParser();
var mydoc = parser.parseFromString(mydoc, \"text/html\");

mydoc.body.style.display=\"none\"
var thedoc = mydoc.querySelectorAll(\"*\")
for(var x =0;x < thedoc.length;x++)\{

thedoc[x].dataset.ubot = \"true\"
\}
thedoc=\"\"
var ubotEl = document.createElement(\"html\")
ubotEl.innerHTML=mydoc.body.outerHTML


document.body.appendChild(ubotEl)")
    comment("add your ubot scrape codes and logic below, for handling the page")
    comment("Note the trailing AND data-ubot with value true in the selector - it restricts the scrape to the injected document and keeps the live page out of the results")
     add list to list(%titles,$scrape attribute(<((tagname="a" AND class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink      spf-link ") AND data-ubot="true")>,"innertext"),"Delete","Global")
}

  • Like 4
Link to post
Share on other sites

Here is a version for the Ubot 4

comment("Ubot 4 version - build the list of YouTube search-result page URLs to fetch (soccer query, paged via the sp parameter)")
add list to list(%href,$list from text("https://www.youtube.com/results?q=soccer&sp=SBTqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SCjqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SDzqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SFDqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SGTqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SHjqAwA%253D",$new line),"Delete","Global")
set(#position,0,"Global")
comment("Fetch each URL as raw HTML with the Ayman HTTP Post plugin and hand it to scrapePage for Ubot-native scraping")
loop($list total(%href)) {
    set(#get,$plugin function("HTTP post.dll", "$http get", $list item(%href,#position), "", "", "", ""),"Global")
    scrapePage(#get)
    increment(#position)
}
define scrapePage(#doc) {
    comment("Ubot 4 version - same approach as the new-Ubot define, but the embedded javascript first installs the Eli Grey DOMParser text-html polyfill, since the older embedded browser does not support parseFromString with text-html natively")
    comment("URL-encode the HTML so it survives being embedded inside the javascript string below - requires the Deliter CSS plugin encoder")
    set(#doc,$plugin function("DeliterCSS.dll", "$Deliter URL Encode", #doc),"Local")
    run javascript("/*
 * DOMParser HTML extension
 * 2012-09-04
 * 
 * By Eli Grey, http://eligrey.com
 * Public domain.
 * NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
 */

/*! @source https://gist.github.com/1129031 */
/*global document, DOMParser*/

(function(DOMParser) \{
	\"use strict\";

	var
	  DOMParser_proto = DOMParser.prototype
	, real_parseFromString = DOMParser_proto.parseFromString
	;

	// Firefox/Opera/IE throw errors on unsupported types
	try \{
		// WebKit returns null on unsupported types
		if ((new DOMParser).parseFromString(\"\", \"text/html\")) \{
			// text/html parsing is natively supported
			return;
		\}
	\} catch (ex) \{\}

	DOMParser_proto.parseFromString = function(markup, type) \{
		if (/^\\s*text\\/html\\s*(?:;|$)/i.test(type)) \{
			var
			  doc = document.implementation.createHTMLDocument(\"\")
			;
	      		if (markup.toLowerCase().indexOf(\'<!doctype\') > -1) \{
        			doc.documentElement.innerHTML = markup;
      			\}
      			else \{
        			doc.body.innerHTML = markup;
      			\}
			return doc;
		\} else \{
			return real_parseFromString.apply(this, arguments);
		\}
	\};
\}(DOMParser));




try\{

document.body.removeChild(ubotEl)

\}

catch(err)\{console.log(err)\}
var mydoc = decodeURIComponent(\"{#doc}\")

var parser = new DOMParser();
var mydoc = parser.parseFromString(mydoc, \"text/html\");

mydoc.body.style.display=\"none\"
var thedoc = mydoc.querySelectorAll(\"*\")
for(var x =0;x < thedoc.length;x++)\{

thedoc[x].dataset.ubot = \"true\"
\}
thedoc=\"\"
var ubotEl = document.createElement(\"html\")
ubotEl.innerHTML=mydoc.body.outerHTML


document.body.appendChild(ubotEl)")
    comment("add your ubot scrape codes and logic below, for handling the page")
    comment("The trailing AND data-ubot with value true in the selector restricts the scrape to the injected document, keeping the live page out of the results")
     add list to list(%titles,$scrape attribute(<((tagname="a" AND class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink      spf-link ") AND data-ubot="true")>,"innertext"),"Delete","Global")
}

  • Like 2
Link to post
Share on other sites

Does this work on exbrowser?

 

Yes it does, I just switched the http part to use ExBrowser:

comment("ExBrowser version - build the list of YouTube search-result page URLs to visit (soccer query, paged via the sp parameter)")
add list to list(%href,$list from text("https://www.youtube.com/results?q=soccer&sp=SBTqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SCjqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SDzqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SFDqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SGTqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SHjqAwA%253D",$new line),"Delete","Global")
set(#position,0,"Global")
comment("Reset any previous ExBrowser session, then launch a separate Chrome instance under ExBrowser control")
plugin command("ExBrowser.dll", "ExBrowser CleanUp")
plugin command("ExBrowser.dll", "ExBrowser Launcher", "Chrome", "", "")
comment("Navigate the external Chrome to each URL, pull the full document text, and hand it to scrapePage for Ubot-native scraping")
loop($list total(%href)) {
    plugin command("ExBrowser.dll", "ExBrowser Navigate", $list item(%href,#position))
    set(#get,$plugin function("ExBrowser.dll", "$ExBrowser Document Text"),"Global")
    scrapePage(#get)
    increment(#position)
}
define scrapePage(#doc) {
    comment("Takes the document text captured from the ExBrowser-controlled Chrome and injects it - hidden and tagged with data-ubot=true on every element - into the Ubot browser page, so Ubots normal scraping commands can run against it")
    comment("URL-encode the HTML first so it survives being embedded inside the javascript string below - requires the Deliter CSS plugin encoder, the stock Ubot one will not work")
    set(#doc,$plugin function("DeliterCSS.dll", "$Deliter URL Encode", #doc),"Local")
    comment("The script removes the ubotEl container left over from the previous call, decodes and parses #doc with DOMParser, tags every parsed element with data-ubot=true, then appends the hidden result to the live page body")
    run javascript("try\{

document.body.removeChild(ubotEl)

\}

catch(err)\{console.log(err)\}
var mydoc = decodeURIComponent(\"{#doc}\")

var parser = new DOMParser();
var mydoc = parser.parseFromString(mydoc, \"text/html\");

mydoc.body.style.display=\"none\"
var thedoc = mydoc.querySelectorAll(\"*\")
for(var x =0;x < thedoc.length;x++)\{

thedoc[x].dataset.ubot = \"true\"
\}
thedoc=\"\"
var ubotEl = document.createElement(\"html\")
ubotEl.innerHTML=mydoc.body.outerHTML


document.body.appendChild(ubotEl)")
    comment("add your ubot scrape codes and logic below, for handling the page")
    comment("The trailing AND data-ubot with value true in the selector restricts the scrape to the injected document, keeping the live page out of the results")
    add list to list(%titles,$scrape attribute(<((tagname="a" AND class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink      spf-link ") AND data-ubot="true")>,"innertext"),"Delete","Global")
}
  • Like 1
Link to post
Share on other sites

That's cool — I didn't know you still had a Ubot browser when running ExBrowser.

 

ExBrowser is basically like a remote control for Chrome/Firefox and PhantomJS. You can run the nodes in Ubot just like any other nodes, but a new (and completely separate) instance of one of your browsers will open and you can basically use it similar to Ubot browser it has most of the same kind of functionality but runs via xpath input. Nothing about Ubot itself changes though, the browser is still there.

  • Like 1
Link to post
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

Loading...
×
×
  • Create New...