Jump to content
UBot Underground

Use Ubots Scraping Tools With Socket/ayman Http Gets


Recommended Posts

Hey all

 

Here's a quick example bot — you can use this script to apply Ubot's scraping tools to HTTP Gets, ExBrowser, etc.

 

http://screencast-o-matic.com/watch/cbeDoT6glj

 

The example code below just uses Ayman's HTTP Post plugin to fetch 7 pages of YouTube video results, and I use Ubot's scraper to scrape the video titles. Basically, put your own scraping code below the comment and you're good to go.

 

 

The one below is for the new Ubot, the post below that for the old ubot

 

You can use Ubots Scraping tools, like scrape attribute, exists etc, with HTTP Gets with this command, so no need for regex, just go to a page, use the ubot scraping tools, then put the scrape code,logic etc inside of the container at the bottom

 

If you want the browser to keep switching pages while this code is running, add a "wait for browser event - DOM ready" to the command.

 

For this to work you will need to download my CSS Plugin — I've just added an Encode URI function for this purpose (the built-in Ubot one will not work); link below.

 

 

**Update

 

I've edited the code, as sometimes the Ubot scraper will also scrape the actual page the browser is on (if your scrape selector is just a wildcard, etc.). So simply use your Ubot scraping code to scrape the HTTP Get documents, but add this at the end............ AND data-ubot="true"

 

This will ensure only the http doc is scraped not the page, the update does make it run a bit slower but hardly noticeable, the below code is to scrape youtube titles from their results page, tagname "a" with a class of yt etc, but also notice, after getting that scrape code, I add in AND data-ubot="true"

 

$scrape attribute(<((tagname="a" AND class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink      spf-link ") AND data-ubot="true")>,"innertext")

 

**

https://1drv.ms/u/s!AgO9AudYbciJgP8ZAkIdZ6ICeNRCWQ
comment("Build the list of YouTube search-result page URLs to fetch (soccer query, paged via the sp parameter)")
add list to list(%href,$list from text("https://www.youtube.com/results?q=soccer&sp=SBTqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SCjqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SDzqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SFDqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SGTqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SHjqAwA%253D",$new line),"Delete","Global")
set(#position,0,"Global")
comment("Fetch each URL as raw HTML with the Ayman HTTP Post plugin and hand it to scrapePage for Ubot-native scraping")
loop($list total(%href)) {
    set(#get,$plugin function("HTTP post.dll", "$http get", $list item(%href,#position), "", "", "", ""),"Global")
    scrapePage(#get)
    increment(#position)
}
define scrapePage(#doc) {
  comment("Takes raw HTML in #doc and injects it - hidden and tagged with data-ubot=true on every element - into the live browser page, so Ubots normal scraping commands can run against the HTTP document instead of the visible page")
  comment("URL-encode the HTML first so it survives being embedded inside the javascript string below - requires the Deliter CSS plugin encoder, the stock Ubot one will not work")
  set(#doc,$plugin function("DeliterCSS.dll", "$Deliter URL Encode", #doc),"Local")
    comment("The script removes the ubotEl container left over from the previous call, decodes and parses #doc with DOMParser, tags every parsed element with data-ubot=true, then appends the hidden result to the live page body")
    run javascript("try\{

document.body.removeChild(ubotEl)

\}

catch(err)\{console.log(err)\}
var mydoc = decodeURIComponent(\"{#doc}\")

var parser = new DOMParser();
var mydoc = parser.parseFromString(mydoc, \"text/html\");

mydoc.body.style.display=\"none\"
var thedoc = mydoc.querySelectorAll(\"*\")
for(var x =0;x < thedoc.length;x++)\{

thedoc[x].dataset.ubot = \"true\"
\}
thedoc=\"\"
var ubotEl = document.createElement(\"html\")
ubotEl.innerHTML=mydoc.body.outerHTML


document.body.appendChild(ubotEl)")
    comment("add your ubot scrape codes and logic below, for handling the page")
    comment("Note the trailing AND data-ubot with value true in the selector - it restricts the scrape to the injected document and keeps the live page out of the results")
     add list to list(%titles,$scrape attribute(<((tagname="a" AND class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink      spf-link ") AND data-ubot="true")>,"innertext"),"Delete","Global")
}

  • Like 4
Link to post
Share on other sites

Here is a version for the Ubot 4

comment("Ubot 4 version - build the list of YouTube search-result page URLs to fetch (soccer query, paged via the sp parameter)")
add list to list(%href,$list from text("https://www.youtube.com/results?q=soccer&sp=SBTqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SCjqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SDzqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SFDqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SGTqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SHjqAwA%253D",$new line),"Delete","Global")
set(#position,0,"Global")
comment("Fetch each URL as raw HTML with the Ayman HTTP Post plugin and hand it to scrapePage for Ubot-native scraping")
loop($list total(%href)) {
    set(#get,$plugin function("HTTP post.dll", "$http get", $list item(%href,#position), "", "", "", ""),"Global")
    scrapePage(#get)
    increment(#position)
}
define scrapePage(#doc) {
    comment("Ubot 4 version - same approach as the new-Ubot define, but the embedded javascript first installs the Eli Grey DOMParser text-html polyfill, since the older embedded browser does not support parseFromString with text-html natively")
    comment("URL-encode the HTML so it survives being embedded inside the javascript string below - requires the Deliter CSS plugin encoder")
    set(#doc,$plugin function("DeliterCSS.dll", "$Deliter URL Encode", #doc),"Local")
    run javascript("/*
 * DOMParser HTML extension
 * 2012-09-04
 * 
 * By Eli Grey, http://eligrey.com
 * Public domain.
 * NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
 */

/*! @source https://gist.github.com/1129031 */
/*global document, DOMParser*/

(function(DOMParser) \{
	\"use strict\";

	var
	  DOMParser_proto = DOMParser.prototype
	, real_parseFromString = DOMParser_proto.parseFromString
	;

	// Firefox/Opera/IE throw errors on unsupported types
	try \{
		// WebKit returns null on unsupported types
		if ((new DOMParser).parseFromString(\"\", \"text/html\")) \{
			// text/html parsing is natively supported
			return;
		\}
	\} catch (ex) \{\}

	DOMParser_proto.parseFromString = function(markup, type) \{
		if (/^\\s*text\\/html\\s*(?:;|$)/i.test(type)) \{
			var
			  doc = document.implementation.createHTMLDocument(\"\")
			;
	      		if (markup.toLowerCase().indexOf(\'<!doctype\') > -1) \{
        			doc.documentElement.innerHTML = markup;
      			\}
      			else \{
        			doc.body.innerHTML = markup;
      			\}
			return doc;
		\} else \{
			return real_parseFromString.apply(this, arguments);
		\}
	\};
\}(DOMParser));




try\{

document.body.removeChild(ubotEl)

\}

catch(err)\{console.log(err)\}
var mydoc = decodeURIComponent(\"{#doc}\")

var parser = new DOMParser();
var mydoc = parser.parseFromString(mydoc, \"text/html\");

mydoc.body.style.display=\"none\"
var thedoc = mydoc.querySelectorAll(\"*\")
for(var x =0;x < thedoc.length;x++)\{

thedoc[x].dataset.ubot = \"true\"
\}
thedoc=\"\"
var ubotEl = document.createElement(\"html\")
ubotEl.innerHTML=mydoc.body.outerHTML


document.body.appendChild(ubotEl)")
    comment("add your ubot scrape codes and logic below, for handling the page")
    comment("The trailing AND data-ubot with value true in the selector restricts the scrape to the injected document, keeping the live page out of the results")
     add list to list(%titles,$scrape attribute(<((tagname="a" AND class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink      spf-link ") AND data-ubot="true")>,"innertext"),"Delete","Global")
}

  • Like 2
Link to post
Share on other sites

Does this work on exbrowser?

 

Yes it does, I just switched the http part to use ExBrowser:

comment("ExBrowser version - build the list of YouTube search-result page URLs to visit (soccer query, paged via the sp parameter)")
add list to list(%href,$list from text("https://www.youtube.com/results?q=soccer&sp=SBTqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SCjqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SDzqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SFDqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SGTqAwA%253D
https://www.youtube.com/results?q=soccer&sp=SHjqAwA%253D",$new line),"Delete","Global")
set(#position,0,"Global")
comment("Reset any previous ExBrowser session, then launch a separate Chrome instance under ExBrowser control")
plugin command("ExBrowser.dll", "ExBrowser CleanUp")
plugin command("ExBrowser.dll", "ExBrowser Launcher", "Chrome", "", "")
comment("Navigate the external Chrome to each URL, pull the full document text, and hand it to scrapePage for Ubot-native scraping")
loop($list total(%href)) {
    plugin command("ExBrowser.dll", "ExBrowser Navigate", $list item(%href,#position))
    set(#get,$plugin function("ExBrowser.dll", "$ExBrowser Document Text"),"Global")
    scrapePage(#get)
    increment(#position)
}
define scrapePage(#doc) {
    comment("Takes the document text captured from the ExBrowser-controlled Chrome and injects it - hidden and tagged with data-ubot=true on every element - into the Ubot browser page, so Ubots normal scraping commands can run against it")
    comment("URL-encode the HTML first so it survives being embedded inside the javascript string below - requires the Deliter CSS plugin encoder, the stock Ubot one will not work")
    set(#doc,$plugin function("DeliterCSS.dll", "$Deliter URL Encode", #doc),"Local")
    comment("The script removes the ubotEl container left over from the previous call, decodes and parses #doc with DOMParser, tags every parsed element with data-ubot=true, then appends the hidden result to the live page body")
    run javascript("try\{

document.body.removeChild(ubotEl)

\}

catch(err)\{console.log(err)\}
var mydoc = decodeURIComponent(\"{#doc}\")

var parser = new DOMParser();
var mydoc = parser.parseFromString(mydoc, \"text/html\");

mydoc.body.style.display=\"none\"
var thedoc = mydoc.querySelectorAll(\"*\")
for(var x =0;x < thedoc.length;x++)\{

thedoc[x].dataset.ubot = \"true\"
\}
thedoc=\"\"
var ubotEl = document.createElement(\"html\")
ubotEl.innerHTML=mydoc.body.outerHTML


document.body.appendChild(ubotEl)")
    comment("add your ubot scrape codes and logic below, for handling the page")
    comment("The trailing AND data-ubot with value true in the selector restricts the scrape to the injected document, keeping the live page out of the results")
    add list to list(%titles,$scrape attribute(<((tagname="a" AND class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink      spf-link ") AND data-ubot="true")>,"innertext"),"Delete","Global")
}
  • Like 1
Link to post
Share on other sites

That's cool — I didn't know you still had a Ubot browser when running ExBrowser.

 

ExBrowser is basically like a remote control for Chrome/Firefox and PhantomJS. You can run the nodes in Ubot just like any other nodes, but a new (and completely separate) instance of one of your browsers will open and you can basically use it similar to Ubot browser it has most of the same kind of functionality but runs via xpath input. Nothing about Ubot itself changes though, the browser is still there.

  • Like 1
Link to post
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

Loading...
×
×
  • Create New...