Jump to content
UBot Underground

Scrape Facebook Image URLS


Recommended Posts

Hi all,

 

I thought this would be a walk in the park to do, however I can't get the full image urls for photos on Facebook.

 

Here's an example of the page I am looking at:

https://www.facebook.com/thescript/photos

 

 

Here's the scrape code I am using:

comment("Scrapes the href attribute of each element matching the thumbnail wildcard selector and adds the results to the photourls list. NOTE(review): the selector matches an i element whose image URL sits inside its style attribute; the reply further down scrapes outerhtml instead, so confirm whether href is ever populated on this element.")
add list to list(%photourls, $scrape attribute(<outerhtml=w"<i style=\"background-image: url(http://*.jpg);\" class=\"uiMediaThumbImg\"></i>">, "href"), "Delete", "Global")

 

I have the browser set to Safari and can see the images; however, I can't get the URLs.

 

Thanks in advance for your help

Link to post
Share on other sites

Hi,

 

Sample code:

comment("Collects the outerhtml of every thumbnail element on the current page, then reduces each entry to the URL found inside its background-image url value and stores it in the photourls list.")
clear list(%photourlsscrape)
add list to list(%photourlsscrape, $scrape attribute(<outerhtml=w"<i style=\"background-image: url(*);\" class=\"uiMediaThumbImg\"></i>">, "outerhtml"), "Delete", "Global")
clear list(%photourls)
comment("Iterates once per scraped entry; the position check guards the next list item call so it is only made while the cursor is still before the end of the list.")
loop($list total(%photourlsscrape)) {
if($comparison($list position(%photourlsscrape), "<", $list total(%photourlsscrape))) {
 then {
	 comment("The inner replace strips everything from the first closing parenthesis onward; the outer replace strips everything up to and including the url opening parenthesis, leaving only the URL text.")
	 set(#photourlitem, $replace regular expression($replace regular expression($next list item(%photourlsscrape), "\\).*", $nothing), ".*url\\(", $nothing), "Global")
	 add item to list(%photourls, #photourlitem, "Delete", "Global")
 }
 else {
 }
}
}

 

Kevin

Link to post
Share on other sites

Hi Kevin

 

How would I save these images to the desktop while keeping their original filenames? Alternatively, their file names aren't that important; could I download them in sequential order and rename them 1, 2, 3 and so on?

 

Thanks for your help,

 

Kevin

Link to post
Share on other sites

Hi,

 

Save images using file names.

comment("Loads the photos page, scrapes the URL of every thumbnail from its background-image style, and downloads each one into the fb-img folder under the file name taken from the last slash-separated part of the URL.")
set user agent("Internet Explorer 8")
navigate("http://www.facebook.com/thesmallhoursdublin/photos", "Wait")
wait for browser event("Everything Loaded", 30)
clear list(%photourlsscrape)
add list to list(%photourlsscrape, $scrape attribute(<outerhtml=w"<i style=\"background-image: url(*);\" class=\"uiMediaThumbImg\"></i>">, "outerhtml"), "Delete", "Global")
clear list(%photourls)
comment("Iterates once per scraped entry; the position check guards the next list item call so it is only made while the cursor is still before the end of the list.")
loop($list total(%photourlsscrape)) {
if($comparison($list position(%photourlsscrape), "<", $list total(%photourlsscrape))) {
 then {
	 comment("The inner replace strips everything from the first closing parenthesis onward; the outer replace strips everything up to and including the url opening parenthesis, leaving only the URL text.")
	 set(#photourlitem, $replace regular expression($replace regular expression($next list item(%photourlsscrape), "\\).*", $nothing), ".*url\\(", $nothing), "Global")
	 add item to list(%photourls, #photourlitem, "Delete", "Global")
	 comment("Splits the URL on slashes and reads the entry at index total minus one as the download file name. NOTE(review): this presumes zero-based list indexing and that the Delete option has not removed a repeated path segment - confirm.")
	 clear list(%imgurlbreakdown)
	 add list to list(%imgurlbreakdown, $list from text(#photourlitem, "/"), "Delete", "Global")
	 set(#downloadfn, $list item(%imgurlbreakdown, $subtract($list total(%imgurlbreakdown), 1)), "Global")
	 comment("Randomised pause before each download; the upper bound is itself drawn at random between 4 and 6.")
	 wait($rand(3, $rand(4, 6)))
	 download file(#photourlitem, "c:\\downloads\\fb-img\\{#downloadfn}")
 }
 else {
 }
}
}

 

Kevin

Link to post
Share on other sites

Hi Kevin,

 

Turns out this was downloading the thumbnails rather than the actual images themselves. I couldn't understand why the resolution was so poor until I checked the images that were downloaded. What's needed to download the actual files?

Link to post
Share on other sites

Ok been looking at this some more today:

 

https://www.facebook.com/media/set/?set=a.10150700393470399.442286.10638675398&type=3

 

I can only get the thumbnails scraped rather than the actual full-sized images. Will I need to click each one individually and load it up in order to download it?

Link to post
Share on other sites

Hi,

 

Updated code:

comment("Loads the photos page, clicks each thumbnail in turn, collects the full-size image URL shown after the click (walking into album thumbnails when the click opens an album), and finally downloads every collected URL into the fb-img folder under the file name taken from the last slash-separated part of the URL.")
set user agent("Internet Explorer 8")
set(#fburl, "http://www.facebook.com/thesmallhoursdublin/photos", "Global")
navigate(#fburl, "Wait")
wait for browser event("Everything Loaded", 30)
clear list(%photourlsscrape)
add list to list(%photourlsscrape, $scrape attribute(<outerhtml=w"<i style=\"background-image: url(*);\" class=\"uiMediaThumbImg\"></i>">, "outerhtml"), "Delete", "Global")
clear list(%photourlthumbs)
clear list(%photourls)
comment("Pass 1: visit every thumbnail scraped from the photos page and gather full-size image URLs into the photourls list.")
loop($list total(%photourlsscrape)) {
if($comparison($list position(%photourlsscrape), "<", $list total(%photourlsscrape))) {
 then {
	 comment("Reduce the scraped outerhtml to the bare thumbnail URL, then click the element whose style carries that URL.")
	 set(#photourlsscrapeitem, $replace regular expression($replace regular expression($next list item(%photourlsscrape), "\\).*", $nothing), ".*url\\(", $nothing), "Global")
	 click(<outerhtml=w"<i style=\"background-image: url({#photourlsscrapeitem});\" class=\"uiMediaThumbImg\"></i>">, "Left Click", "No")
	 wait for browser event("Everything Loaded", 30)
	 wait($rand(5, $rand(6, 10)))
	 comment("If the click landed on a page that lists large album thumbnails, walk each of those links individually.")
	 if($exists(<outerhtml=w"<a class=\"uiMediaThumb uiScrollableThumb uiMediaThumbLarge\" href=\"*\"*</a>">)) {
		 then {
			 clear list(%photoalbumurls)
			 add list to list(%photoalbumurls, $scrape attribute(<outerhtml=w"<a class=\"uiMediaThumb uiScrollableThumb uiMediaThumbLarge\" href=\"*\"*</a>">, "fullhref"), "Delete", "Global")
			 loop($list total(%photoalbumurls)) {
				 if($comparison($list position(%photoalbumurls), "<", $list total(%photoalbumurls))) {
					 then {
						 comment("Take the value between fbid= and &set= in the album link and use it as the name of the element to click.")
						 set(#photoalbumurlitem, $next list item(%photoalbumurls), "Global")
						 set(#photoalbumurlitemname, $replace regular expression($replace regular expression(#photoalbumurlitem, "&set=.*", $nothing), ".*fbid=", $nothing), "Global")
						 click(<name=#photoalbumurlitemname>, "Left Click", "No")
						 comment("Explicit 30 timeout, matching every other Everything Loaded wait in this script, so this step is bounded the same way as the rest.")
						 wait for browser event("Everything Loaded", 30)
						 wait($rand(3, $rand(4, 6)))
						 comment("The full-size image is read from either the fbPhotoImage element or the spotlight element, whichever exists on the page.")
						 if($exists(<outerhtml=w"<img class=\"fbPhotoImage img\" id=\"fbPhotoImage\" src=\"*\" alt=\"\">">)) {
							 then {
								 add item to list(%photourls, $scrape attribute(<outerhtml=w"<img class=\"fbPhotoImage img\" id=\"fbPhotoImage\" src=\"*\" alt=\"\">">, "fullsrc"), "Delete", "Global")
							 }
							 else {
							 }
						 }
						 if($exists(<class="spotlight">)) {
							 then {
								 add item to list(%photourls, $scrape attribute(<class="spotlight">, "fullsrc"), "Delete", "Global")
							 }
							 else {
							 }
						 }
						 click(<class="closeTheater">, "Left Click", "No")
						 wait for browser event("Everything Loaded", 30)
						 wait($rand(5, $rand(6, 10)))
						 comment("Make sure the browser is back on the photos page before continuing.")
						 if($comparison($url, "!=", #fburl)) {
							 then {
								 navigate(#fburl, "Wait")
								 wait for browser event("Everything Loaded", 30)
								 wait($rand(3, $rand(4, 6)))
							 }
							 else {
							 }
						 }
						 comment("While album links remain, click the same page thumbnail again so the album thumbnails are shown for the next iteration.")
						 if($comparison($list position(%photoalbumurls), "<", $list total(%photoalbumurls))) {
							 then {
								 click(<outerhtml=w"<i style=\"background-image: url({#photourlsscrapeitem});\" class=\"uiMediaThumbImg\"></i>">, "Left Click", "No")
								 wait for browser event("Everything Loaded", 30)
								 wait($rand(3, $rand(4, 6)))
							 }
							 else {
							 }
						 }
					 }
					 else {
						 comment("Album list exhausted: step back in history and, if that did not reach the photos page, navigate to it directly.")
						 run javascript("history.go(-1)")
						 wait($rand(3, $rand(4, 6)))
						 if($comparison($url, "!=", #fburl)) {
							 then {
								 navigate(#fburl, "Wait")
								 wait for browser event("Everything Loaded", 30)
								 wait($rand(3, $rand(4, 6)))
							 }
							 else {
							 }
						 }
					 }
				 }
			 }
		 }
		 else {
		 }
	 }
	 comment("When the thumbnail click showed a spotlight element directly, record its image URL.")
	 if($exists(<class="spotlight">)) {
		 then {
			 add item to list(%photourls, $scrape attribute(<class="spotlight">, "fullsrc"), "Delete", "Global")
		 }
		 else {
		 }
	 }
	 wait($rand(3, $rand(4, 6)))
	 comment("Close the viewer if its close control exists; otherwise step back in history and fall back to navigating to the photos page.")
	 if($exists(<class="closeTheater">)) {
		 then {
			 click(<class="closeTheater">, "Left Click", "No")
			 wait for browser event("Everything Loaded", 30)
			 wait($rand(3, $rand(4, 6)))
		 }
		 else {
			 run javascript("history.go(-1)")
			 wait($rand(3, $rand(4, 6)))
			 if($comparison($url, "!=", #fburl)) {
				 then {
					 navigate(#fburl, "Wait")
					 wait for browser event("Everything Loaded", 30)
					 wait($rand(3, $rand(4, 6)))
				 }
				 else {
				 }
			 }
		 }
	 }
	 wait for browser event("Everything Loaded", 30)
	 add item to list(%photourlthumbs, #photourlsscrapeitem, "Delete", "Global")
 }
 else {
 }
}
}
comment("Pass 2: rewind the photourls cursor and download every collected URL, naming each file after the last slash-separated part of its URL.")
set list position(%photourls, 0)
loop($list total(%photourls)) {
if($comparison($list position(%photourls), "<", $list total(%photourls))) {
 then {
	 set(#photourlsitem, $next list item(%photourls), "Global")
	 clear list(%imgurlbreakdown)
	 add list to list(%imgurlbreakdown, $list from text(#photourlsitem, "/"), "Delete", "Global")
	 set(#downloadfn, $list item(%imgurlbreakdown, $subtract($list total(%imgurlbreakdown), 1)), "Global")
	 wait($rand(3, $rand(4, 6)))
	 download file(#photourlsitem, "c:\\downloads\\fb-img\\{#downloadfn}")
 }
 else {
 }
}
}

 

Kevin

  • Like 1
Link to post
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

Loading...
×
×
  • Create New...