Stack Overflow Asked by ss_0708 on December 25, 2021
I am trying to scrape the first 3 images for a list of search strings from Google. As I am not very well versed with Python, I have taken help from a few sources and put together the code below. I need the images to be saved in respective folders (bearing the name of the search string) in the current working directory. The code does create a folder, but only for the first search string, and even that folder is empty.
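For illustration, given the target_folder logic in search_and_download below (with its default target_path='./images') and the hashed filenames produced by persist_image, the layout I am expecting is roughly (the hash names here are just placeholders):

images/
    1415_bush_st/
        0a1b2c3d4e.jpg
        ...
    2015_washington_blvd/
        ...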
It exits with the error below:

NameError: name 'time' is not defined

The code is:
def fetch_image_urls(query: str, max_links_to_fetch: int, wd: webdriver, sleep_between_interactions: int = 3):
    # Build the Google Images search URL for the query and load it
    search_url = f"https://www.google.com/search?q={query}&tbm=isch&ved=2ahUKEwjdh7KtreXqAhX8zIsBHbGHDGkQ2-cCegQIABAA&oq={query}&gs_lcp=CgNpbWcQA1CntRBYp7UQYNe7EGgAcAB4AIABywGIAcsBkgEDMi0xmAEAoAEBqgELZ3dzLXdpei1pbWfAAQE&sclient=img&ei=MYwaX52rF_yZr7wPsY-yyAY&bih=578&biw=1280"
    wd.get(search_url)
    time.sleep(sleep_between_interactions)

    image_urls = set()
    image_count = 0
    number_results = 0
    for i in range(1, 20):
        time.sleep(5)
        # Collect the thumbnail elements currently on the page
        thumb = wd.find_elements_by_css_selector("img.Q4LuWd")
        time.sleep(5)
        for img in thumb:
            print(img)
            print(img.get_attribute('src'))
            image_urls.add(img.get_attribute('src'))
            image_count = len(image_urls)
            number_results = image_count
        time.sleep(.5)

    print(f"Found: {number_results} search results. Extracting links...")
    return image_urls


def persist_image(folder_path: str, url: str):
    try:
        headers = {'User-agent': 'Chrome/84.0.4147.89'}
        image_content = requests.get(url, headers=headers).content
    except Exception as e:
        print(f"ERROR - Could not download {url} - {e}")

    try:
        # Name each file after a prefix of the SHA-1 hash of its contents
        image_file = io.BytesIO(image_content)
        image = Image.open(image_file).convert('RGB')
        file_path = os.path.join(folder_path, hashlib.sha1(image_content).hexdigest()[:10] + '.jpg')
        with open(file_path, 'wb') as f:
            image.save(f, "JPEG", quality=85)
        print(f"SUCCESS - saved {url} - as {file_path}")
    except Exception as e:
        print(f"ERROR - Could not save {url} - {e}")


def search_and_download(search_term: str, driver_path: str, target_path='./images', number_images=5):
    # One sub-folder per search term, e.g. './images/1415_bush_st'
    target_folder = os.path.join(target_path, '_'.join(search_term.lower().split(' ')))
    if not os.path.exists(target_folder):
        os.makedirs(target_folder)

    with webdriver.Chrome(executable_path=driver_path) as wd:
        res = fetch_image_urls(search_term, number_images, wd=wd, sleep_between_interactions=3)
    for elem in res:
        persist_image(target_folder, elem)


import requests
import os
import io
from PIL import Image
import hashlib

search_terms = ['1415 Bush St', '2015 Washington Blvd', '1420 Joh Ave', '901 W Ostend St']
for search_term in search_terms:
    search_and_download(search_term=search_term, driver_path=DRIVER_PATH)
Please suggest corrections/edits to produce the desired output.

Thanks !!
I have resolved the issue with a workaround. The code below works exactly as per the requirement; posting it for future reference for anyone with a similar requirement.

Thanks all for the help !!
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import urllib.request

searchterm = ['3500 Boston St', '1415 Bush St', '1811 POrtal St']
for i in searchterm:
    url = "https://www.google.co.in/search?q=" + i + "&source=lnms&tbm=isch"
    browser = webdriver.Chrome(r'C:\Users\XXXXxxxx\Documents\chromedriver.exe')
    browser.get(url)
    header = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"}
    counter = 0
    succounter = 0

    # One folder per search term, named exactly after the term
    if not os.path.exists(i):
        os.mkdir(i)

    for x in browser.find_elements_by_xpath('//img[contains(@class,"rg_i Q4LuWd")]'):
        counter = counter + 1
        print("Total Count:", counter)
        print("Successful Count:", succounter)
        print("URL:", x.get_attribute('src'))

        img = x.get_attribute('src')
        new_filename = i + " " + str(counter) + ".jpg"
        try:
            path = os.path.join(i, i + "_" + str(counter))
            path += new_filename
            urllib.request.urlretrieve(img, path)
            succounter += 1
        except Exception as e:
            print(e)

    print(succounter, "pictures successfully downloaded")
    browser.close()
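One caveat with this workaround: path = os.path.join(i, i + "_" + str(counter)) followed by path += new_filename glues two names together, producing files like '1415 Bush St_11415 Bush St 1.jpg', and the loop saves every thumbnail on the page rather than only the first 3 that the question asked for. A minimal, hypothetical variant of the inner loop that addresses both, using the same Selenium 3 API as above, could be:

# Hypothetical variant of the inner loop: keep only the first 3 thumbnails
# and build one clean path, e.g. '1415 Bush St/1415 Bush St_1.jpg'
thumbnails = browser.find_elements_by_xpath('//img[contains(@class,"rg_i Q4LuWd")]')
for counter, x in enumerate(thumbnails[:3], start=1):
    img = x.get_attribute('src')
    path = os.path.join(i, i + "_" + str(counter) + ".jpg")
    try:
        urllib.request.urlretrieve(img, path)
        succounter += 1
    except Exception as e:
        print(e)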
Answered by ss_0708 on December 25, 2021
You need to import time for it to work:

import time
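For reference, the full set of imports the question's script relies on would then be (the selenium import is an assumption here, since the posted snippet uses webdriver without showing where it comes from):

import time      # the missing import behind the NameError

import hashlib
import io
import os

import requests
from PIL import Image
from selenium import webdriver  # assumed: the snippet uses webdriver but never imports it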
Answered by AaronS on December 25, 2021