I’m trying to get all the images that appear in a Google Images search.
I’m using Selenium with Python in PyCharm.
When I inspect the page with Chrome developer tools to get the XPath, I get the following:
//*[@id="islrg"]/div[1]/div[1]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[24]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[26]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[49]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[51]/div[1]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[51]/div[24]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[51]/div[26]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[51]/div[49]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[51]/div[51]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[52]/div[20]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[52]/div[45]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[52]/div[47]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[52]/div[70]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[52]/div[72]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[52]/div[95]/a[1]/div[1]/img
//*[@id="islrg"]/div[1]/div[53]/div[18]/a[1]/div[1]/img
1- How can I get the XPath dynamically?
Also, when I try to get the actual image URL I only get encrypted thumbnail ones, for example:
https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTWbVxeA8AzNYZoHKN91jM2UMG_g1pjlWN5kQ&usqp=CAU
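All of those absolute XPaths differ only in the div indexes, so what I mean by "dynamic" is one relative XPath rooted at the islrg container that matches every thumbnail at once. A minimal sketch of that idea, as shown below (the example query, the "islrg" id and the a/div/img nesting are assumptions that may not survive Google layout changes):

    # Sketch: one relative XPath instead of hard-coded div indexes.
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    driver.get("https://www.google.com/search?q=books&tbm=isch")  # example query
    # Match every thumbnail under the islrg container in one call
    thumbnails = driver.find_elements(By.XPATH, '//*[@id="islrg"]//a/div/img')
    print(len(thumbnails), "thumbnails matched by one relative XPath")
    driver.quit()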
I post my whole code here:
import os
import time
import requests  # Import the requests module
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from PIL import Image
import io
import pandas as pd
import sys
import msvcrt, time

books = []


def get_images_from_google(driver, delay, max_images):
    def scroll_down(driver):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(delay)

    image_urls = set()
    skips = 0

    while len(image_urls) + skips < max_images:
        scroll_down(driver)

        thumbnails = driver.find_elements(By.CSS_SELECTOR, '.rg_i, .Q4LuWd')
        for img in thumbnails[len(image_urls) + skips:max_images]:
            try:
                img.click()
                time.sleep(delay)
            except:
                continue

            images = driver.find_elements(By.CSS_SELECTOR, '.sFlh5c, .pT0Scc, .iPVvYb')
            for image in images:
                if image.get_attribute('src') in image_urls:
                    max_images += 1
                    skips += 1
                    break

                if image.get_attribute('src') and 'http' in image.get_attribute('src'):
                    image_urls.add(image.get_attribute('src'))
                    print(f"Found {len(image_urls)}")

    return image_urls


def download_image(download_path, url, file_name):
    try:
        image_content = requests.get(url).content
        image_file = io.BytesIO(image_content)
        image = Image.open(image_file)
        print(url)

        # Check if the image can be identified and is in a supported format
        if image.format not in ["JPEG", "PNG"]:
            print(f"Skipping image with unsupported format: {url}")
            return

        file_path = os.path.join(download_path, file_name)  # Use os.path.join to build a correct path
        with open(file_path, "wb") as f:
            image.save(f, "JPEG")

        print("Success")
    except Exception as e:
        print('FAILED -', e)


# Ask the user for the search query
search_query = input("Enter your Google Images search query: ")

# Create the 'imgs/' directory if it doesn't exist
download_path = "imgs"
os.makedirs(download_path, exist_ok=True)

# Create a Chrome driver
options = Options()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=options)

# Open the Google Images search page with the provided search query
search_url = f"https://www.google.com/search?q={search_query}&tbm=isch"
driver.get(search_url)

# Perform image scraping and downloading
urls = get_images_from_google(driver, 0.01, 10000)

for i, url in enumerate(urls):
    download_image(download_path, url, str(i) + ".JPEG")
    books.append([url])

df = pd.DataFrame(books, columns=['URL'])
df.to_csv('books.csv')

# Close the driver instance
driver.quit()
2- How can I improve this to get the actual URL of each image instead of the encrypted thumbnail?
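For question 2, here is a minimal sketch of the direction I have in mind, assuming (as in the code above) that the preview pane uses the .sFlh5c / .pT0Scc / .iPVvYb classes, and using Selenium's WebDriverWait: after clicking a thumbnail, wait until the preview src is a real http(s) URL rather than a base64 data: URI or an encrypted-tbn thumbnail, then read it. The class names are assumptions and may change.

    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait

    def get_full_res_url(driver, timeout=5):
        # Return the first preview src that looks like a full-resolution URL,
        # or None if none appears within the timeout. Class names are the
        # same assumptions used in the main script.
        def src_is_full_res(drv):
            for el in drv.find_elements(By.CSS_SELECTOR, '.sFlh5c, .pT0Scc, .iPVvYb'):
                src = el.get_attribute('src') or ''
                if src.startswith('http') and 'encrypted-tbn' not in src:
                    return src
            return False
        try:
            return WebDriverWait(driver, timeout).until(src_is_full_res)
        except Exception:
            return None

Calling something like this right after img.click() in get_images_from_google, instead of reading src immediately, is what I imagine the fix looks like, but I'm not sure it is the right approach.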