import time
import requests
from bs4 import BeautifulSoup
# Function to check if a page has only one element with the class grid-images_box
def has_single_grid_images_box(page_content):
    """Return True iff *page_content* shows an album under 200 MB that
    contains exactly one element with the class ``grid-images_box``.

    The album size is read from the first element with class ``break-normal``,
    whose text is expected to contain a parenthesized size like ``(123.4 MB)``.
    Pages whose size is not expressed in MB, or whose size text is malformed,
    are rejected rather than raising.

    :param page_content: raw HTML of the album page (str or bytes)
    :return: bool
    """
    soup = BeautifulSoup(page_content, 'html.parser')
    break_normal = soup.find(class_="break-normal")
    if break_normal is None:
        return False
    text = break_normal.text
    # Only MB-sized albums qualify; anything else (GB, KB, no size) is skipped.
    if "MB)" not in text:
        return False
    try:
        # Size appears as "... (NNN.N MB)"; grab the number after the "(".
        size_mb = float(text.split("(")[1].split(" ")[0])
    except (IndexError, ValueError):
        # Malformed size text -- the original crashed here; treat as no match.
        return False
    if size_mb >= 200:
        return False
    grid_images_boxes = soup.find_all(class_="grid-images_box")
    return len(grid_images_boxes) == 1
# Base URL without the page parameter
base_url = "https://www.bunkr-albums.io/?search=&page="
# Starting page
page = 1
# Initialize a session to maintain cookies across requests
session = requests.Session()

# Loop through the paginated album index until a page yields no "Visit" links
# or the server stops answering with 200.
while True:
    url = base_url + str(page)
    # Timeout prevents a hung server from blocking the script forever.
    response = session.get(url, timeout=30)
    if response.status_code != 200:
        # The original referenced `links` after this point without it being
        # assigned (NameError on a first-page failure); stop cleanly instead.
        break

    soup = BeautifulSoup(response.text, 'html.parser')
    links = soup.find_all('a', href=True, target='_blank', text='Visit')
    visit_links = [link['href'] for link in links]

    # Visit each "Visit" link and check for a single grid-images_box element.
    for visit_link in visit_links:
        response = session.get(visit_link, timeout=30)
        if response.status_code == 200 and has_single_grid_images_box(response.text):
            print(f"Page {page} with only one grid-images_box: {visit_link}")

    # No "Visit" links means we've paged past the last results page.
    if not links:
        break

    # Be polite to the server between index pages.
    time.sleep(1)
    # Increment the page number
    page += 1