Prerequisites:
- pip3 install requests
- pip3 install beautifulsoup4
- Save this code into mycode.py
- Run: python3 mycode.py
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
def scrape_links(url, depth=0, max_depth=3, visited=None):
    """Recursively fetch *url*, print it, and follow every link on the page.

    Args:
        url: Page to fetch.
        depth: Current recursion level (0 for the starting page).
        max_depth: Deepest level of links to follow.
        visited: Set of URLs already fetched, created on the first call.
            Guards against infinite recursion when pages link to each
            other, and avoids re-fetching the same page many times.
    """
    if visited is None:
        visited = set()
    # Stop at the depth limit or on a URL we have already scraped (cycle guard).
    if depth > max_depth or url in visited:
        return
    visited.add(url)
    try:
        # Timeout so a single unresponsive server cannot hang the whole crawl.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return
    soup = BeautifulSoup(response.content, "html.parser")
    print(f"Level {depth}: {url}")
    # Extract links from the page and process them recursively.
    for link in soup.find_all("a", href=True):
        next_url = urljoin(url, link["href"])
        scrape_links(next_url, depth + 1, max_depth, visited)
if __name__ == "__main__":
    # Entry point: begin the crawl here (swap in the site you want to scrape).
    start = "https://example.com"
    scrape_links(start)
Top comments (0)