Debug School

rakesh kumar
rakesh kumar

Posted on

How to scrape YouTube comments, upvotes, and timestamps with Selenium

METHOD 1

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time

# Scrape comment text, upvote counts, and timestamps from a YouTube video page
# and print the first 10 results.

# URL of the YouTube video you want to extract comments from
video_url = "https://www.youtube.com/watch?v=YOUR_VIDEO_ID"

# Set up the Chrome WebDriver
# NOTE(review): newer Selenium 4 releases may no longer accept the
# `executable_path` keyword (a `Service` object is used instead) -- confirm
# against the Selenium version you have installed.
driver = webdriver.Chrome(executable_path='path_to_chromedriver')  # Replace with your Chrome WebDriver path

# Everything after the browser is launched runs inside try/finally so the
# browser is always closed, even if navigation or scraping raises.
try:
    driver.get(video_url)

    # Keep clicking "Show more" until no such button can be found or clicked.
    while True:
        try:
            # Find the "Show more" button and click it to load more comments
            show_more_button = driver.find_element(By.XPATH, "//yt-formatted-string[contains(text(),'Show more')]")
            show_more_button.click()
            time.sleep(2)  # Wait for comments to load (adjust as needed)
        except Exception:
            # Deliberately broad: any failure to locate or click the button
            # is treated as "nothing more to load" rather than as an error.
            break

    # Extract comments, upvotes, and timestamps
    comments = []
    comment_elements = driver.find_elements(By.CSS_SELECTOR, "#content-text")
    upvote_elements = driver.find_elements(By.CSS_SELECTOR, "#vote-count-middle")
    timestamp_elements = driver.find_elements(By.CSS_SELECTOR, "#header-author > yt-formatted-string > a")

    # The three lists are paired by position. When the upvote or timestamp
    # list is shorter than the comment list, fall back to "0" / "N/A".
    for i, comment_element in enumerate(comment_elements):
        comments.append({
            "comment_text": comment_element.text,
            "upvotes": upvote_elements[i].text if i < len(upvote_elements) else "0",
            "timestamp": timestamp_elements[i].text if i < len(timestamp_elements) else "N/A",
        })

    # Print the first 10 comments (you can save them to a file or database)
    for comment in comments[:10]:
        print(f"Comment: {comment['comment_text']}")
        print(f"Upvotes: {comment['upvotes']}")
        print(f"Timestamp: {comment['timestamp']}")
        print("-" * 20)
finally:
    # Close the browser
    driver.quit()
Enter fullscreen mode Exit fullscreen mode

METHOD 2

import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Scrape comment text, upvote counts, and timestamps from a YouTube video page
# by scrolling to load comments, then print up to `min_comments` of them.

# Create a new instance of the Chrome driver (specify the path to your WebDriver)
# NOTE(review): newer Selenium 4 releases may no longer accept the
# `executable_path` keyword (a `Service` object is used instead) -- confirm
# against the Selenium version you have installed.
driver = webdriver.Chrome(executable_path='/path/to/chromedriver')

# Everything after the browser is launched runs inside try/finally so the
# browser is always closed, even if navigation or scraping raises.
try:
    # Open the YouTube video page
    video_url = 'https://www.youtube.com/watch?v=your_video_id_here'
    driver.get(video_url)

    # Scroll down to load more comments (adjust the number of scrolls as needed)
    scrolls = 10
    for _ in range(scrolls):
        driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.END)
        time.sleep(2)  # Adjust the sleep time as needed

    # Extract comments, upvotes, and timestamps
    # NOTE(review): '.style-scope.yt-simple-endpoint' does not look specific
    # to comment text -- confirm it selects the comment bodies you expect.
    comments = driver.find_elements(By.CSS_SELECTOR, '.style-scope.yt-simple-endpoint')
    upvotes = driver.find_elements(By.CSS_SELECTOR, 'span#vote-count-middle')
    timestamps = driver.find_elements(By.CSS_SELECTOR, 'yt-formatted-string.published-time-text a')

    # Target number of comments: warn when fewer were loaded, and print at
    # most this many.
    min_comments = 500

    if len(comments) < min_comments:
        print(f"Warning: Only {len(comments)} comments found. You may need to scroll further to load more comments.")

    # Print the extracted data. Only comments that were actually found are
    # visited, and a missing upvote/timestamp falls back to "0" / "N/A", so a
    # short result list no longer raises IndexError.
    for i, comment in enumerate(comments[:min_comments]):
        comment_text = comment.text
        upvote_count = upvotes[i].text if i < len(upvotes) else "0"
        timestamp = timestamps[i].text if i < len(timestamps) else "N/A"
        print(f"Comment {i + 1}:")
        print(f"Comment: {comment_text}")
        print(f"Upvotes: {upvote_count}")
        print(f"Timestamp: {timestamp}")
        print("\n")
finally:
    # Close the browser
    driver.quit()
Enter fullscreen mode Exit fullscreen mode

Top comments (0)