HARPA.AI
LIBRARY | API | GUIDES | AI COMMANDS | BLOG

🧩  YouTube Comments Extraction

Extracts comments from a YouTube video page. You can specify how many comments to extract. #extraction

Created by Adrian Larsson
Updated on Nov 9, 2024 04:32
Installed 132 times
RUNS JS CODE

How to Use

IMPORT COMMAND

Content

# Step 1 (ask): prompts with the message below and stores the answer in the
# `targetMessageCount` parameter, which the js step receives via `args`.
# NOTE(review): `$custom` presumably adds a free-form answer next to the three
# presets — confirm against the HARPA command reference.
- type: ask
  param: targetMessageCount
  message: How many comments would you like to extract?
  options:
    - label: 50 comments
      value: 50
    - label: 100 comments
      value: 100
    - label: 200 comments
      value: 200
    - $custom
  default: ''
  vision:
    enabled: false
    mode: area
    send: true
    hint: ''
  optionsInvalid: false
- type: js
  code: |
    /**
     * Collects comments from the YouTube watch page currently loaded in
     * `document`, scrolling down to trigger loading of further comments, and
     * bundles them with basic video metadata.
     *
     * @param {number|string} targetMessageCount - Maximum number of comments to
     *   return. Coerced with Number(); a value that is not a positive number
     *   yields an empty comment list.
     * @returns {Promise<{video: (object|null), comments: string[]}|null>}
     *   `video` holds title/author/views/publishedAt/likes/url (or null when the
     *   metadata could not be read); `comments` holds strings formatted as
     *   "<time>. <author>: <content> (<likes> likes)". Resolves to null when
     *   comment collection fails unexpectedly (the error is logged).
     */
    async function scrollAndCollectMessages(targetMessageCount) {
      const config = {
        // Comment selectors
        commentThread: 'ytd-comment-thread-renderer',
        commentModel: 'ytd-comment-view-model#comment',
        author: '#header-author h3 a',
        content: '#content-text',
        likes: '#vote-count-middle',
        time: '#published-time-text a',
        commentBox: 'ytd-comment-simplebox-renderer',

        // Video metadata selectors
        videoTitle: 'title',
        videoAuthor: '.style-scope.ytd-channel-name.complex-string',
        videoViews: '#count .view-count',
        videoPublishedAt: 'ytd-video-primary-info-renderer #info-strings yt-formatted-string',
        videoLikes: 'ytd-menu-renderer button[aria-label*="like this video"]',
      };

      // Pause after each scroll before the page is scanned again.
      const SCROLL_DELAY_MS = 1500;
      // Consecutive scrolls without any new comment before giving up.
      const MAX_IDLE_SCROLLS = 5;
      // Make the numeric coercion explicit; the value may arrive as a string.
      const commentLimit = Number(targetMessageCount);

      const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

      // Trimmed text of the first match under `root`. Uses `||` (not `??`) on
      // purpose so that an empty string also falls back.
      const readText = (root, selector, fallback) =>
        root.querySelector(selector)?.textContent.trim() || fallback;

      /**
       * Reads video metadata from the page.
       * @returns {object|null} The metadata, or null if reading it threw.
       */
      function getVideoInfo() {
        try {
          const likesLabel = document
            .querySelector(config.videoLikes)
            ?.getAttribute('aria-label');

          return {
            title: readText(document, config.videoTitle, ''),
            author: readText(document, config.videoAuthor, ''),
            views: readText(document, config.videoViews, ''),
            publishedAt: readText(document, config.videoPublishedAt, ''),
            // The label may contain no digits at all; fall back to '0' instead
            // of throwing and losing every other field.
            likes: likesLabel?.match(/[\d,]+/)?.[0] ?? '0',
            url: window.location.href,
          };
        } catch (error) {
          console.error('Error getting video info:', error);
          return null;
        }
      }

      /**
       * Scrolls the page until `commentLimit` comments were gathered or
       * MAX_IDLE_SCROLLS consecutive scrolls produced nothing new.
       * @returns {Promise<string[]>} Formatted comments, at most `commentLimit`.
       */
      async function scrollAndCollectComments() {
        const comments = [];
        const seenKeys = new Set();

        // Nudges the page down one viewport if the comment box is not in the
        // DOM yet, then checks again after the usual delay.
        async function waitForComments() {
          if (document.querySelector(config.commentBox)) return true;

          document.documentElement.scrollTop += window.innerHeight;
          await sleep(SCROLL_DELAY_MS);
          return Boolean(document.querySelector(config.commentBox));
        }

        // Appends every not-yet-seen comment currently in the DOM; returns
        // whether at least one was added.
        function extractCurrentComments() {
          let foundNew = false;

          for (const thread of document.querySelectorAll(config.commentThread)) {
            const comment = thread.querySelector(config.commentModel);
            if (!comment) continue;

            const author = readText(comment, config.author, 'Unknown');
            const content = readText(comment, config.content, '');
            const likes = readText(comment, config.likes, '0');
            const time = readText(comment, config.time, '');

            // No stable id is read from the DOM, so time + author + text
            // serves as the de-duplication key.
            const key = `${time}-${author}-${content}`;
            if (!content || seenKeys.has(key)) continue;

            seenKeys.add(key);
            comments.push(`${time}. ${author}: ${content} (${likes} likes)`);
            foundNew = true;
          }

          return foundNew;
        }

        try {
          await waitForComments();

          let idleScrolls = 0;
          while (comments.length < commentLimit && idleScrolls < MAX_IDLE_SCROLLS) {
            const foundNew = extractCurrentComments();
            if (comments.length >= commentLimit) break;

            window.scrollTo(0, document.documentElement.scrollHeight);
            await sleep(SCROLL_DELAY_MS);
            idleScrolls = foundNew ? 0 : idleScrolls + 1;
          }

          // The loop can end right after a scroll; pick up whatever that last
          // scroll loaded instead of discarding it.
          if (comments.length < commentLimit) {
            extractCurrentComments();
          }
        } finally {
          // Restore the scroll position even when collection fails midway.
          window.scrollTo(0, 0);
        }

        if (comments.length > commentLimit) {
          comments.splice(commentLimit);
        }
        return comments;
      }

      try {
        const video = getVideoInfo();
        const comments = await scrollAndCollectComments();
        return { video, comments };
      } catch (error) {
        // Keep the null result callers already handle, but leave a trace.
        console.error('Error collecting comments:', error);
        return null;
      }
    }

      // Result of the js step: the promise returned here carries
      // { video, comments } or null.
      // NOTE(review): presumably HARPA awaits it and stores the value under the
      // step's `param` — confirm against the HARPA js-step documentation.
      return scrollAndCollectMessages(targetMessageCount);
  param: array
  timeout: 15000
  args: targetMessageCount
  silent: true
# Final step (say): prints the `array` parameter (the js step's `param`)
# under a "Data Array" heading.
- type: say
  message: |-
    **Data Array:**

    {{array}}
Notice: Please read before using

This automation command is created by a community member. HARPA AI team does not audit community commands.

Please review the command carefully and only install if you trust the creator.

Contact us
Home | Use Cases | Guides | Privacy Policy | Terms of Service
CAN WE STORE COOKIES?
Our website uses cookies for the purposes of accessibility and security. They also allow us to gather statistics in order to improve the website for you. More info: Privacy Policy