Script 'Advanced SEO analysis on website URL' for scraping

Advanced SEO analysis on website URL

This script performs an advanced SEO analysis of a specified website URL, using Beautiful Soup to parse the HTML content. It evaluates various SEO factors: title length, number of headings, presence and content of the meta description and robots meta tags, canonical link existence, text-to-HTML ratio, keyword density (with an example keyword), mobile friendliness based on viewport meta tag presence, and link health, including the number of total, nofollow, external, and internal links. The results are returned as a dictionary, providing a comprehensive overview of the website's SEO performance.

Created by henri186 455 days ago Viewed 48 times

Submitted by henri186 Python3

Created 455 days ago

All edits

Permalink

import requests
from bs4 import BeautifulSoup
from collections import Counter
import re

def _host_of(url: str) -> str:
    """Return the lower-cased network location of *url*, or "" if it has none."""
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from urllib.parse import urlparse

    try:
        return urlparse(url.strip()).netloc.lower()
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. a broken IPv6 literal);
        # treat them as host-less instead of failing the whole analysis.
        return ""


def _attr_or(tag, attr: str, fallback: str) -> str:
    """Return ``tag[attr]`` when the tag exists and has the attribute, else *fallback*."""
    if tag is None:
        return fallback
    value = tag.get(attr)
    return fallback if value is None else value


def main(url: str, focus_keyword: str = "example_keyword") -> dict:
    """
    Perform advanced SEO analysis on the given website URL using Beautiful Soup.

    Parameters:
    - url (str): The URL of the website to analyze.
    - focus_keyword (str): Keyword whose density is measured. It is compared
      case-insensitively against single ``\\w+`` tokens of the page text, so a
      multi-word phrase never matches. Defaults to 'example_keyword'.

    Returns:
    - dict: On success, a dictionary with the keys 'title', 'title_length',
      'headings_count', 'meta_description', 'meta_robots', 'canonical_link',
      'text_to_html_ratio', 'keyword_density', 'mobile_friendly',
      'total_links', 'nofollow_links', 'external_links' and 'internal_links'.
      On any failure (network error, timeout, parsing problem), a dictionary
      of the form {"error": "<message>"}; this function never raises.
    """
    try:
        # A timeout keeps an unresponsive server from blocking the caller
        # indefinitely; a timeout surfaces through the error dict below.
        response = requests.get(url, timeout=10)
        html = response.text
        soup = BeautifulSoup(html, "html.parser")
        # Extract the visible text once; it feeds both the ratio and the
        # keyword density.
        page_text = soup.get_text()

        seo_analysis = {}

        # Title and its length. Surrounding whitespace is not part of the
        # title, and a missing title has length 0 rather than the length of
        # the placeholder message.
        title_tag = soup.find("title")
        if title_tag is None:
            seo_analysis["title"] = "No title found"
            seo_analysis["title_length"] = 0
        else:
            title = title_tag.get_text().strip()
            seo_analysis["title"] = title
            seo_analysis["title_length"] = len(title)

        # Number of headings per level (h1 .. h6).
        seo_analysis["headings_count"] = {
            f"h{level}": len(soup.find_all(f"h{level}")) for level in range(1, 7)
        }

        # Meta description / robots / canonical link. A tag that exists but
        # lacks the expected attribute is reported like a missing tag instead
        # of aborting the whole analysis with a KeyError.
        seo_analysis["meta_description"] = _attr_or(
            soup.find("meta", attrs={"name": "description"}),
            "content",
            "No meta description found",
        )
        seo_analysis["meta_robots"] = _attr_or(
            soup.find("meta", attrs={"name": "robots"}),
            "content",
            "No robots meta tag",
        )
        seo_analysis["canonical_link"] = _attr_or(
            soup.find("link", rel="canonical"),
            "href",
            "No canonical link",
        )

        # Text to HTML ratio (0 for an empty response body).
        html_length = len(html)
        seo_analysis["text_to_html_ratio"] = (
            len(page_text) / html_length if html_length > 0 else 0
        )

        # Keyword density: occurrences of the focus keyword over all words.
        word_count = Counter(re.findall(r"\w+", page_text.lower()))
        total_words = sum(word_count.values())
        keyword = focus_keyword.lower()
        # Counter yields 0 for absent keys, so only the empty page needs a guard.
        seo_analysis["keyword_density"] = (
            word_count[keyword] / total_words if total_words > 0 else 0
        )

        # Mobile friendliness: presence of a <meta name="viewport"> tag.
        viewport = soup.find("meta", attrs={"name": "viewport"})
        seo_analysis["mobile_friendly"] = "yes" if viewport is not None else "no"

        # Link analysis. A link is external only when it names a host that
        # differs from the host of the page actually served (after redirects);
        # relative, fragment, mailto: and similar host-less links are internal.
        links = soup.find_all("a", href=True)
        site_host = _host_of(response.url)
        seo_analysis["total_links"] = len(links)
        seo_analysis["nofollow_links"] = sum(
            1 for link in links if "nofollow" in link.get("rel", [])
        )
        seo_analysis["external_links"] = sum(
            1 for link in links if _host_of(link["href"]) not in ("", site_host)
        )
        seo_analysis["internal_links"] = (
            seo_analysis["total_links"] - seo_analysis["external_links"]
        )

        return seo_analysis
    except Exception as e:
        # Callers expect a dict in every case, so failures are reported
        # in-band instead of propagating.
        return {"error": str(e)}


`1`	`import requests`
`2`	`from bs4 import BeautifulSoup`
`3`	`from collections import Counter`
`4`	`import re`
`5`
`6`	`def main(url: str) -> dict:`
`7`	`"""`
`8`	`Perform advanced SEO analysis on the given website URL using Beautiful Soup.`
`9`
`10`	`Parameters:`
`11`	`- url (str): The URL of the website to analyze.`
`12`
`13`	`Returns:`
`14`	`- dict: A dictionary containing advanced SEO analysis results including title length,`
`15`	`number of headings, presence of meta description, meta tags, text-to-HTML ratio,`
`16`	`canonical link, keyword density, mobile friendliness, and link health.`
`17`	`"""`
`18`	`try:`
`19`	`# Send a GET request to the URL`
`20`	`response = requests.get(url)`
`21`	`# Parse the HTML content of the page`
`22`	`soup = BeautifulSoup(response.text, "html.parser")`
`23`
`24`	`# SEO analysis`
`25`	`seo_analysis = {}`
`26`
`27`	`# Get the title of the page and its length`
`28`	`title = soup.find("title").text if soup.find("title") else "No title found"`
`29`	`seo_analysis["title"] = title`
`30`	`seo_analysis["title_length"] = len(title)`
`31`
`32`	`# Count the number of headings (h1, h2, h3, h4, h5, h6)`
`33`	`headings = {f"h{i}": len(soup.find_all(f"h{i}")) for i in range(1, 7)}`
`34`	`seo_analysis["headings_count"] = headings`
`35`
`36`	`# Check for meta description`
`37`	`meta_description = soup.find("meta", attrs={"name": "description"})`
`38`	`seo_analysis["meta_description"] = (`
`39`	`meta_description["content"]`
`40`	`if meta_description`
`41`	`else "No meta description found"`
`42`	`)`
`43`
`44`	`# Additional meta tags`
`45`	`meta_robots = soup.find('meta', attrs={'name': 'robots'})`
`46`	`seo_analysis['meta_robots'] = meta_robots['content'] if meta_robots else 'No robots meta tag'`
`47`
`48`	`# Canonical link`
`49`	`canonical_link = soup.find('link', rel='canonical')`
`50`	`seo_analysis['canonical_link'] = canonical_link['href'] if canonical_link else 'No canonical link'`
`51`
`52`	`# Text to HTML Ratio`
`53`	`text_length = len(soup.get_text())`
`54`	`html_length = len(response.text)`
`55`	`seo_analysis['text_to_html_ratio'] = text_length / html_length if html_length > 0 else 0`
`56`
`57`	`# Keyword Density (Example: assuming 'example_keyword' is the keyword)`
`58`	`words = re.findall(r'\w+', soup.get_text().lower())`
`59`	`word_count = Counter(words)`
`60`	`total_words = sum(word_count.values())`
`61`	`focus_keyword = 'example_keyword'`
`62`	`keyword_density = word_count[focus_keyword] / total_words if focus_keyword in word_count and total_words > 0 else 0`
`63`	`seo_analysis['keyword_density'] = keyword_density`
`64`
`65`	`# Mobile Friendliness`
`66`	`seo_analysis['mobile_friendly'] = 'yes' if 'viewport' in (meta_description["content"].lower() if meta_description else '') else 'no'`
`67`
`68`	`# Link Analysis`
`69`	`links = soup.find_all('a', href=True)`
`70`	`seo_analysis['total_links'] = len(links)`
`71`	`seo_analysis['nofollow_links'] = sum(1 for link in links if 'nofollow' in link.get('rel', []))`
`72`	`seo_analysis['external_links'] = sum(1 for link in links if link['href'].startswith('http'))`
`73`	`seo_analysis['internal_links'] = seo_analysis['total_links'] - seo_analysis['external_links']`
`74`
`75`	`return seo_analysis`
`76`	`except Exception as e:`
`77`	`return {"error": str(e)}`
`78`