1 | import requests |
2 | from bs4 import BeautifulSoup |
3 | from collections import Counter |
4 | import re |
5 |
|
def _find_meta(soup, name: str):
    """Return the first ``<meta>`` tag whose ``name`` equals *name*, ignoring case."""
    # Real pages write "Description", "VIEWPORT", etc., so an exact string
    # match silently misses tags that are present.
    pattern = re.compile(rf"^\s*{re.escape(name)}\s*$", re.IGNORECASE)
    return soup.find("meta", attrs={"name": pattern})


def _attr_or(tag, attr: str, fallback: str) -> str:
    """Return ``tag[attr]``, or *fallback* when the tag or the attribute is absent."""
    if tag is None:
        return fallback
    # .get() instead of [] so a tag without the attribute (e.g. a bare
    # <meta name="description">) does not abort the whole analysis.
    return tag.get(attr, fallback)


def _has_nofollow(link) -> bool:
    """Return True when the anchor's ``rel`` attribute contains ``nofollow``."""
    rel = link.get("rel") or []
    if isinstance(rel, str):
        # Some parsers hand back the raw attribute string instead of a list.
        rel = rel.split()
    return any(token.lower() == "nofollow" for token in rel)


def _is_external(base_url: str, base_host: str, href: str) -> bool:
    """Return True when *href* resolves to an http(s) URL on a host other than *base_host*."""
    from urllib.parse import urljoin, urlparse

    try:
        target = urlparse(urljoin(base_url, href))
    except ValueError:
        # Malformed href (e.g. a broken IPv6 literal): cannot be classified,
        # so do not count it as external.
        return False
    return target.scheme in ("http", "https") and target.netloc.lower() != base_host


def main(url: str, focus_keyword: str = "example_keyword", timeout: float = 10.0) -> dict:
    """
    Perform a basic on-page SEO analysis of the given URL using Beautiful Soup.

    Parameters:
    - url (str): The URL of the page to analyze.
    - focus_keyword (str): Single word whose density is reported. Matching is
      case-insensitive and word-based, so multi-word phrases never match.
    - timeout (float): Seconds to wait for the server before giving up.

    Returns:
    - dict: On success, a dictionary with the keys ``title``, ``title_length``,
      ``headings_count``, ``meta_description``, ``meta_robots``,
      ``canonical_link``, ``text_to_html_ratio``, ``keyword_density``,
      ``mobile_friendly``, ``total_links``, ``nofollow_links``,
      ``external_links`` and ``internal_links``.
      On any failure (network error, invalid URL, parse error), a dictionary
      with the single key ``error`` holding the exception message.
    """
    from urllib.parse import urlparse

    try:
        # Without a timeout requests waits forever on an unresponsive server.
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.text, "html.parser")

        seo_analysis = {}

        # Title and its length; surrounding whitespace is not part of the
        # displayed title and would inflate the length metric.
        title_tag = soup.find("title")
        title = title_tag.get_text().strip() if title_tag is not None else "No title found"
        seo_analysis["title"] = title
        seo_analysis["title_length"] = len(title)

        # Number of headings per level (h1..h6)
        seo_analysis["headings_count"] = {
            f"h{level}": len(soup.find_all(f"h{level}")) for level in range(1, 7)
        }

        # Meta description and robots directives
        seo_analysis["meta_description"] = _attr_or(
            _find_meta(soup, "description"), "content", "No meta description found"
        )
        seo_analysis["meta_robots"] = _attr_or(
            _find_meta(soup, "robots"), "content", "No robots meta tag"
        )

        # Canonical link
        seo_analysis["canonical_link"] = _attr_or(
            soup.find("link", rel="canonical"), "href", "No canonical link"
        )

        # Text-to-HTML ratio; the extracted text is reused for keyword density
        page_text = soup.get_text()
        html_length = len(response.text)
        seo_analysis["text_to_html_ratio"] = (
            len(page_text) / html_length if html_length > 0 else 0
        )

        # Keyword density: occurrences of the focus keyword / total words
        words = re.findall(r"\w+", page_text.lower())
        word_count = Counter(words)
        seo_analysis["keyword_density"] = (
            word_count[focus_keyword.lower()] / len(words) if words else 0
        )

        # Mobile friendliness: a responsive page declares a viewport meta tag.
        # (The description text is unrelated to this and must not be used.)
        seo_analysis["mobile_friendly"] = (
            "yes" if _find_meta(soup, "viewport") is not None else "no"
        )

        # Link analysis. Links are resolved against the final URL (after
        # redirects) so that absolute same-site links count as internal and
        # protocol-relative links to other hosts count as external.
        links = soup.find_all("a", href=True)
        base_url = response.url or url
        base_host = urlparse(base_url).netloc.lower()
        seo_analysis["total_links"] = len(links)
        seo_analysis["nofollow_links"] = sum(1 for link in links if _has_nofollow(link))
        seo_analysis["external_links"] = sum(
            1 for link in links if _is_external(base_url, base_host, link["href"])
        )
        seo_analysis["internal_links"] = (
            seo_analysis["total_links"] - seo_analysis["external_links"]
        )

        return seo_analysis
    except Exception as e:
        # Top-level boundary: callers rely on always receiving a dict, so
        # every failure is reported through the "error" key instead of raising.
        return {"error": str(e)}
78 |
|