Newer
Older
gcp_docs_scrape / gcp_docs.ipynb
@moreserverless moreserverless on 25 May 67 KB Initial commit.
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7e66c738-f269-4cc6-ab32-e91fb65b234d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import re\n",
    "from typing import List\n",
    "from urllib.parse import urljoin\n",
    "\n",
    "import gradio as gr\n",
    "import requests\n",
    "from bs4 import BeautifulSoup, NavigableString, Tag\n",
    "from IPython.display import Markdown, display, update_display\n",
    "from openai import OpenAI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "f85f4df7-3173-4ed9-ae1b-83b02088b00f",
   "metadata": {},
   "outputs": [],
   "source": [
    "#\n",
    "# Adapted from Ed Donner's GitHub repo for the LLM Engineering course on Udemy:\n",
    "# https://github.com/ed-donner/llm_engineering/blob/main/week1/day5.ipynb\n",
    "#\n",
    "# HTTP request headers passed to requests.get() by Website.__init__ for every page fetch.\n",
    "# NOTE(review): presumably a browser-like User-Agent is sent so the site serves its normal HTML - confirm.\n",
    "headers = {\n",
    " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
    "}\n",
    "\n",
    "class Website:\n",
    "    \"\"\"\n",
    "    A Google Cloud documentation page that has been fetched and parsed.\n",
    "\n",
    "    The constructor downloads the page once; the get_* methods then read the\n",
    "    title, the 'Last updated' date and the documentation links from the parsed HTML.\n",
    "\n",
    "    Attributes:\n",
    "        url: Address the page was fetched from.\n",
    "        title: Cached page title; None until get_doc_title() has run.\n",
    "        links: Set of absolute URLs filled in by get_doc_links().\n",
    "        last_updated: Date string set by get_last_updated(); remains\n",
    "            '1970-01-01' when the page states no date.\n",
    "        body: Raw bytes of the HTTP response.\n",
    "        soup: BeautifulSoup tree of the whole page.\n",
    "        article: The article.devsite-article element, or None if absent.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, url, timeout=30):\n",
    "        \"\"\"\n",
    "        Fetch url and parse it with BeautifulSoup.\n",
    "\n",
    "        Args:\n",
    "            url: Address of the page to scrape.\n",
    "            timeout: Seconds requests waits for the server before giving up,\n",
    "                so an unresponsive host cannot hang the notebook forever.\n",
    "        \"\"\"\n",
    "        self.url = url\n",
    "        self.title = None\n",
    "        self.links = set()\n",
    "        self.last_updated = '1970-01-01'\n",
    "\n",
    "        response = requests.get(url, headers=headers, timeout=timeout)\n",
    "        self.body = response.content\n",
    "        self.soup = BeautifulSoup(self.body, 'html.parser')\n",
    "        self.article = self.soup.find('article', class_='devsite-article')\n",
    "\n",
    "    def get_doc_title(self):\n",
    "        \"\"\"\n",
    "        Return the page title, computing and caching it on first use.\n",
    "\n",
    "        The title is the first text node directly inside h1.devsite-page-title.\n",
    "        When the heading or its text is missing, '<url> | No Title' is used.\n",
    "        \"\"\"\n",
    "        if self.title:\n",
    "            return self.title\n",
    "\n",
    "        heading = self.soup.find('h1', class_='devsite-page-title')\n",
    "        # recursive=False keeps the heading's own text and ignores text nested in child elements.\n",
    "        text = heading.find(string=True, recursive=False) if heading else None\n",
    "        title = text.strip() if text else ''\n",
    "        self.title = title or '{} | No Title'.format(self.url)\n",
    "\n",
    "        return self.title\n",
    "\n",
    "    def get_last_updated(self):\n",
    "        \"\"\"\n",
    "        Store the date from the footer's 'Last updated <date> UTC' note in self.last_updated.\n",
    "\n",
    "        self.last_updated is left unchanged when the page has no\n",
    "        devsite-content-footer element or no paragraph matches the pattern.\n",
    "        \"\"\"\n",
    "        footer = self.soup.find('devsite-content-footer')\n",
    "        if footer is None:\n",
    "            return\n",
    "\n",
    "        last_updated_re = re.compile(r'Last updated (.*) UTC')\n",
    "        for fp in footer.find_all('p'):\n",
    "            match = last_updated_re.search(fp.get_text())\n",
    "            if match:\n",
    "                self.last_updated = match.group(1)\n",
    "                break\n",
    "\n",
    "    def get_doc_links(self):\n",
    "        \"\"\"\n",
    "        Add the page's documentation links to self.links.\n",
    "\n",
    "        Links come from the navigation entries and from the article body.\n",
    "        Only hrefs containing 'docs' are kept, and each one is resolved\n",
    "        against self.url so that every stored link is absolute.\n",
    "        \"\"\"\n",
    "        # Navigation entries are <li> wrappers, so the anchor is looked up inside each one.\n",
    "        nav_items = self.soup.find_all('li', class_='devsite-nav-item')\n",
    "        anchors = [item.find('a', href=True) for item in nav_items]\n",
    "        # Article matches are already <a> tags; the article element may be missing.\n",
    "        if self.article is not None:\n",
    "            anchors.extend(self.article.find_all('a', href=True))\n",
    "\n",
    "        for anchor in anchors:\n",
    "            if anchor is None:\n",
    "                continue\n",
    "            href = anchor['href']\n",
    "            if 'docs' in href:\n",
    "                # urljoin prefixes relative hrefs and returns absolute ones unchanged.\n",
    "                self.links.add(urljoin(self.url, href))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "46dff4b3-949f-40ac-b8e3-cd89c5b20461",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tag names skipped when walking the direct children of the article element\n",
    "# (compared against elem.name in the article-inspection loop).\n",
    "exclude_tags = ['devsite-feedback', 'devsite-actions', 'devsite-toc', 'aside', 'devsite-page-title-meta', 'devsite-thumb-rating']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5c6233b9-498b-44e4-9a87-e58aaa281ba9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch and parse the Eventarc docs landing page; the following cells explore this object.\n",
    "eventarc = Website('https://cloud.google.com/eventarc/docs')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "afa176e2-4024-493a-be97-f2526cca92cd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Eventarc overview'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eventarc.get_doc_title()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "7dcaa961-55f9-4778-9d55-96e24b6e8c7e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Eventarc overview'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eventarc.title"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "030abb06-d590-43da-8333-184520f000b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep a direct handle on the parsed page for the ad-hoc queries in the cells below.\n",
    "soup = eventarc.soup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7d0e2f63-2d88-4d86-86c0-6d0cd1a463e4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Eventarc overview\n"
     ]
    }
   ],
   "source": [
    "print(soup.find('h1', class_='devsite-page-title').find(string=True, recursive=False).strip())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "0c908d8b-0cdb-4617-ae9a-d411cca8b7f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The devsite-content-footer element; its paragraphs are searched for the 'Last updated' note below.\n",
    "footer = soup.find('devsite-content-footer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "e675fde6-664c-428c-98cc-c0d1ce4b82f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every <p> inside the footer; each one is tested against the 'Last updated ... UTC' pattern next.\n",
    "footer_paras = footer.find_all('p')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "a841163b-84f2-4068-981e-4a99b5e1fcc5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n",
      "Nope\n",
      "<re.Match object; span=(0, 27), match='Last updated 2025-05-08 UTC'>\n",
      "2025-05-08\n"
     ]
    }
   ],
   "source": [
    "# Probe each footer paragraph for the 'Last updated <date> UTC' note and show what matched.\n",
    "last_updated_re = r'Last updated (.*) UTC'  # loop-invariant, so defined once up front\n",
    "for fp in footer_paras:\n",
    "    match = re.search(last_updated_re, fp.get_text())\n",
    "    print(match)\n",
    "    # Print the captured date when the paragraph matches, otherwise the 'Nope' marker.\n",
    "    print(match.group(1) if match else 'Nope')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "436065d7-d3fe-412a-b0b6-a774044290b3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2025-05-08'"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eventarc.get_last_updated()\n",
    "eventarc.last_updated"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "bb6ec155-ed4d-451a-9d0d-885f959ebd85",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'https://cloud.google.com/docs/ai-ml'"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Navigation entries are li.devsite-nav-item elements; the link itself is the <a> nested inside each one.\n",
    "nav_items = soup.find_all('li', class_='devsite-nav-item')\n",
    "nav_items[0].find('a', href=True)['href']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "0a740921-505c-4e4a-987f-fa8947a27ebd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect absolute documentation URLs from the navigation entries.\n",
    "links_on_page = set()\n",
    "for ni in nav_items:\n",
    "    link = ni.find('a', href=True)\n",
    "    if link is None:\n",
    "        continue\n",
    "    href = link['href']\n",
    "    # Keep docs links only, and skip any href carrying a '#' fragment so that a\n",
    "    # root-relative fragment link is never stored as a bare relative path.\n",
    "    if 'docs' not in href or '#' in href:\n",
    "        continue\n",
    "    # urljoin prefixes root-relative hrefs and returns absolute URLs unchanged.\n",
    "    links_on_page.add(urljoin('https://cloud.google.com', href))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "f9474a3e-579e-4923-b1e6-22b5c782cd5f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'https://cloud.google.com/docs',\n",
       " 'https://cloud.google.com/docs/access-resources',\n",
       " 'https://cloud.google.com/docs/ai-ml',\n",
       " 'https://cloud.google.com/docs/application-development',\n",
       " 'https://cloud.google.com/docs/application-hosting',\n",
       " 'https://cloud.google.com/docs/compute-area',\n",
       " 'https://cloud.google.com/docs/costs-usage',\n",
       " 'https://cloud.google.com/docs/cross-product-overviews',\n",
       " 'https://cloud.google.com/docs/data',\n",
       " 'https://cloud.google.com/docs/databases',\n",
       " 'https://cloud.google.com/docs/devtools',\n",
       " 'https://cloud.google.com/docs/dhm-cloud',\n",
       " 'https://cloud.google.com/docs/generative-ai',\n",
       " 'https://cloud.google.com/docs/iac',\n",
       " 'https://cloud.google.com/docs/industry',\n",
       " 'https://cloud.google.com/docs/migration',\n",
       " 'https://cloud.google.com/docs/networking',\n",
       " 'https://cloud.google.com/docs/observability',\n",
       " 'https://cloud.google.com/docs/security',\n",
       " 'https://cloud.google.com/docs/storage',\n",
       " 'https://cloud.google.com/docs/tech-area-overviews',\n",
       " 'https://cloud.google.com/eventarc/advanced/docs/overview',\n",
       " 'https://cloud.google.com/eventarc/docs',\n",
       " 'https://cloud.google.com/eventarc/docs/apis',\n",
       " 'https://cloud.google.com/eventarc/docs/event-driven-architectures',\n",
       " 'https://cloud.google.com/eventarc/docs/event-format',\n",
       " 'https://cloud.google.com/eventarc/docs/event-providers-targets',\n",
       " 'https://cloud.google.com/eventarc/docs/event-types',\n",
       " 'https://cloud.google.com/eventarc/docs/resources',\n",
       " 'https://cloud.google.com/eventarc/docs/samples',\n",
       " 'https://cloud.google.com/eventarc/standard/docs/overview',\n",
       " 'https://cloud.google.com/marketplace/docs'}"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "links_on_page"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "6d1987db-b346-41cb-adcb-1a4ba0f6fd03",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Locate the article.devsite-article element and gather every anchor inside it that has an href.\n",
    "article_section = soup.find('article', class_='devsite-article')\n",
    "links_in_article = article_section.find_all('a', href=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "2b48bcf0-dd19-4378-83e0-6526eea688e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_google_cloud_link(link):\n",
    "    \"\"\"\n",
    "    Return the absolute cloud.google.com URL for an anchor's href.\n",
    "\n",
    "    Args:\n",
    "        link: Anchor tag (or any mapping) exposing an 'href' entry.\n",
    "\n",
    "    Returns:\n",
    "        The href resolved against https://cloud.google.com. Root-relative\n",
    "        hrefs get the site prefix; hrefs that are already absolute are\n",
    "        returned unchanged instead of being prefixed a second time.\n",
    "    \"\"\"\n",
    "    return urljoin('https://cloud.google.com', link['href'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "20d62b32-bac1-49d6-96ae-86143fcaa3bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Absolute URLs for the article's docs links, leaving out any href that carries a '#' fragment.\n",
    "article_links = {\n",
    "    make_google_cloud_link(lia)\n",
    "    for lia in links_in_article\n",
    "    if lia and 'docs' in lia['href'] and '#' not in lia['href']\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "bad4726e-394a-4935-a57f-f53fe8f87ae6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'https://cloud.google.com/eventarc/advanced/docs/audit-logs',\n",
       " 'https://cloud.google.com/eventarc/advanced/docs/event-providers-targets',\n",
       " 'https://cloud.google.com/eventarc/advanced/docs/overview',\n",
       " 'https://cloud.google.com/eventarc/advanced/docs/publish-events/publish-events-google-sources',\n",
       " 'https://cloud.google.com/eventarc/advanced/docs/receive-events/configure-format-events',\n",
       " 'https://cloud.google.com/eventarc/advanced/docs/receive-events/transform-events',\n",
       " 'https://cloud.google.com/eventarc/advanced/docs/use-cmek',\n",
       " 'https://cloud.google.com/eventarc/advanced/docs/using-vpc-service-controls',\n",
       " 'https://cloud.google.com/eventarc/docs/access-control',\n",
       " 'https://cloud.google.com/eventarc/docs/compliance',\n",
       " 'https://cloud.google.com/eventarc/docs/event-driven-architectures',\n",
       " 'https://cloud.google.com/eventarc/docs/event-format',\n",
       " 'https://cloud.google.com/eventarc/docs/quotas',\n",
       " 'https://cloud.google.com/eventarc/docs/reference/audit-logs',\n",
       " 'https://cloud.google.com/eventarc/docs/reference/libraries',\n",
       " 'https://cloud.google.com/eventarc/docs/reference/publishing/rest',\n",
       " 'https://cloud.google.com/eventarc/docs/reference/rest',\n",
       " 'https://cloud.google.com/eventarc/docs/retry-events',\n",
       " 'https://cloud.google.com/eventarc/docs/understand-locations',\n",
       " 'https://cloud.google.com/eventarc/docs/use-cmek',\n",
       " 'https://cloud.google.com/eventarc/docs/using-vpc-service-controls',\n",
       " 'https://cloud.google.com/eventarc/standard/docs/event-providers-targets',\n",
       " 'https://cloud.google.com/eventarc/standard/docs/overview',\n",
       " 'https://cloud.google.com/logging/docs/overview',\n",
       " 'https://cloud.google.com/monitoring/docs/monitoring-overview',\n",
       " 'https://cloud.google.comhttps://cloud.google.com/docs',\n",
       " 'https://cloud.google.comhttps://cloud.google.com/eventarc/docs'}"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "article_links"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "98477112-90f6-4ddd-96e9-dfc5e8f5ff82",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "** 0 **: \n",
      "\n",
      "** 0 **: \n",
      "\n",
      "** 0 **: \n",
      "\n",
      "** 0 **: <h1 class=\"devsite-page-title\" tabindex=\"-1\">\n",
      "      Eventarc overview\n",
      "      <div class=\"devsite-actions\" data-nosnippet=\"\"><devsite-feature-tooltip ack-key=\"AckCollectionsBookmarkTooltipDismiss\" analytics-action-close=\"Callout Profile dismissed\" analytics-action-show=\"Callout Profile displayed\" analytics-category=\"Site-Wide Custom Events\" analytics-label=\"Create Collection Callout\" class=\"devsite-page-bookmark-tooltip nocontent\" close-button-text=\"Got it\" dismiss-button=\"true\" dismiss-button-text=\"Dismiss\" id=\"devsite-collections-dropdown\">\n",
      "<devsite-bookmark></devsite-bookmark>\n",
      "<span slot=\"popout-heading\">\n",
      "      \n",
      "      Stay organized with collections\n",
      "    </span>\n",
      "<span slot=\"popout-contents\">\n",
      "      \n",
      "      Save and categorize content based on your preferences.\n",
      "    </span>\n",
      "</devsite-feature-tooltip></div>\n",
      "</h1>\n",
      "dict_keys(['class', 'tabindex'])\n",
      "---*** TAG NAME: h1\n",
      "['devsite-page-title']\n",
      "CLASS_LIST: ['devsite-page-title']\n",
      "CLASS cl: devsite-page-title\n",
      "** 1 **: \n",
      "\n",
      "** 1 **: \n",
      "\n",
      "** 1 **: \n",
      "\n",
      "** 1 **: <div class=\"devsite-article-body clearfix\">\n",
      "<div class=\"nocontent\" id=\"tags\">\n",
      "<a class=\"cloud-chip\" data-title=\"Applies to Eventarc Advanced\" href=\"/eventarc/advanced/docs/overview\" track-name=\"Advanced\" track-type=\"docChip\">\n",
      "  Advanced\n",
      "\n",
      "  </a>\n",
      "<a class=\"cloud-chip\" data-title=\"Applies to Eventarc Standard\" href=\"/eventarc/standard/docs/overview\" track-name=\"Standard\" track-type=\"docChip\">\n",
      "  Standard\n",
      "\n",
      "  </a>\n",
      "</div>\n",
      "<aside class=\"beta\">\n",
      "<p>\n",
      "<strong>\n",
      "      \n",
      "        Preview\n",
      "      \n",
      "        — Eventarc Advanced\n",
      "    </strong>\n",
      "</p>\n",
      "<p>\n",
      "      \n",
      "      \n",
      "        This feature is subject to the \"Pre-GA Offerings Terms\" in the General Service Terms section\n",
      "        of the <a href=\"/terms/service-terms#1\" track-type=\"commonIncludes\">Service Specific Terms</a>.\n",
      "        \n",
      "        Pre-GA features are available \"as is\" and might have limited support.\n",
      "      \n",
      "      For more information, see the\n",
      "      <a href=\"/products#product-launch-stages\" track-type=\"commonIncludes\">launch stage descriptions</a>.\n",
      "  </p>\n",
      "</aside>\n",
      "<p>Eventarc lets you build event-driven architectures without having to implement,\n",
      "    customize, or maintain the underlying infrastructure.</p>\n",
      "<p>Eventarc is offered in two editions: <b>Eventarc Advanced</b> and\n",
      "    <b>Eventarc Standard</b>.</p>\n",
      "<p>Both editions offer a scalable, serverless, and fully managed eventing solution that lets you\n",
      "  asynchronously route messages from sources to targets using loosely coupled services that are\n",
      "  triggered by and react to state changes known as <i>events</i>. Both editions support a range of\n",
      "  event providers and destinations—including Google Cloud services, custom applications, SaaS\n",
      "  applications, and third-party services—while managing delivery, security, authorization,\n",
      "  observability, and error-handling for you.</p>\n",
      "<p>Note that the underlying data model for both editions of Eventarc is the same. As\n",
      "  a use case grows in complexity, you have the option of seamlessly transitioning from using\n",
      "  Eventarc Standard to using Eventarc Advanced.</p>\n",
      "<h2 data-text=\"Editions overview\" id=\"editions-overview\" tabindex=\"-1\">Editions overview</h2>\n",
      "<p>The following is an overview of both editions. For more detailed information, see the\n",
      "  <a href=\"/eventarc/advanced/docs/overview\">Eventarc Advanced overview</a> and the\n",
      "  <a href=\"/eventarc/standard/docs/overview\">Eventarc Standard overview</a>.\n",
      "</p>\n",
      "<dl>\n",
      "<dt>Eventarc Advanced</dt>\n",
      "<dd><p>Eventarc Advanced is a fully managed platform for building event-driven\n",
      "    architectures. It lets you collect events that occur in a system and publish them to a central\n",
      "    bus. Interested services can subscribe to specific messages by creating enrollments. You\n",
      "    can use the bus to route events from multiple sources in real time and publish them to\n",
      "    multiple destinations, and optionally transform events prior to delivery to a target.\n",
      "    Eventarc Advanced is feature rich and is ideal for organizations with\n",
      "    complex eventing and messaging needs, particularly those grappling with managing numerous\n",
      "    Pub/Sub topics, Kafka queues, or other third-party messaging systems. By providing\n",
      "    administrators with enhanced and centralized visibility and control,\n",
      "    Eventarc Advanced enables organizations to connect multiple teams across\n",
      "    different projects.</p>\n",
      "<figure id=\"architecture-overview\">\n",
      "<a href=\"/static/eventarc/images/eventarc-advanced-overview-sans-numbers.svg\">\n",
      "<img alt=\"Eventarc Advanced lets you receive, filter, transform, route, and\n",
      "            deliver messages between different event providers and destinations.\" src=\"/static/eventarc/images/eventarc-advanced-overview-sans-numbers.svg\"/></a>\n",
      "<figcaption style=\"font-size:85%\">Eventarc Advanced lets you receive, filter,\n",
      "          transform, route, and deliver messages<br/>between different event providers and\n",
      "          destinations (click diagram to enlarge).</figcaption>\n",
      "</figure>\n",
      "</dd>\n",
      "<dt>Eventarc Standard</dt>\n",
      "<dd><p>Eventarc Standard is recommended for applications where the focus is on simply\n",
      "    delivering events from event provider to event destination. It lets you quickly and easily\n",
      "    consume Google events by defining triggers that filter inbound events according to their source,\n",
      "    type, and other attributes, and then route them to a specified destination.</p>\n",
      "<figure id=\"architecture-overview\">\n",
      "<a href=\"/static/eventarc/images/eventarc-standard-overview.svg\">\n",
      "<img alt=\"Eventarc Standard routes events from event providers to event destinations.\" src=\"/static/eventarc/images/eventarc-standard-overview.svg\"/></a>\n",
      "<figcaption style=\"font-size:85%\">Eventarc Standard lets you filter and\n",
      "            route events<br/>from event providers to event destinations (click diagram to enlarge).</figcaption>\n",
      "</figure>\n",
      "</dd>\n",
      "</dl>\n",
      "<h2 data-text=\"Features comparison table\" id=\"features-comparison-table\" tabindex=\"-1\">Features comparison table</h2>\n",
      "\n",
      "The following table can help you choose between Eventarc Advanced and\n",
      "Eventarc Standard. It assumes your familiarity with the basic concepts of\n",
      "\n",
      "  <a href=\"/eventarc/docs/event-driven-architectures\">event-driven architectures</a>.\n",
      "\n",
      "\n",
      "<table>\n",
      "<thead>\n",
      "<tr>\n",
      "<th width=\"20%\">Feature</th>\n",
      "<th width=\"40%\">Eventarc Advanced</th>\n",
      "<th>Eventarc Standard</th>\n",
      "</tr>\n",
      "</thead>\n",
      "<tbody>\n",
      "<tr>\n",
      "<td>Access control</td>\n",
      "<td>Per message access control and central governance with IAM<br/>\n",
      "      See <a href=\"/eventarc/docs/access-control\">Access control with IAM</a></td>\n",
      "<td>See <a href=\"/eventarc/docs/access-control\">Access control with IAM</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Capacity</td>\n",
      "<td>Automatically provisioned</td>\n",
      "<td>Automatically provisioned</td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Client library languages</td>\n",
      "<td>Java, Python, Go, Node.js, C++, C#, PHP, Ruby<br/>See\n",
      "      <a href=\"/eventarc/docs/reference/libraries\">Eventarc client libraries</a></td>\n",
      "<td>Java, Python, Go, Node.js, C++, C#, PHP, Ruby<br/>See\n",
      "      <a href=\"/eventarc/docs/reference/libraries\">Eventarc client libraries</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Compliance standards</td>\n",
      "<td>Doesn't apply to any feature in <a href=\"/products#product-launch-stages\">Preview</a></td>\n",
      "<td>See <a href=\"/eventarc/docs/compliance\">Compliance standards</a>\n",
      "</td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Cross-project event delivery</td>\n",
      "<td>Supported<br/>See\n",
      "      <a href=\"/eventarc/advanced/docs/publish-events/publish-events-google-sources\">Publish events\n",
      "        from Google sources</a></td>\n",
      "<td>Not supported</td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Customer managed encryption keys</td>\n",
      "<td>Yes<br/>See <a href=\"/eventarc/advanced/docs/use-cmek\">Use customer-managed encryption keys</a></td>\n",
      "<td>Yes<br/>See <a href=\"/eventarc/docs/use-cmek\">Use customer-managed encryption keys</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Dead letter queues supported</td>\n",
      "<td>No</td>\n",
      "<td>Yes, through Pub/Sub dead letter topic<br/>See\n",
      "      <a href=\"/eventarc/docs/retry-events\">Retry events</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Event format</td>\n",
      "<td>Events are delivered to the destination in a CloudEvents format<br/>See\n",
      "        <a href=\"/eventarc/docs/event-format\">Event format</a><br/>\n",
      "        Optionally, you can override this behavior by\n",
      "        <a href=\"/eventarc/advanced/docs/receive-events/transform-events\">defining an HTTP binding</a></td>\n",
      "<td>Events are delivered to the destination in a CloudEvents format<br/>\n",
      "      See <a href=\"/eventarc/docs/event-format\">Event format</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Event size</td>\n",
      "<td>1 MB maximum<br/>See\n",
      "      <a href=\"/eventarc/docs/quotas\">Quotas and limits</a></td>\n",
      "<td>512 KB maximum<br/>See\n",
      "      <a href=\"/eventarc/docs/quotas\">Quotas and limits</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Locations</td>\n",
      "<td>See <a href=\"/eventarc/docs/locations#advanced-regions\">Eventarc Advanced locations</a></td>\n",
      "<td>See <a href=\"/eventarc/docs/locations#triggers-regions\">Eventarc Standard locations</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Message filtering</td>\n",
      "<td>Filtering on any and all event attributes</td>\n",
      "<td>Filtering on event type and specific attributes</td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Message routing</td>\n",
      "<td>Many providers to many destinations<br/></td>\n",
      "<td>Provider to destination</td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Message schema conversion</td>\n",
      "<td>Yes<br/>See\n",
      "      <a href=\"/eventarc/advanced/docs/receive-events/configure-format-events\">Convert the format of\n",
      "        received events</a></td>\n",
      "<td>No</td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Message transformation</td>\n",
      "<td>Yes, through CEL expressions<br/>See\n",
      "      <a href=\"/eventarc/advanced/docs/receive-events/transform-events\">Transform received events</a></td>\n",
      "<td>No</td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Observability</td>\n",
      "<td>Through Google Cloud Observability such as <a href=\"/logging/docs/overview\">Cloud Logging</a>\n",
      "      and <a href=\"/monitoring/docs/monitoring-overview\">Cloud Monitoring</a><br/>See\n",
      "        <a href=\"/eventarc/advanced/docs/audit-logs\">Eventarc audit logging</a></td>\n",
      "<td>Through Google Cloud Observability such as <a href=\"/logging/docs/overview\">Cloud Logging</a>\n",
      "      and <a href=\"/monitoring/docs/monitoring-overview\">Cloud Monitoring</a><br/>See\n",
      "        <a href=\"/eventarc/docs/reference/audit-logs\">Eventarc audit logging</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Ordered delivery</td>\n",
      "<td>There is no in-order, first-in-first-out delivery guarantee</td>\n",
      "<td>There is no in-order, first-in-first-out delivery guarantee</td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Pricing</td>\n",
      "<td>See <a href=\"/eventarc/pricing\">Eventarc pricing</a></td>\n",
      "<td>See <a href=\"/eventarc/pricing\">Eventarc pricing</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Regionality</td>\n",
      "<td>Regional<br/>See <a href=\"/eventarc/advanced/docs/overview#regionality\">Understand regionality</a></td>\n",
      "<td>Regional, Global<br/>See <a href=\"/eventarc/docs/understand-locations\">Understand\n",
      "      Eventarc locations</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>REST endpoints</td>\n",
      "<td><code dir=\"ltr\" translate=\"no\">https://eventarc.googleapis.com</code><br/>See\n",
      "      <a href=\"/eventarc/docs/reference/rest\">Eventarc API</a><br/>\n",
      "<code dir=\"ltr\" translate=\"no\">https://eventarcpublishing.googleapis.com</code><br/>See\n",
      "      <a href=\"/eventarc/docs/reference/publishing/rest\">Eventarc Publishing API</a></td>\n",
      "<td><code dir=\"ltr\" translate=\"no\">https://eventarc.googleapis.com</code><br/>See\n",
      "      <a href=\"/eventarc/docs/reference/rest\">Eventarc API</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Retry and retention</td>\n",
      "<td>At-least-once event delivery to targets; default message retention duration is 24 hours with\n",
      "      an exponential backoff delay<br/>\n",
      "      See <a href=\"/eventarc/docs/retry-events\">Retry events</a></td>\n",
      "<td>At-least-once event delivery to targets; default message retention duration is 24 hours with\n",
      "      an exponential backoff delay<br/>\n",
      "      See <a href=\"/eventarc/docs/retry-events\">Retry events</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Service limits</td>\n",
      "<td>One bus per Google Cloud project<br/>100 pipelines per Google Cloud project per\n",
      "      region<br/>See <a href=\"/eventarc/docs/quotas\">Quotas and limits</a></td>\n",
      "<td>500 triggers per location per Google Cloud project<br/>See\n",
      "        <a href=\"/eventarc/docs/quotas\">Quotas and limits</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Service perimeter using VPC Service Controls</td>\n",
      "<td>Yes<br/>See\n",
      "      <a href=\"/eventarc/advanced/docs/using-vpc-service-controls\">Set up a service perimeter using\n",
      "        VPC Service Controls</a></td>\n",
      "<td>Yes<br/>See\n",
      "      <a href=\"/eventarc/docs/using-vpc-service-controls\">Set up a service perimeter using\n",
      "        VPC Service Controls</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Supported sources</td>\n",
      "<td>Google providers<br/>Direct publishers using the Eventarc Publishing API<br/>See\n",
      "    <a href=\"/eventarc/advanced/docs/event-providers-targets\">Event providers and destinations</a></td>\n",
      "<td>Google providers<br/>Google providers through audit logs<br/>Third-party providers<br/>See\n",
      "    <a href=\"/eventarc/standard/docs/event-providers-targets\">Event providers and destinations</a></td>\n",
      "</tr>\n",
      "<tr>\n",
      "<td>Supported targets</td>\n",
      "<td>Cloud Run functions (including 1st gen)<br/>Cloud Run jobs and services<br/>\n",
      "    Eventarc Advanced buses<br/>Internal HTTP endpoints in\n",
      "    VPC networks<br/>Pub/Sub topics<br/>Workflows<br/>See\n",
      "    <a href=\"/eventarc/advanced/docs/event-providers-targets\">Event providers and destinations</a></td>\n",
      "<td>Cloud Run functions<br/>Cloud Run services<br/>Internal HTTP endpoints in\n",
      "    VPC networks<br/>Public endpoints of private and public GKE\n",
      "    services<br/>Workflows<br/>See\n",
      "    <a href=\"/eventarc/standard/docs/event-providers-targets\">Event providers and destinations</a></td>\n",
      "</tr>\n",
      "</tbody>\n",
      "</table>\n",
      "<devsite-hats-survey class=\"nocontent\" hats-id=\"mwETRvWii0eU5NUYprb0Y9z5GVbc\" listnr-id=\"83405\"></devsite-hats-survey>\n",
      "</div>\n",
      "dict_keys(['class'])\n",
      "---*** TAG NAME: div\n",
      "['devsite-article-body', 'clearfix']\n",
      "CLASS_LIST: ['devsite-article-body', 'clearfix']\n",
      "CLASS cl: devsite-article-body\n",
      "CLASS cl: clearfix\n",
      "** 2 **: \n",
      "\n",
      "** 2 **: \n",
      "\n",
      "** 2 **: \n",
      "\n",
      "** 2 **: <div class=\"devsite-floating-action-buttons\">\n",
      "</div>\n",
      "dict_keys(['class'])\n",
      "---*** TAG NAME: div\n",
      "['devsite-floating-action-buttons']\n",
      "CLASS_LIST: ['devsite-floating-action-buttons']\n",
      "CLASS cl: devsite-floating-action-buttons\n",
      "** 3 **: \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Walk the article's direct children, print up to five tags with their classes,\n",
    "# and remove any tag whose class name contains 'breadcrumb' or 'meta'.\n",
    "i = 0\n",
    "# Iterate over a snapshot: decompose() removes the element from article_section, and\n",
    "# deleting from the live child list while iterating it would skip the next sibling.\n",
    "for elem in list(article_section.children):\n",
    "    if i >= 5:\n",
    "        break\n",
    "    if elem.name in exclude_tags:\n",
    "        continue\n",
    "\n",
    "    print('** {} **: {}'.format(i,elem))\n",
    "    # Text nodes are printed above but are not counted and have no attributes to inspect.\n",
    "    if not isinstance(elem, Tag):\n",
    "        continue\n",
    "\n",
    "    keys = elem.attrs.keys()\n",
    "    print(keys)\n",
    "    if 'class' in keys:\n",
    "        print(\"---*** TAG NAME: {}\".format(elem.name))\n",
    "        print(elem['class'])\n",
    "        class_list = elem['class']\n",
    "        print(\"CLASS_LIST: {}\".format(class_list))\n",
    "        for cl in class_list:\n",
    "            print(\"CLASS cl: {}\".format(cl))\n",
    "            if 'breadcrumb' in cl or 'meta' in cl:\n",
    "                print('DECOMPOSING\\n')\n",
    "                elem.decompose()\n",
    "                break\n",
    "    else:\n",
    "        print('No class or no breadcrumb')\n",
    "    i += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "849ab8d6-3f38-46a7-bbe8-5341f070861a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<article class=\"devsite-article\">\n",
       "\n",
       "<devsite-feedback bucket=\"Documentation\" class=\"nocontent\" context=\"\" data-label=\"Send Feedback Button\" position=\"header\" product-id=\"97037\" project-icon=\"https://www.gstatic.com/devrel-devsite/prod/v6dc4611c4232bd02b2b914c4948f523846f90835f230654af18f87f75fe9f73c/cloud/images/favicons/onecloud/super_cloud.png\" project-name=\"Eventarc\" track-metadata-position=\"header\" track-name=\"sendFeedbackLink\" track-type=\"feedback\" version=\"t-devsite-webserver-20250506-r00-rc01.469018407534597322\">\n",
       "<button>\n",
       "  \n",
       "    \n",
       "    Send feedback\n",
       "  \n",
       "  </button>\n",
       "</devsite-feedback>\n",
       "<h1 class=\"devsite-page-title\" tabindex=\"-1\">\n",
       "      Eventarc overview\n",
       "      <div class=\"devsite-actions\" data-nosnippet=\"\"><devsite-feature-tooltip ack-key=\"AckCollectionsBookmarkTooltipDismiss\" analytics-action-close=\"Callout Profile dismissed\" analytics-action-show=\"Callout Profile displayed\" analytics-category=\"Site-Wide Custom Events\" analytics-label=\"Create Collection Callout\" class=\"devsite-page-bookmark-tooltip nocontent\" close-button-text=\"Got it\" dismiss-button=\"true\" dismiss-button-text=\"Dismiss\" id=\"devsite-collections-dropdown\">\n",
       "<devsite-bookmark></devsite-bookmark>\n",
       "<span slot=\"popout-heading\">\n",
       "      \n",
       "      Stay organized with collections\n",
       "    </span>\n",
       "<span slot=\"popout-contents\">\n",
       "      \n",
       "      Save and categorize content based on your preferences.\n",
       "    </span>\n",
       "</devsite-feature-tooltip></div>\n",
       "</h1>\n",
       "\n",
       "<devsite-toc class=\"devsite-nav\" depth=\"2\" devsite-toc-embedded=\"\">\n",
       "</devsite-toc>\n",
       "<div class=\"devsite-article-body clearfix\">\n",
       "<div class=\"nocontent\" id=\"tags\">\n",
       "<a class=\"cloud-chip\" data-title=\"Applies to Eventarc Advanced\" href=\"/eventarc/advanced/docs/overview\" track-name=\"Advanced\" track-type=\"docChip\">\n",
       "  Advanced\n",
       "\n",
       "  </a>\n",
       "<a class=\"cloud-chip\" data-title=\"Applies to Eventarc Standard\" href=\"/eventarc/standard/docs/overview\" track-name=\"Standard\" track-type=\"docChip\">\n",
       "  Standard\n",
       "\n",
       "  </a>\n",
       "</div>\n",
       "<aside class=\"beta\">\n",
       "<p>\n",
       "<strong>\n",
       "      \n",
       "        Preview\n",
       "      \n",
       "        — Eventarc Advanced\n",
       "    </strong>\n",
       "</p>\n",
       "<p>\n",
       "      \n",
       "      \n",
       "        This feature is subject to the \"Pre-GA Offerings Terms\" in the General Service Terms section\n",
       "        of the <a href=\"/terms/service-terms#1\" track-type=\"commonIncludes\">Service Specific Terms</a>.\n",
       "        \n",
       "        Pre-GA features are available \"as is\" and might have limited support.\n",
       "      \n",
       "      For more information, see the\n",
       "      <a href=\"/products#product-launch-stages\" track-type=\"commonIncludes\">launch stage descriptions</a>.\n",
       "  </p>\n",
       "</aside>\n",
       "<p>Eventarc lets you build event-driven architectures without having to implement,\n",
       "    customize, or maintain the underlying infrastructure.</p>\n",
       "<p>Eventarc is offered in two editions: <b>Eventarc Advanced</b> and\n",
       "    <b>Eventarc Standard</b>.</p>\n",
       "<p>Both editions offer a scalable, serverless, and fully managed eventing solution that lets you\n",
       "  asynchronously route messages from sources to targets using loosely coupled services that are\n",
       "  triggered by and react to state changes known as <i>events</i>. Both editions support a range of\n",
       "  event providers and destinations—including Google Cloud services, custom applications, SaaS\n",
       "  applications, and third-party services—while managing delivery, security, authorization,\n",
       "  observability, and error-handling for you.</p>\n",
       "<p>Note that the underlying data model for both editions of Eventarc is the same. As\n",
       "  a use case grows in complexity, you have the option of seamlessly transitioning from using\n",
       "  Eventarc Standard to using Eventarc Advanced.</p>\n",
       "<h2 data-text=\"Editions overview\" id=\"editions-overview\" tabindex=\"-1\">Editions overview</h2>\n",
       "<p>The following is an overview of both editions. For more detailed information, see the\n",
       "  <a href=\"/eventarc/advanced/docs/overview\">Eventarc Advanced overview</a> and the\n",
       "  <a href=\"/eventarc/standard/docs/overview\">Eventarc Standard overview</a>.\n",
       "</p>\n",
       "<dl>\n",
       "<dt>Eventarc Advanced</dt>\n",
       "<dd><p>Eventarc Advanced is a fully managed platform for building event-driven\n",
       "    architectures. It lets you collect events that occur in a system and publish them to a central\n",
       "    bus. Interested services can subscribe to specific messages by creating enrollments. You\n",
       "    can use the bus to route events from multiple sources in real time and publish them to\n",
       "    multiple destinations, and optionally transform events prior to delivery to a target.\n",
       "    Eventarc Advanced is feature rich and is ideal for organizations with\n",
       "    complex eventing and messaging needs, particularly those grappling with managing numerous\n",
       "    Pub/Sub topics, Kafka queues, or other third-party messaging systems. By providing\n",
       "    administrators with enhanced and centralized visibility and control,\n",
       "    Eventarc Advanced enables organizations to connect multiple teams across\n",
       "    different projects.</p>\n",
       "<figure id=\"architecture-overview\">\n",
       "<a href=\"/static/eventarc/images/eventarc-advanced-overview-sans-numbers.svg\">\n",
       "<img alt=\"Eventarc Advanced lets you receive, filter, transform, route, and\n",
       "            deliver messages between different event providers and destinations.\" src=\"/static/eventarc/images/eventarc-advanced-overview-sans-numbers.svg\"/></a>\n",
       "<figcaption style=\"font-size:85%\">Eventarc Advanced lets you receive, filter,\n",
       "          transform, route, and deliver messages<br/>between different event providers and\n",
       "          destinations (click diagram to enlarge).</figcaption>\n",
       "</figure>\n",
       "</dd>\n",
       "<dt>Eventarc Standard</dt>\n",
       "<dd><p>Eventarc Standard is recommended for applications where the focus is on simply\n",
       "    delivering events from event provider to event destination. It lets you quickly and easily\n",
       "    consume Google events by defining triggers that filter inbound events according to their source,\n",
       "    type, and other attributes, and then route them to a specified destination.</p>\n",
       "<figure id=\"architecture-overview\">\n",
       "<a href=\"/static/eventarc/images/eventarc-standard-overview.svg\">\n",
       "<img alt=\"Eventarc Standard routes events from event providers to event destinations.\" src=\"/static/eventarc/images/eventarc-standard-overview.svg\"/></a>\n",
       "<figcaption style=\"font-size:85%\">Eventarc Standard lets you filter and\n",
       "            route events<br/>from event providers to event destinations (click diagram to enlarge).</figcaption>\n",
       "</figure>\n",
       "</dd>\n",
       "</dl>\n",
       "<h2 data-text=\"Features comparison table\" id=\"features-comparison-table\" tabindex=\"-1\">Features comparison table</h2>\n",
       "\n",
       "The following table can help you choose between Eventarc Advanced and\n",
       "Eventarc Standard. It assumes your familiarity with the basic concepts of\n",
       "\n",
       "  <a href=\"/eventarc/docs/event-driven-architectures\">event-driven architectures</a>.\n",
       "\n",
       "\n",
       "<table>\n",
       "<thead>\n",
       "<tr>\n",
       "<th width=\"20%\">Feature</th>\n",
       "<th width=\"40%\">Eventarc Advanced</th>\n",
       "<th>Eventarc Standard</th>\n",
       "</tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td>Access control</td>\n",
       "<td>Per message access control and central governance with IAM<br/>\n",
       "      See <a href=\"/eventarc/docs/access-control\">Access control with IAM</a></td>\n",
       "<td>See <a href=\"/eventarc/docs/access-control\">Access control with IAM</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Capacity</td>\n",
       "<td>Automatically provisioned</td>\n",
       "<td>Automatically provisioned</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Client library languages</td>\n",
       "<td>Java, Python, Go, Node.js, C++, C#, PHP, Ruby<br/>See\n",
       "      <a href=\"/eventarc/docs/reference/libraries\">Eventarc client libraries</a></td>\n",
       "<td>Java, Python, Go, Node.js, C++, C#, PHP, Ruby<br/>See\n",
       "      <a href=\"/eventarc/docs/reference/libraries\">Eventarc client libraries</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Compliance standards</td>\n",
       "<td>Doesn't apply to any feature in <a href=\"/products#product-launch-stages\">Preview</a></td>\n",
       "<td>See <a href=\"/eventarc/docs/compliance\">Compliance standards</a>\n",
       "</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Cross-project event delivery</td>\n",
       "<td>Supported<br/>See\n",
       "      <a href=\"/eventarc/advanced/docs/publish-events/publish-events-google-sources\">Publish events\n",
       "        from Google sources</a></td>\n",
       "<td>Not supported</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Customer managed encryption keys</td>\n",
       "<td>Yes<br/>See <a href=\"/eventarc/advanced/docs/use-cmek\">Use customer-managed encryption keys</a></td>\n",
       "<td>Yes<br/>See <a href=\"/eventarc/docs/use-cmek\">Use customer-managed encryption keys</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Dead letter queues supported</td>\n",
       "<td>No</td>\n",
       "<td>Yes, through Pub/Sub dead letter topic<br/>See\n",
       "      <a href=\"/eventarc/docs/retry-events\">Retry events</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Event format</td>\n",
       "<td>Events are delivered to the destination in a CloudEvents format<br/>See\n",
       "        <a href=\"/eventarc/docs/event-format\">Event format</a><br/>\n",
       "        Optionally, you can override this behavior by\n",
       "        <a href=\"/eventarc/advanced/docs/receive-events/transform-events\">defining an HTTP binding</a></td>\n",
       "<td>Events are delivered to the destination in a CloudEvents format<br/>\n",
       "      See <a href=\"/eventarc/docs/event-format\">Event format</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Event size</td>\n",
       "<td>1 MB maximum<br/>See\n",
       "      <a href=\"/eventarc/docs/quotas\">Quotas and limits</a></td>\n",
       "<td>512 KB maximum<br/>See\n",
       "      <a href=\"/eventarc/docs/quotas\">Quotas and limits</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Locations</td>\n",
       "<td>See <a href=\"/eventarc/docs/locations#advanced-regions\">Eventarc Advanced locations</a></td>\n",
       "<td>See <a href=\"/eventarc/docs/locations#triggers-regions\">Eventarc Standard locations</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Message filtering</td>\n",
       "<td>Filtering on any and all event attributes</td>\n",
       "<td>Filtering on event type and specific attributes</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Message routing</td>\n",
       "<td>Many providers to many destinations<br/></td>\n",
       "<td>Provider to destination</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Message schema conversion</td>\n",
       "<td>Yes<br/>See\n",
       "      <a href=\"/eventarc/advanced/docs/receive-events/configure-format-events\">Convert the format of\n",
       "        received events</a></td>\n",
       "<td>No</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Message transformation</td>\n",
       "<td>Yes, through CEL expressions<br/>See\n",
       "      <a href=\"/eventarc/advanced/docs/receive-events/transform-events\">Transform received events</a></td>\n",
       "<td>No</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Observability</td>\n",
       "<td>Through Google Cloud Observability such as <a href=\"/logging/docs/overview\">Cloud Logging</a>\n",
       "      and <a href=\"/monitoring/docs/monitoring-overview\">Cloud Monitoring</a><br/>See\n",
       "        <a href=\"/eventarc/advanced/docs/audit-logs\">Eventarc audit logging</a></td>\n",
       "<td>Through Google Cloud Observability such as <a href=\"/logging/docs/overview\">Cloud Logging</a>\n",
       "      and <a href=\"/monitoring/docs/monitoring-overview\">Cloud Monitoring</a><br/>See\n",
       "        <a href=\"/eventarc/docs/reference/audit-logs\">Eventarc audit logging</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Ordered delivery</td>\n",
       "<td>There is no in-order, first-in-first-out delivery guarantee</td>\n",
       "<td>There is no in-order, first-in-first-out delivery guarantee</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Pricing</td>\n",
       "<td>See <a href=\"/eventarc/pricing\">Eventarc pricing</a></td>\n",
       "<td>See <a href=\"/eventarc/pricing\">Eventarc pricing</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Regionality</td>\n",
       "<td>Regional<br/>See <a href=\"/eventarc/advanced/docs/overview#regionality\">Understand regionality</a></td>\n",
       "<td>Regional, Global<br/>See <a href=\"/eventarc/docs/understand-locations\">Understand\n",
       "      Eventarc locations</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>REST endpoints</td>\n",
       "<td><code dir=\"ltr\" translate=\"no\">https://eventarc.googleapis.com</code><br/>See\n",
       "      <a href=\"/eventarc/docs/reference/rest\">Eventarc API</a><br/>\n",
       "<code dir=\"ltr\" translate=\"no\">https://eventarcpublishing.googleapis.com</code><br/>See\n",
       "      <a href=\"/eventarc/docs/reference/publishing/rest\">Eventarc Publishing API</a></td>\n",
       "<td><code dir=\"ltr\" translate=\"no\">https://eventarc.googleapis.com</code><br/>See\n",
       "      <a href=\"/eventarc/docs/reference/rest\">Eventarc API</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Retry and retention</td>\n",
       "<td>At-least-once event delivery to targets; default message retention duration is 24 hours with\n",
       "      an exponential backoff delay<br/>\n",
       "      See <a href=\"/eventarc/docs/retry-events\">Retry events</a></td>\n",
       "<td>At-least-once event delivery to targets; default message retention duration is 24 hours with\n",
       "      an exponential backoff delay<br/>\n",
       "      See <a href=\"/eventarc/docs/retry-events\">Retry events</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Service limits</td>\n",
       "<td>One bus per Google Cloud project<br/>100 pipelines per Google Cloud project per\n",
       "      region<br/>See <a href=\"/eventarc/docs/quotas\">Quotas and limits</a></td>\n",
       "<td>500 triggers per location per Google Cloud project<br/>See\n",
       "        <a href=\"/eventarc/docs/quotas\">Quotas and limits</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Service perimeter using VPC Service Controls</td>\n",
       "<td>Yes<br/>See\n",
       "      <a href=\"/eventarc/advanced/docs/using-vpc-service-controls\">Set up a service perimeter using\n",
       "        VPC Service Controls</a></td>\n",
       "<td>Yes<br/>See\n",
       "      <a href=\"/eventarc/docs/using-vpc-service-controls\">Set up a service perimeter using\n",
       "        VPC Service Controls</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Supported sources</td>\n",
       "<td>Google providers<br/>Direct publishers using the Eventarc Publishing API<br/>See\n",
       "    <a href=\"/eventarc/advanced/docs/event-providers-targets\">Event providers and destinations</a></td>\n",
       "<td>Google providers<br/>Google providers through audit logs<br/>Third-party providers<br/>See\n",
       "    <a href=\"/eventarc/standard/docs/event-providers-targets\">Event providers and destinations</a></td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td>Supported targets</td>\n",
       "<td>Cloud Run functions (including 1st gen)<br/>Cloud Run jobs and services<br/>\n",
       "    Eventarc Advanced buses<br/>Internal HTTP endpoints in\n",
       "    VPC networks<br/>Pub/Sub topics<br/>Workflows<br/>See\n",
       "    <a href=\"/eventarc/advanced/docs/event-providers-targets\">Event providers and destinations</a></td>\n",
       "<td>Cloud Run functions<br/>Cloud Run services<br/>Internal HTTP endpoints in\n",
       "    VPC networks<br/>Public endpoints of private and public GKE\n",
       "    services<br/>Workflows<br/>See\n",
       "    <a href=\"/eventarc/standard/docs/event-providers-targets\">Event providers and destinations</a></td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "<devsite-hats-survey class=\"nocontent\" hats-id=\"mwETRvWii0eU5NUYprb0Y9z5GVbc\" listnr-id=\"83405\"></devsite-hats-survey>\n",
       "</div>\n",
       "<devsite-thumb-rating position=\"footer\">\n",
       "</devsite-thumb-rating>\n",
       "<devsite-feedback bucket=\"Documentation\" class=\"nocontent\" context=\"\" data-label=\"Send Feedback Button\" position=\"footer\" product-id=\"97037\" project-icon=\"https://www.gstatic.com/devrel-devsite/prod/v6dc4611c4232bd02b2b914c4948f523846f90835f230654af18f87f75fe9f73c/cloud/images/favicons/onecloud/super_cloud.png\" project-name=\"Eventarc\" track-metadata-position=\"footer\" track-name=\"sendFeedbackLink\" track-type=\"feedback\" version=\"t-devsite-webserver-20250506-r00-rc01.469018407534597322\">\n",
       "<button>\n",
       "  \n",
       "    \n",
       "    Send feedback\n",
       "  \n",
       "  </button>\n",
       "</devsite-feedback>\n",
       "<div class=\"devsite-floating-action-buttons\">\n",
       "</div>\n",
       "</article>"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display article_section so its current contents can be inspected.\n",
    "article_section"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "dceddf77-03a8-46d8-b569-929b1115b35d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bind a short name to the .soup attribute of eventarc\n",
    "# (presumably a Website instance holding the parsed BeautifulSoup tree - TODO confirm).\n",
    "soupy = eventarc.soup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "1445ba84-7e53-4612-8a31-2c60355913ab",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<div class=\"devsite-article-body clearfix\">\n",
       " <div class=\"nocontent\" id=\"tags\">\n",
       " <a class=\"cloud-chip\" data-title=\"Applies to Eventarc Advanced\" href=\"/eventarc/advanced/docs/overview\" track-name=\"Advanced\" track-type=\"docChip\">\n",
       "   Advanced\n",
       " \n",
       "   </a>\n",
       " <a class=\"cloud-chip\" data-title=\"Applies to Eventarc Standard\" href=\"/eventarc/standard/docs/overview\" track-name=\"Standard\" track-type=\"docChip\">\n",
       "   Standard\n",
       " \n",
       "   </a>\n",
       " </div>\n",
       " <aside class=\"beta\">\n",
       " <p>\n",
       " <strong>\n",
       "       \n",
       "         Preview\n",
       "       \n",
       "         — Eventarc Advanced\n",
       "     </strong>\n",
       " </p>\n",
       " <p>\n",
       "       \n",
       "       \n",
       "         This feature is subject to the \"Pre-GA Offerings Terms\" in the General Service Terms section\n",
       "         of the <a href=\"/terms/service-terms#1\" track-type=\"commonIncludes\">Service Specific Terms</a>.\n",
       "         \n",
       "         Pre-GA features are available \"as is\" and might have limited support.\n",
       "       \n",
       "       For more information, see the\n",
       "       <a href=\"/products#product-launch-stages\" track-type=\"commonIncludes\">launch stage descriptions</a>.\n",
       "   </p>\n",
       " </aside>\n",
       " <p>Eventarc lets you build event-driven architectures without having to implement,\n",
       "     customize, or maintain the underlying infrastructure.</p>\n",
       " <p>Eventarc is offered in two editions: <b>Eventarc Advanced</b> and\n",
       "     <b>Eventarc Standard</b>.</p>\n",
       " <p>Both editions offer a scalable, serverless, and fully managed eventing solution that lets you\n",
       "   asynchronously route messages from sources to targets using loosely coupled services that are\n",
       "   triggered by and react to state changes known as <i>events</i>. Both editions support a range of\n",
       "   event providers and destinations—including Google Cloud services, custom applications, SaaS\n",
       "   applications, and third-party services—while managing delivery, security, authorization,\n",
       "   observability, and error-handling for you.</p>\n",
       " <p>Note that the underlying data model for both editions of Eventarc is the same. As\n",
       "   a use case grows in complexity, you have the option of seamlessly transitioning from using\n",
       "   Eventarc Standard to using Eventarc Advanced.</p>\n",
       " <h2 data-text=\"Editions overview\" id=\"editions-overview\" tabindex=\"-1\">Editions overview</h2>\n",
       " <p>The following is an overview of both editions. For more detailed information, see the\n",
       "   <a href=\"/eventarc/advanced/docs/overview\">Eventarc Advanced overview</a> and the\n",
       "   <a href=\"/eventarc/standard/docs/overview\">Eventarc Standard overview</a>.\n",
       " </p>\n",
       " <dl>\n",
       " <dt>Eventarc Advanced</dt>\n",
       " <dd><p>Eventarc Advanced is a fully managed platform for building event-driven\n",
       "     architectures. It lets you collect events that occur in a system and publish them to a central\n",
       "     bus. Interested services can subscribe to specific messages by creating enrollments. You\n",
       "     can use the bus to route events from multiple sources in real time and publish them to\n",
       "     multiple destinations, and optionally transform events prior to delivery to a target.\n",
       "     Eventarc Advanced is feature rich and is ideal for organizations with\n",
       "     complex eventing and messaging needs, particularly those grappling with managing numerous\n",
       "     Pub/Sub topics, Kafka queues, or other third-party messaging systems. By providing\n",
       "     administrators with enhanced and centralized visibility and control,\n",
       "     Eventarc Advanced enables organizations to connect multiple teams across\n",
       "     different projects.</p>\n",
       " <figure id=\"architecture-overview\">\n",
       " <a href=\"/static/eventarc/images/eventarc-advanced-overview-sans-numbers.svg\">\n",
       " <img alt=\"Eventarc Advanced lets you receive, filter, transform, route, and\n",
       "             deliver messages between different event providers and destinations.\" src=\"/static/eventarc/images/eventarc-advanced-overview-sans-numbers.svg\"/></a>\n",
       " <figcaption style=\"font-size:85%\">Eventarc Advanced lets you receive, filter,\n",
       "           transform, route, and deliver messages<br/>between different event providers and\n",
       "           destinations (click diagram to enlarge).</figcaption>\n",
       " </figure>\n",
       " </dd>\n",
       " <dt>Eventarc Standard</dt>\n",
       " <dd><p>Eventarc Standard is recommended for applications where the focus is on simply\n",
       "     delivering events from event provider to event destination. It lets you quickly and easily\n",
       "     consume Google events by defining triggers that filter inbound events according to their source,\n",
       "     type, and other attributes, and then route them to a specified destination.</p>\n",
       " <figure id=\"architecture-overview\">\n",
       " <a href=\"/static/eventarc/images/eventarc-standard-overview.svg\">\n",
       " <img alt=\"Eventarc Standard routes events from event providers to event destinations.\" src=\"/static/eventarc/images/eventarc-standard-overview.svg\"/></a>\n",
       " <figcaption style=\"font-size:85%\">Eventarc Standard lets you filter and\n",
       "             route events<br/>from event providers to event destinations (click diagram to enlarge).</figcaption>\n",
       " </figure>\n",
       " </dd>\n",
       " </dl>\n",
       " <h2 data-text=\"Features comparison table\" id=\"features-comparison-table\" tabindex=\"-1\">Features comparison table</h2>\n",
       " \n",
       " The following table can help you choose between Eventarc Advanced and\n",
       " Eventarc Standard. It assumes your familiarity with the basic concepts of\n",
       " \n",
       "   <a href=\"/eventarc/docs/event-driven-architectures\">event-driven architectures</a>.\n",
       " \n",
       " \n",
       " <table>\n",
       " <thead>\n",
       " <tr>\n",
       " <th width=\"20%\">Feature</th>\n",
       " <th width=\"40%\">Eventarc Advanced</th>\n",
       " <th>Eventarc Standard</th>\n",
       " </tr>\n",
       " </thead>\n",
       " <tbody>\n",
       " <tr>\n",
       " <td>Access control</td>\n",
       " <td>Per message access control and central governance with IAM<br/>\n",
       "       See <a href=\"/eventarc/docs/access-control\">Access control with IAM</a></td>\n",
       " <td>See <a href=\"/eventarc/docs/access-control\">Access control with IAM</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Capacity</td>\n",
       " <td>Automatically provisioned</td>\n",
       " <td>Automatically provisioned</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Client library languages</td>\n",
       " <td>Java, Python, Go, Node.js, C++, C#, PHP, Ruby<br/>See\n",
       "       <a href=\"/eventarc/docs/reference/libraries\">Eventarc client libraries</a></td>\n",
       " <td>Java, Python, Go, Node.js, C++, C#, PHP, Ruby<br/>See\n",
       "       <a href=\"/eventarc/docs/reference/libraries\">Eventarc client libraries</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Compliance standards</td>\n",
       " <td>Doesn't apply to any feature in <a href=\"/products#product-launch-stages\">Preview</a></td>\n",
       " <td>See <a href=\"/eventarc/docs/compliance\">Compliance standards</a>\n",
       " </td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Cross-project event delivery</td>\n",
       " <td>Supported<br/>See\n",
       "       <a href=\"/eventarc/advanced/docs/publish-events/publish-events-google-sources\">Publish events\n",
       "         from Google sources</a></td>\n",
       " <td>Not supported</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Customer managed encryption keys</td>\n",
       " <td>Yes<br/>See <a href=\"/eventarc/advanced/docs/use-cmek\">Use customer-managed encryption keys</a></td>\n",
       " <td>Yes<br/>See <a href=\"/eventarc/docs/use-cmek\">Use customer-managed encryption keys</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Dead letter queues supported</td>\n",
       " <td>No</td>\n",
       " <td>Yes, through Pub/Sub dead letter topic<br/>See\n",
       "       <a href=\"/eventarc/docs/retry-events\">Retry events</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Event format</td>\n",
       " <td>Events are delivered to the destination in a CloudEvents format<br/>See\n",
       "         <a href=\"/eventarc/docs/event-format\">Event format</a><br/>\n",
       "         Optionally, you can override this behavior by\n",
       "         <a href=\"/eventarc/advanced/docs/receive-events/transform-events\">defining an HTTP binding</a></td>\n",
       " <td>Events are delivered to the destination in a CloudEvents format<br/>\n",
       "       See <a href=\"/eventarc/docs/event-format\">Event format</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Event size</td>\n",
       " <td>1 MB maximum<br/>See\n",
       "       <a href=\"/eventarc/docs/quotas\">Quotas and limits</a></td>\n",
       " <td>512 KB maximum<br/>See\n",
       "       <a href=\"/eventarc/docs/quotas\">Quotas and limits</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Locations</td>\n",
       " <td>See <a href=\"/eventarc/docs/locations#advanced-regions\">Eventarc Advanced locations</a></td>\n",
       " <td>See <a href=\"/eventarc/docs/locations#triggers-regions\">Eventarc Standard locations</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Message filtering</td>\n",
       " <td>Filtering on any and all event attributes</td>\n",
       " <td>Filtering on event type and specific attributes</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Message routing</td>\n",
       " <td>Many providers to many destinations<br/></td>\n",
       " <td>Provider to destination</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Message schema conversion</td>\n",
       " <td>Yes<br/>See\n",
       "       <a href=\"/eventarc/advanced/docs/receive-events/configure-format-events\">Convert the format of\n",
       "         received events</a></td>\n",
       " <td>No</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Message transformation</td>\n",
       " <td>Yes, through CEL expressions<br/>See\n",
       "       <a href=\"/eventarc/advanced/docs/receive-events/transform-events\">Transform received events</a></td>\n",
       " <td>No</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Observability</td>\n",
       " <td>Through Google Cloud Observability such as <a href=\"/logging/docs/overview\">Cloud Logging</a>\n",
       "       and <a href=\"/monitoring/docs/monitoring-overview\">Cloud Monitoring</a><br/>See\n",
       "         <a href=\"/eventarc/advanced/docs/audit-logs\">Eventarc audit logging</a></td>\n",
       " <td>Through Google Cloud Observability such as <a href=\"/logging/docs/overview\">Cloud Logging</a>\n",
       "       and <a href=\"/monitoring/docs/monitoring-overview\">Cloud Monitoring</a><br/>See\n",
       "         <a href=\"/eventarc/docs/reference/audit-logs\">Eventarc audit logging</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Ordered delivery</td>\n",
       " <td>There is no in-order, first-in-first-out delivery guarantee</td>\n",
       " <td>There is no in-order, first-in-first-out delivery guarantee</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Pricing</td>\n",
       " <td>See <a href=\"/eventarc/pricing\">Eventarc pricing</a></td>\n",
       " <td>See <a href=\"/eventarc/pricing\">Eventarc pricing</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Regionality</td>\n",
       " <td>Regional<br/>See <a href=\"/eventarc/advanced/docs/overview#regionality\">Understand regionality</a></td>\n",
       " <td>Regional, Global<br/>See <a href=\"/eventarc/docs/understand-locations\">Understand\n",
       "       Eventarc locations</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>REST endpoints</td>\n",
       " <td><code dir=\"ltr\" translate=\"no\">https://eventarc.googleapis.com</code><br/>See\n",
       "       <a href=\"/eventarc/docs/reference/rest\">Eventarc API</a><br/>\n",
       " <code dir=\"ltr\" translate=\"no\">https://eventarcpublishing.googleapis.com</code><br/>See\n",
       "       <a href=\"/eventarc/docs/reference/publishing/rest\">Eventarc Publishing API</a></td>\n",
       " <td><code dir=\"ltr\" translate=\"no\">https://eventarc.googleapis.com</code><br/>See\n",
       "       <a href=\"/eventarc/docs/reference/rest\">Eventarc API</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Retry and retention</td>\n",
       " <td>At-least-once event delivery to targets; default message retention duration is 24 hours with\n",
       "       an exponential backoff delay<br/>\n",
       "       See <a href=\"/eventarc/docs/retry-events\">Retry events</a></td>\n",
       " <td>At-least-once event delivery to targets; default message retention duration is 24 hours with\n",
       "       an exponential backoff delay<br/>\n",
       "       See <a href=\"/eventarc/docs/retry-events\">Retry events</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Service limits</td>\n",
       " <td>One bus per Google Cloud project<br/>100 pipelines per Google Cloud project per\n",
       "       region<br/>See <a href=\"/eventarc/docs/quotas\">Quotas and limits</a></td>\n",
       " <td>500 triggers per location per Google Cloud project<br/>See\n",
       "         <a href=\"/eventarc/docs/quotas\">Quotas and limits</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Service perimeter using VPC Service Controls</td>\n",
       " <td>Yes<br/>See\n",
       "       <a href=\"/eventarc/advanced/docs/using-vpc-service-controls\">Set up a service perimeter using\n",
       "         VPC Service Controls</a></td>\n",
       " <td>Yes<br/>See\n",
       "       <a href=\"/eventarc/docs/using-vpc-service-controls\">Set up a service perimeter using\n",
       "         VPC Service Controls</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Supported sources</td>\n",
       " <td>Google providers<br/>Direct publishers using the Eventarc Publishing API<br/>See\n",
       "     <a href=\"/eventarc/advanced/docs/event-providers-targets\">Event providers and destinations</a></td>\n",
       " <td>Google providers<br/>Google providers through audit logs<br/>Third-party providers<br/>See\n",
       "     <a href=\"/eventarc/standard/docs/event-providers-targets\">Event providers and destinations</a></td>\n",
       " </tr>\n",
       " <tr>\n",
       " <td>Supported targets</td>\n",
       " <td>Cloud Run functions (including 1st gen)<br/>Cloud Run jobs and services<br/>\n",
       "     Eventarc Advanced buses<br/>Internal HTTP endpoints in\n",
       "     VPC networks<br/>Pub/Sub topics<br/>Workflows<br/>See\n",
       "     <a href=\"/eventarc/advanced/docs/event-providers-targets\">Event providers and destinations</a></td>\n",
       " <td>Cloud Run functions<br/>Cloud Run services<br/>Internal HTTP endpoints in\n",
       "     VPC networks<br/>Public endpoints of private and public GKE\n",
       "     services<br/>Workflows<br/>See\n",
       "     <a href=\"/eventarc/standard/docs/event-providers-targets\">Event providers and destinations</a></td>\n",
       " </tr>\n",
       " </tbody>\n",
       " </table>\n",
       " <devsite-hats-survey class=\"nocontent\" hats-id=\"mwETRvWii0eU5NUYprb0Y9z5GVbc\" listnr-id=\"83405\"></devsite-hats-survey>\n",
       " </div>]"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "art_body = soupy.select('.devsite-article-body')\n",
    "art_body"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a2263cb-694d-490c-8fd3-57a53e8dabfc",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}