diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e01c128 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +venv/ +ipynb_checkpoints/ +.ipynb_checkpoints/ diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..1d4830e --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.10.17 diff --git a/gcp_docs.ipynb b/gcp_docs.ipynb new file mode 100644 index 0000000..fdfc338 --- /dev/null +++ b/gcp_docs.ipynb @@ -0,0 +1,1458 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7e66c738-f269-4cc6-ab32-e91fb65b234d", + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "from openai import OpenAI\n", + "import gradio as gr\n", + "import requests\n", + "import json\n", + "from typing import List\n", + "from bs4 import BeautifulSoup, Tag, NavigableString\n", + "from IPython.display import Markdown, display, update_display" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f85f4df7-3173-4ed9-ae1b-83b02088b00f", + "metadata": {}, + "outputs": [], + "source": [ + "#\n", + "# from Ed Donner's github repo for LLM Engineering course on Udemy: \n", + "# https://github.com/ed-donner/llm_engineering/blob/main/week1/day5.ipynb\n", + "#\n", + "headers = {\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + " \"\"\"\n", + " A utility class to represent a Website that we have scraped, now with links\n", + " \"\"\"\n", + "\n", + " def __init__(self, url):\n", + " \n", + " self.url = url\n", + " self.title = None\n", + " self.links = set()\n", + " self.last_updated = '1970-01-01'\n", + " \n", + " response = requests.get(url, headers=headers)\n", + " self.body = response.content\n", + " self.soup = BeautifulSoup(self.body, 'html.parser')\n", + " self.article = self.soup.find('article', class_='devsite-article')\n", + " # self.title = soup.title.string if soup.title else \"No title found\"\n", + " # if soup.body:\n", + " # for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " # irrelevant.decompose()\n", + " # self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + " # else:\n", + " # self.text = \"\"\n", + " # links = [link.get('href') for link in soup.find_all('a')]\n", + " # self.links = [link for link in links if link]\n", + "\n", + " def get_doc_title(self):\n", + " if self.title:\n", + " return self.title\n", + "\n", + " title = self.soup.find('h1', class_='devsite-page-title').find(string=True, recursive=False).strip()\n", + " if not title:\n", + " self.title = '{} | No Title'.format(self.url)\n", + " else:\n", + " self.title = title\n", + "\n", + " return self.title\n", + "\n", + " def get_last_updated(self):\n", + " footer = self.soup.find('devsite-content-footer')\n", + " footer_paras = footer.find_all('p')\n", + " for fp in footer_paras:\n", + " last_updated_re = r'Last updated (.*) UTC'\n", + " match = re.search(last_updated_re, fp.get_text())\n", + " if match:\n", + " self.last_updated = match.group(1)\n", + " break\n", + "\n", + " def get_doc_links(self):\n", + " nav_items = self.soup.find_all('li', class_='devsite-nav-item')\n", + " links_in_article = self.article_section.find_all('a', href=True)\n", + " all_links = nav_items + links_in_article\n", + " \n", + " for al in all_links:\n", + " link = al.find('a', href=True)\n", + " if link and 'docs' in link['href']:\n", + " if link['href'][0] == '/':\n", + " self.links.add('https://cloud.google.com{}'.format(link['href']))\n", + " else:\n", + " self.links.add(link['href'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "46dff4b3-949f-40ac-b8e3-cd89c5b20461", + "metadata": {}, + "outputs": [], + "source": [ + "exclude_tags = ['devsite-feedback', 'devsite-actions', 'devsite-toc', 'aside', 'devsite-page-title-meta', 'devsite-thumb-rating']" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5c6233b9-498b-44e4-9a87-e58aaa281ba9", + "metadata": {}, + "outputs": [], + "source": [ + "eventarc = Website('https://cloud.google.com/eventarc/docs')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "afa176e2-4024-493a-be97-f2526cca92cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Eventarc overview'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eventarc.get_doc_title()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7dcaa961-55f9-4778-9d55-96e24b6e8c7e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Eventarc overview'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eventarc.title" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "030abb06-d590-43da-8333-184520f000b0", + "metadata": {}, + "outputs": [], + "source": [ + "soup = eventarc.soup" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7d0e2f63-2d88-4d86-86c0-6d0cd1a463e4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Eventarc overview\n" + ] + } + ], + "source": [ + "print(soup.find('h1', class_='devsite-page-title').find(string=True, recursive=False).strip())" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "0c908d8b-0cdb-4617-ae9a-d411cca8b7f2", + "metadata": {}, + "outputs": [], + "source": [ + "footer = soup.find('devsite-content-footer')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "e675fde6-664c-428c-98cc-c0d1ce4b82f1", + "metadata": {}, + "outputs": [], + "source": [ + "footer_paras = footer.find_all('p')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "a841163b-84f2-4068-981e-4a99b5e1fcc5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n", + "Nope\n", + "\n", + "2025-05-08\n" + ] + } + ], + "source": [ + "for fp in footer_paras:\n", + " last_updated_re = r'Last updated (.*) UTC'\n", + " match = re.search(last_updated_re, fp.get_text())\n", + " print(match)\n", + " if match:\n", + " print(match.group(1))\n", + " else:\n", + " print('Nope')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "436065d7-d3fe-412a-b0b6-a774044290b3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2025-05-08'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eventarc.get_last_updated()\n", + "eventarc.last_updated" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "bb6ec155-ed4d-451a-9d0d-885f959ebd85", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://cloud.google.com/docs/ai-ml'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nav_items = soup.find_all('li', class_='devsite-nav-item')\n", + "nav_items[0].find('a', href=True)['href']" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "0a740921-505c-4e4a-987f-fa8947a27ebd", + "metadata": {}, + "outputs": [], + "source": [ + "links_on_page = set()\n", + "for ni in nav_items:\n", + " link = ni.find('a', href=True)\n", + " if link and 'docs' in link['href']:\n", + " if link['href'][0] == '/' and '#' not in link['href']:\n", + " links_on_page.add('https://cloud.google.com{}'.format(link['href']))\n", + " else:\n", + " links_on_page.add(link['href'])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "f9474a3e-579e-4923-b1e6-22b5c782cd5f", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'https://cloud.google.com/docs',\n", + " 'https://cloud.google.com/docs/access-resources',\n", + " 'https://cloud.google.com/docs/ai-ml',\n", + " 'https://cloud.google.com/docs/application-development',\n", + " 'https://cloud.google.com/docs/application-hosting',\n", + " 'https://cloud.google.com/docs/compute-area',\n", + " 'https://cloud.google.com/docs/costs-usage',\n", + " 'https://cloud.google.com/docs/cross-product-overviews',\n", + " 'https://cloud.google.com/docs/data',\n", + " 'https://cloud.google.com/docs/databases',\n", + " 'https://cloud.google.com/docs/devtools',\n", + " 'https://cloud.google.com/docs/dhm-cloud',\n", + " 'https://cloud.google.com/docs/generative-ai',\n", + " 'https://cloud.google.com/docs/iac',\n", + " 'https://cloud.google.com/docs/industry',\n", + " 'https://cloud.google.com/docs/migration',\n", + " 'https://cloud.google.com/docs/networking',\n", + " 'https://cloud.google.com/docs/observability',\n", + " 'https://cloud.google.com/docs/security',\n", + " 'https://cloud.google.com/docs/storage',\n", + " 'https://cloud.google.com/docs/tech-area-overviews',\n", + " 'https://cloud.google.com/eventarc/advanced/docs/overview',\n", + " 'https://cloud.google.com/eventarc/docs',\n", + " 'https://cloud.google.com/eventarc/docs/apis',\n", + " 'https://cloud.google.com/eventarc/docs/event-driven-architectures',\n", + " 'https://cloud.google.com/eventarc/docs/event-format',\n", + " 'https://cloud.google.com/eventarc/docs/event-providers-targets',\n", + " 'https://cloud.google.com/eventarc/docs/event-types',\n", + " 'https://cloud.google.com/eventarc/docs/resources',\n", + " 'https://cloud.google.com/eventarc/docs/samples',\n", + " 'https://cloud.google.com/eventarc/standard/docs/overview',\n", + " 'https://cloud.google.com/marketplace/docs'}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "links_on_page" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "6d1987db-b346-41cb-adcb-1a4ba0f6fd03", + "metadata": {}, + "outputs": [], + "source": [ + "article_section = soup.find('article', class_='devsite-article')\n", + "links_in_article = article_section.find_all('a', href=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "2b48bcf0-dd19-4378-83e0-6526eea688e7", + "metadata": {}, + "outputs": [], + "source": [ + "def make_google_cloud_link(link):\n", + " return 'https://cloud.google.com{}'.format(link['href'])" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "20d62b32-bac1-49d6-96ae-86143fcaa3bd", + "metadata": {}, + "outputs": [], + "source": [ + "article_links = set()\n", + "for lia in links_in_article:\n", + " if lia and 'docs' in lia['href'] and '#' not in lia['href']:\n", + " article_links.add(make_google_cloud_link(lia))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "bad4726e-394a-4935-a57f-f53fe8f87ae6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'https://cloud.google.com/eventarc/advanced/docs/audit-logs',\n", + " 'https://cloud.google.com/eventarc/advanced/docs/event-providers-targets',\n", + " 'https://cloud.google.com/eventarc/advanced/docs/overview',\n", + " 'https://cloud.google.com/eventarc/advanced/docs/publish-events/publish-events-google-sources',\n", + " 'https://cloud.google.com/eventarc/advanced/docs/receive-events/configure-format-events',\n", + " 'https://cloud.google.com/eventarc/advanced/docs/receive-events/transform-events',\n", + " 'https://cloud.google.com/eventarc/advanced/docs/use-cmek',\n", + " 'https://cloud.google.com/eventarc/advanced/docs/using-vpc-service-controls',\n", + " 'https://cloud.google.com/eventarc/docs/access-control',\n", + " 'https://cloud.google.com/eventarc/docs/compliance',\n", + " 'https://cloud.google.com/eventarc/docs/event-driven-architectures',\n", + " 'https://cloud.google.com/eventarc/docs/event-format',\n", + " 'https://cloud.google.com/eventarc/docs/quotas',\n", + " 'https://cloud.google.com/eventarc/docs/reference/audit-logs',\n", + " 'https://cloud.google.com/eventarc/docs/reference/libraries',\n", + " 'https://cloud.google.com/eventarc/docs/reference/publishing/rest',\n", + " 'https://cloud.google.com/eventarc/docs/reference/rest',\n", + " 'https://cloud.google.com/eventarc/docs/retry-events',\n", + " 'https://cloud.google.com/eventarc/docs/understand-locations',\n", + " 'https://cloud.google.com/eventarc/docs/use-cmek',\n", + " 'https://cloud.google.com/eventarc/docs/using-vpc-service-controls',\n", + " 'https://cloud.google.com/eventarc/standard/docs/event-providers-targets',\n", + " 'https://cloud.google.com/eventarc/standard/docs/overview',\n", + " 'https://cloud.google.com/logging/docs/overview',\n", + " 'https://cloud.google.com/monitoring/docs/monitoring-overview',\n", + " 'https://cloud.google.comhttps://cloud.google.com/docs',\n", + " 'https://cloud.google.comhttps://cloud.google.com/eventarc/docs'}" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "article_links" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "98477112-90f6-4ddd-96e9-dfc5e8f5ff82", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "** 0 **: \n", + "\n", + "** 0 **: \n", + "\n", + "** 0 **: \n", + "\n", + "** 0 **:

\n", + " Eventarc overview\n", + "
\n", + "\n", + "\n", + " \n", + " Stay organized with collections\n", + " \n", + "\n", + " \n", + " Save and categorize content based on your preferences.\n", + " \n", + "
\n", + "

\n", + "dict_keys(['class', 'tabindex'])\n", + "---*** TAG NAME: h1\n", + "['devsite-page-title']\n", + "CLASS_LIST: ['devsite-page-title']\n", + "CLASS cl: devsite-page-title\n", + "** 1 **: \n", + "\n", + "** 1 **: \n", + "\n", + "** 1 **: \n", + "\n", + "** 1 **:
\n", + "\n", + "\n", + "

Eventarc lets you build event-driven architectures without having to implement,\n", + " customize, or maintain the underlying infrastructure.

\n", + "

Eventarc is offered in two editions: Eventarc Advanced and\n", + " Eventarc Standard.

\n", + "

Both editions offer a scalable, serverless, and fully managed eventing solution that lets you\n", + " asynchronously route messages from sources to targets using loosely coupled services that are\n", + " triggered by and react to state changes known as events. Both editions support a range of\n", + " event providers and destinations—including Google Cloud services, custom applications, SaaS\n", + " applications, and third-party services—while managing delivery, security, authorization,\n", + " observability, and error-handling for you.

\n", + "

Note that the underlying data model for both editions of Eventarc is the same. As\n", + " a use case grows in complexity, you have the option of seamlessly transitioning from using\n", + " Eventarc Standard to using Eventarc Advanced.

\n", + "

Editions overview

\n", + "

The following is an overview of both editions. For more detailed information, see the\n", + " Eventarc Advanced overview and the\n", + " Eventarc Standard overview.\n", + "

\n", + "
\n", + "
Eventarc Advanced
\n", + "

Eventarc Advanced is a fully managed platform for building event-driven\n", + " architectures. It lets you collect events that occur in a system and publish them to a central\n", + " bus. Interested services can subscribe to specific messages by creating enrollments. You\n", + " can use the bus to route events from multiple sources in real time and publish them to\n", + " multiple destinations, and optionally transform events prior to delivery to a target.\n", + " Eventarc Advanced is feature rich and is ideal for organizations with\n", + " complex eventing and messaging needs, particularly those grappling with managing numerous\n", + " Pub/Sub topics, Kafka queues, or other third-party messaging systems. By providing\n", + " administrators with enhanced and centralized visibility and control,\n", + " Eventarc Advanced enables organizations to connect multiple teams across\n", + " different projects.

\n", + "
\n", + "\n", + "\"Eventarc\n", + "
Eventarc Advanced lets you receive, filter,\n", + " transform, route, and deliver messages
between different event providers and\n", + " destinations (click diagram to enlarge).
\n", + "
\n", + "
\n", + "
Eventarc Standard
\n", + "

Eventarc Standard is recommended for applications where the focus is on simply\n", + " delivering events from event provider to event destination. It lets you quickly and easily\n", + " consume Google events by defining triggers that filter inbound events according to their source,\n", + " type, and other attributes, and then route them to a specified destination.

\n", + "
\n", + "\n", + "\"Eventarc\n", + "
Eventarc Standard lets you filter and\n", + " route events
from event providers to event destinations (click diagram to enlarge).
\n", + "
\n", + "
\n", + "
\n", + "

Features comparison table

\n", + "\n", + "The following table can help you choose between Eventarc Advanced and\n", + "Eventarc Standard. It assumes your familiarity with the basic concepts of\n", + "\n", + " event-driven architectures.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
FeatureEventarc AdvancedEventarc Standard
Access controlPer message access control and central governance with IAM
\n", + " See Access control with IAM
See Access control with IAM
CapacityAutomatically provisionedAutomatically provisioned
Client library languagesJava, Python, Go, Node.js, C++, C#, PHP, Ruby
See\n", + " Eventarc client libraries
Java, Python, Go, Node.js, C++, C#, PHP, Ruby
See\n", + " Eventarc client libraries
Compliance standardsDoesn't apply to any feature in PreviewSee Compliance standards\n", + "
Cross-project event deliverySupported
See\n", + " Publish events\n", + " from Google sources
Not supported
Customer managed encryption keysYes
See Use customer-managed encryption keys
Yes
See Use customer-managed encryption keys
Dead letter queues supportedNoYes, through Pub/Sub dead letter topic
See\n", + " Retry events
Event formatEvents are delivered to the destination in a CloudEvents format
See\n", + " Event format
\n", + " Optionally, you can override this behavior by\n", + " defining an HTTP binding
Events are delivered to the destination in a CloudEvents format
\n", + " See Event format
Event size1 MB maximum
See\n", + " Quotas and limits
512 KB maximum
See\n", + " Quotas and limits
LocationsSee Eventarc Advanced locationsSee Eventarc Standard locations
Message filteringFiltering on any and all event attributesFiltering on event type and specific attributes
Message routingMany providers to many destinations
Provider to destination
Message schema conversionYes
See\n", + " Convert the format of\n", + " received events
No
Message transformationYes, through CEL expressions
See\n", + " Transform received events
No
ObservabilityThrough Google Cloud Observability such as Cloud Logging\n", + " and Cloud Monitoring
See\n", + " Eventarc audit logging
Through Google Cloud Observability such as Cloud Logging\n", + " and Cloud Monitoring
See\n", + " Eventarc audit logging
Ordered deliveryThere is no in-order, first-in-first-out delivery guaranteeThere is no in-order, first-in-first-out delivery guarantee
PricingSee Eventarc pricingSee Eventarc pricing
RegionalityRegional
See Understand regionality
Regional, Global
See Understand\n", + " Eventarc locations
REST endpointshttps://eventarc.googleapis.com
See\n", + " Eventarc API
\n", + "https://eventarcpublishing.googleapis.com
See\n", + " Eventarc Publishing API
https://eventarc.googleapis.com
See\n", + " Eventarc API
Retry and retentionAt-least-once event delivery to targets; default message retention duration is 24 hours with\n", + " an exponential backoff delay
\n", + " See Retry events
At-least-once event delivery to targets; default message retention duration is 24 hours with\n", + " an exponential backoff delay
\n", + " See Retry events
Service limitsOne bus per Google Cloud project
100 pipelines per Google Cloud project per\n", + " region
See Quotas and limits
500 triggers per location per Google Cloud project
See\n", + " Quotas and limits
Service perimeter using VPC Service ControlsYes
See\n", + " Set up a service perimeter using\n", + " VPC Service Controls
Yes
See\n", + " Set up a service perimeter using\n", + " VPC Service Controls
Supported sourcesGoogle providers
Direct publishers using the Eventarc Publishing API
See\n", + " Event providers and destinations
Google providers
Google providers through audit logs
Third-party providers
See\n", + " Event providers and destinations
Supported targetsCloud Run functions (including 1st gen)
Cloud Run jobs and services
\n", + " Eventarc Advanced buses
Internal HTTP endpoints in\n", + " VPC networks
Pub/Sub topics
Workflows
See\n", + " Event providers and destinations
Cloud Run functions
Cloud Run services
Internal HTTP endpoints in\n", + " VPC networks
Public endpoints of private and public GKE\n", + " services
Workflows
See\n", + " Event providers and destinations
\n", + "\n", + "
\n", + "dict_keys(['class'])\n", + "---*** TAG NAME: div\n", + "['devsite-article-body', 'clearfix']\n", + "CLASS_LIST: ['devsite-article-body', 'clearfix']\n", + "CLASS cl: devsite-article-body\n", + "CLASS cl: clearfix\n", + "** 2 **: \n", + "\n", + "** 2 **: \n", + "\n", + "** 2 **: \n", + "\n", + "** 2 **:
\n", + "
\n", + "dict_keys(['class'])\n", + "---*** TAG NAME: div\n", + "['devsite-floating-action-buttons']\n", + "CLASS_LIST: ['devsite-floating-action-buttons']\n", + "CLASS cl: devsite-floating-action-buttons\n", + "** 3 **: \n", + "\n" + ] + } + ], + "source": [ + "i = 0\n", + "for elem in article_section:\n", + " if i < 5:\n", + " \n", + " if elem.name in exclude_tags:\n", + " continue\n", + "\n", + " print('** {} **: {}'.format(i,elem))\n", + " if isinstance(elem, Tag):\n", + " keys = elem.attrs.keys()\n", + " print(keys)\n", + " if 'class' in keys:\n", + " print(\"---*** TAG NAME: {}\".format(elem.name))\n", + " print(elem['class'])\n", + " class_list = elem['class']\n", + " print(\"CLASS_LIST: {}\".format(class_list))\n", + " for cl in class_list:\n", + " print(\"CLASS cl: {}\".format(cl))\n", + " if 'breadcrumb' in cl or 'meta' in cl:\n", + " print('DECOMPOSING\\n')\n", + " elem.decompose()\n", + " break\n", + " else:\n", + " print('No class or no breadcrumb')\n", + " i = i + 1\n", + " else:\n", + " continue\n", + " else:\n", + " article_section = article_section\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "849ab8d6-3f38-46a7-bbe8-5341f070861a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "

\n", + " Eventarc overview\n", + "
\n", + "\n", + "\n", + " \n", + " Stay organized with collections\n", + " \n", + "\n", + " \n", + " Save and categorize content based on your preferences.\n", + " \n", + "
\n", + "

\n", + "\n", + "\n", + "\n", + "
\n", + "\n", + "\n", + "

Eventarc lets you build event-driven architectures without having to implement,\n", + " customize, or maintain the underlying infrastructure.

\n", + "

Eventarc is offered in two editions: Eventarc Advanced and\n", + " Eventarc Standard.

\n", + "

Both editions offer a scalable, serverless, and fully managed eventing solution that lets you\n", + " asynchronously route messages from sources to targets using loosely coupled services that are\n", + " triggered by and react to state changes known as events. Both editions support a range of\n", + " event providers and destinations—including Google Cloud services, custom applications, SaaS\n", + " applications, and third-party services—while managing delivery, security, authorization,\n", + " observability, and error-handling for you.

\n", + "

Note that the underlying data model for both editions of Eventarc is the same. As\n", + " a use case grows in complexity, you have the option of seamlessly transitioning from using\n", + " Eventarc Standard to using Eventarc Advanced.

\n", + "

Editions overview

\n", + "

The following is an overview of both editions. For more detailed information, see the\n", + " Eventarc Advanced overview and the\n", + " Eventarc Standard overview.\n", + "

\n", + "
\n", + "
Eventarc Advanced
\n", + "

Eventarc Advanced is a fully managed platform for building event-driven\n", + " architectures. It lets you collect events that occur in a system and publish them to a central\n", + " bus. Interested services can subscribe to specific messages by creating enrollments. You\n", + " can use the bus to route events from multiple sources in real time and publish them to\n", + " multiple destinations, and optionally transform events prior to delivery to a target.\n", + " Eventarc Advanced is feature rich and is ideal for organizations with\n", + " complex eventing and messaging needs, particularly those grappling with managing numerous\n", + " Pub/Sub topics, Kafka queues, or other third-party messaging systems. By providing\n", + " administrators with enhanced and centralized visibility and control,\n", + " Eventarc Advanced enables organizations to connect multiple teams across\n", + " different projects.

\n", + "
\n", + "\n", + "\"Eventarc\n", + "
Eventarc Advanced lets you receive, filter,\n", + " transform, route, and deliver messages
between different event providers and\n", + " destinations (click diagram to enlarge).
\n", + "
\n", + "
\n", + "
Eventarc Standard
\n", + "

Eventarc Standard is recommended for applications where the focus is on simply\n", + " delivering events from event provider to event destination. It lets you quickly and easily\n", + " consume Google events by defining triggers that filter inbound events according to their source,\n", + " type, and other attributes, and then route them to a specified destination.

\n", + "
\n", + "\n", + "\"Eventarc\n", + "
Eventarc Standard lets you filter and\n", + " route events
from event providers to event destinations (click diagram to enlarge).
\n", + "
\n", + "
\n", + "
\n", + "

Features comparison table

\n", + "\n", + "The following table can help you choose between Eventarc Advanced and\n", + "Eventarc Standard. It assumes your familiarity with the basic concepts of\n", + "\n", + " event-driven architectures.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
FeatureEventarc AdvancedEventarc Standard
Access controlPer message access control and central governance with IAM
\n", + " See Access control with IAM
See Access control with IAM
CapacityAutomatically provisionedAutomatically provisioned
Client library languagesJava, Python, Go, Node.js, C++, C#, PHP, Ruby
See\n", + " Eventarc client libraries
Java, Python, Go, Node.js, C++, C#, PHP, Ruby
See\n", + " Eventarc client libraries
Compliance standardsDoesn't apply to any feature in PreviewSee Compliance standards\n", + "
Cross-project event deliverySupported
See\n", + " Publish events\n", + " from Google sources
Not supported
Customer managed encryption keysYes
See Use customer-managed encryption keys
Yes
See Use customer-managed encryption keys
Dead letter queues supportedNoYes, through Pub/Sub dead letter topic
See\n", + " Retry events
Event formatEvents are delivered to the destination in a CloudEvents format
See\n", + " Event format
\n", + " Optionally, you can override this behavior by\n", + " defining an HTTP binding
Events are delivered to the destination in a CloudEvents format
\n", + " See Event format
Event size1 MB maximum
See\n", + " Quotas and limits
512 KB maximum
See\n", + " Quotas and limits
LocationsSee Eventarc Advanced locationsSee Eventarc Standard locations
Message filteringFiltering on any and all event attributesFiltering on event type and specific attributes
Message routingMany providers to many destinations
Provider to destination
Message schema conversionYes
See\n", + " Convert the format of\n", + " received events
No
Message transformationYes, through CEL expressions
See\n", + " Transform received events
No
ObservabilityThrough Google Cloud Observability such as Cloud Logging\n", + " and Cloud Monitoring
See\n", + " Eventarc audit logging
Through Google Cloud Observability such as Cloud Logging\n", + " and Cloud Monitoring
See\n", + " Eventarc audit logging
Ordered deliveryThere is no in-order, first-in-first-out delivery guaranteeThere is no in-order, first-in-first-out delivery guarantee
PricingSee Eventarc pricingSee Eventarc pricing
RegionalityRegional
See Understand regionality
Regional, Global
See Understand\n", + " Eventarc locations
REST endpointshttps://eventarc.googleapis.com
See\n", + " Eventarc API
\n", + "https://eventarcpublishing.googleapis.com
See\n", + " Eventarc Publishing API
https://eventarc.googleapis.com
See\n", + " Eventarc API
Retry and retentionAt-least-once event delivery to targets; default message retention duration is 24 hours with\n", + " an exponential backoff delay
\n", + " See Retry events
At-least-once event delivery to targets; default message retention duration is 24 hours with\n", + " an exponential backoff delay
\n", + " See Retry events
Service limitsOne bus per Google Cloud project
100 pipelines per Google Cloud project per\n", + " region
See Quotas and limits
500 triggers per location per Google Cloud project
See\n", + " Quotas and limits
Service perimeter using VPC Service ControlsYes
See\n", + " Set up a service perimeter using\n", + " VPC Service Controls
Yes
See\n", + " Set up a service perimeter using\n", + " VPC Service Controls
Supported sourcesGoogle providers
Direct publishers using the Eventarc Publishing API
See\n", + " Event providers and destinations
Google providers
Google providers through audit logs
Third-party providers
See\n", + " Event providers and destinations
Supported targetsCloud Run functions (including 1st gen)
Cloud Run jobs and services
\n", + " Eventarc Advanced buses
Internal HTTP endpoints in\n", + " VPC networks
Pub/Sub topics
Workflows
See\n", + " Event providers and destinations
Cloud Run functions
Cloud Run services
Internal HTTP endpoints in\n", + " VPC networks
Public endpoints of private and public GKE\n", + " services
Workflows
See\n", + " Event providers and destinations
\n", + "\n", + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "article_section" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "dceddf77-03a8-46d8-b569-929b1115b35d", + "metadata": {}, + "outputs": [], + "source": [ + "soupy = eventarc.soup" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "1445ba84-7e53-4612-8a31-2c60355913ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[
\n", + " \n", + " \n", + "

Eventarc lets you build event-driven architectures without having to implement,\n", + " customize, or maintain the underlying infrastructure.

\n", + "

Eventarc is offered in two editions: Eventarc Advanced and\n", + " Eventarc Standard.

\n", + "

Both editions offer a scalable, serverless, and fully managed eventing solution that lets you\n", + " asynchronously route messages from sources to targets using loosely coupled services that are\n", + " triggered by and react to state changes known as events. Both editions support a range of\n", + " event providers and destinations—including Google Cloud services, custom applications, SaaS\n", + " applications, and third-party services—while managing delivery, security, authorization,\n", + " observability, and error-handling for you.

\n", + "

Note that the underlying data model for both editions of Eventarc is the same. As\n", + " a use case grows in complexity, you have the option of seamlessly transitioning from using\n", + " Eventarc Standard to using Eventarc Advanced.

\n", + "

Editions overview

\n", + "

The following is an overview of both editions. For more detailed information, see the\n", + " Eventarc Advanced overview and the\n", + " Eventarc Standard overview.\n", + "

\n", + "
\n", + "
Eventarc Advanced
\n", + "

Eventarc Advanced is a fully managed platform for building event-driven\n", + " architectures. It lets you collect events that occur in a system and publish them to a central\n", + " bus. Interested services can subscribe to specific messages by creating enrollments. You\n", + " can use the bus to route events from multiple sources in real time and publish them to\n", + " multiple destinations, and optionally transform events prior to delivery to a target.\n", + " Eventarc Advanced is feature rich and is ideal for organizations with\n", + " complex eventing and messaging needs, particularly those grappling with managing numerous\n", + " Pub/Sub topics, Kafka queues, or other third-party messaging systems. By providing\n", + " administrators with enhanced and centralized visibility and control,\n", + " Eventarc Advanced enables organizations to connect multiple teams across\n", + " different projects.

\n", + "
\n", + " \n", + " \"Eventarc\n", + "
Eventarc Advanced lets you receive, filter,\n", + " transform, route, and deliver messages
between different event providers and\n", + " destinations (click diagram to enlarge).
\n", + "
\n", + "
\n", + "
Eventarc Standard
\n", + "

Eventarc Standard is recommended for applications where the focus is on simply\n", + " delivering events from event provider to event destination. It lets you quickly and easily\n", + " consume Google events by defining triggers that filter inbound events according to their source,\n", + " type, and other attributes, and then route them to a specified destination.

\n", + "
\n", + " \n", + " \"Eventarc\n", + "
Eventarc Standard lets you filter and\n", + " route events
from event providers to event destinations (click diagram to enlarge).
\n", + "
\n", + "
\n", + "
\n", + "

Features comparison table

\n", + " \n", + " The following table can help you choose between Eventarc Advanced and\n", + " Eventarc Standard. It assumes your familiarity with the basic concepts of\n", + " \n", + " event-driven architectures.\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FeatureEventarc AdvancedEventarc Standard
Access controlPer message access control and central governance with IAM
\n", + " See Access control with IAM
See Access control with IAM
CapacityAutomatically provisionedAutomatically provisioned
Client library languagesJava, Python, Go, Node.js, C++, C#, PHP, Ruby
See\n", + " Eventarc client libraries
Java, Python, Go, Node.js, C++, C#, PHP, Ruby
See\n", + " Eventarc client libraries
Compliance standardsDoesn't apply to any feature in PreviewSee Compliance standards\n", + "
Cross-project event deliverySupported
See\n", + " Publish events\n", + " from Google sources
Not supported
Customer managed encryption keysYes
See Use customer-managed encryption keys
Yes
See Use customer-managed encryption keys
Dead letter queues supportedNoYes, through Pub/Sub dead letter topic
See\n", + " Retry events
Event formatEvents are delivered to the destination in a CloudEvents format
See\n", + " Event format
\n", + " Optionally, you can override this behavior by\n", + " defining an HTTP binding
Events are delivered to the destination in a CloudEvents format
\n", + " See Event format
Event size1 MB maximum
See\n", + " Quotas and limits
512 KB maximum
See\n", + " Quotas and limits
LocationsSee Eventarc Advanced locationsSee Eventarc Standard locations
Message filteringFiltering on any and all event attributesFiltering on event type and specific attributes
Message routingMany providers to many destinations
Provider to destination
Message schema conversionYes
See\n", + " Convert the format of\n", + " received events
No
Message transformationYes, through CEL expressions
See\n", + " Transform received events
No
ObservabilityThrough Google Cloud Observability such as Cloud Logging\n", + " and Cloud Monitoring
See\n", + " Eventarc audit logging
Through Google Cloud Observability such as Cloud Logging\n", + " and Cloud Monitoring
See\n", + " Eventarc audit logging
Ordered deliveryThere is no in-order, first-in-first-out delivery guaranteeThere is no in-order, first-in-first-out delivery guarantee
PricingSee Eventarc pricingSee Eventarc pricing
RegionalityRegional
See Understand regionality
Regional, Global
See Understand\n", + " Eventarc locations
REST endpointshttps://eventarc.googleapis.com
See\n", + " Eventarc API
\n", + " https://eventarcpublishing.googleapis.com
See\n", + " Eventarc Publishing API
https://eventarc.googleapis.com
See\n", + " Eventarc API
Retry and retentionAt-least-once event delivery to targets; default message retention duration is 24 hours with\n", + " an exponential backoff delay
\n", + " See Retry events
At-least-once event delivery to targets; default message retention duration is 24 hours with\n", + " an exponential backoff delay
\n", + " See Retry events
Service limitsOne bus per Google Cloud project
100 pipelines per Google Cloud project per\n", + " region
See Quotas and limits
500 triggers per location per Google Cloud project
See\n", + " Quotas and limits
Service perimeter using VPC Service ControlsYes
See\n", + " Set up a service perimeter using\n", + " VPC Service Controls
Yes
See\n", + " Set up a service perimeter using\n", + " VPC Service Controls
Supported sourcesGoogle providers
Direct publishers using the Eventarc Publishing API
See\n", + " Event providers and destinations
Google providers
Google providers through audit logs
Third-party providers
See\n", + " Event providers and destinations
Supported targetsCloud Run functions (including 1st gen)
Cloud Run jobs and services
\n", + " Eventarc Advanced buses
Internal HTTP endpoints in\n", + " VPC networks
Pub/Sub topics
Workflows
See\n", + " Event providers and destinations
Cloud Run functions
Cloud Run services
Internal HTTP endpoints in\n", + " VPC networks
Public endpoints of private and public GKE\n", + " services
Workflows
See\n", + " Event providers and destinations
\n", + " \n", + "
]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "art_body = soupy.select('.devsite-article-body')\n", + "art_body" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a2263cb-694d-490c-8fd3-57a53e8dabfc", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/gcp_products.ipynb b/gcp_products.ipynb new file mode 100644 index 0000000..144e838 --- /dev/null +++ b/gcp_products.ipynb @@ -0,0 +1,205 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "id": "bc99549d-4ea3-4c04-a065-1b3af7b5023a", + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "from openai import OpenAI\n", + "import gradio as gr\n", + "import requests\n", + "import json\n", + "from typing import List\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display, update_display" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7dbabd45-29ab-450c-8e19-b105b6996610", + "metadata": {}, + "outputs": [], + "source": [ + "#\n", + "# from Ed Donner's github repo for LLM Engineering course on Udemy: \n", + "# https://github.com/ed-donner/llm_engineering/blob/main/week1/day5.ipynb\n", + "#\n", + "headers = {\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + " \"\"\"\n", + " A utility class to represent a Website that we have scraped, now with links\n", + " \"\"\"\n", + "\n", + " def __init__(self, url):\n", + " self.url = url\n", + " response = requests.get(url, headers=headers)\n", + " self.body = response.content\n", + " soup = BeautifulSoup(self.body, 'html.parser')\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + " if soup.body:\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + " else:\n", + " self.text = \"\"\n", + " links = [link.get('href') for link in soup.find_all('a')]\n", + " self.links = [link for link in links if link]\n", + "\n", + " def get_contents(self):\n", + " return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6da96cc9-2efb-4967-9e94-0b89cf8d82d6", + "metadata": {}, + "outputs": [], + "source": [ + "gcp_products = Website(\"https://cloud.google.com/products\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d305e8dd-443d-4089-832e-b989a6d37fd5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 32 links containing /docs\n" + ] + } + ], + "source": [ + "doc_links = set()\n", + "for link in gcp_products.links:\n", + " if re.search(r'(.*)\\/docs', link):\n", + " doc_links.add(link)\n", + "\n", + "print('Found {} links containing /docs'.format(len(doc_links)))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "3931609f-0d1c-43b5-ac07-0669d0072362", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'https://cloud.google.com/api-gateway/docs',\n", + " 'https://cloud.google.com/artifact-registry/docs',\n", + " 'https://cloud.google.com/bigquery-transfer/docs/introduction',\n", + " 'https://cloud.google.com/compute/docs/nodes/sole-tenant-nodes',\n", + " 'https://cloud.google.com/config-connector/docs/overview',\n", + " 'https://cloud.google.com/deep-learning-containers/docs',\n", + " 'https://cloud.google.com/deployment-manager/docs',\n", + " 'https://cloud.google.com/docs',\n", + " 'https://cloud.google.com/docs/get-started',\n", + " 'https://cloud.google.com/docs/quotas',\n", + " 'https://cloud.google.com/docs/samples',\n", + " 'https://cloud.google.com/docs/terraform/blueprints/terraform-blueprints',\n", + " 'https://cloud.google.com/docs/tutorials?doctype=quickstart',\n", + " 'https://cloud.google.com/endpoints/docs',\n", + " 'https://cloud.google.com/error-reporting/docs/grouping-errors',\n", + " 'https://cloud.google.com/eventarc/docs',\n", + " 'https://cloud.google.com/infrastructure-manager/docs',\n", + " 'https://cloud.google.com/livestream/docs',\n", + " 'https://cloud.google.com/migration-center/docs',\n", + " 'https://cloud.google.com/profiler/docs',\n", + " 'https://cloud.google.com/recommender/docs/whatis-activeassist',\n", + " 'https://cloud.google.com/scheduler/docs',\n", + " 'https://cloud.google.com/service-catalog/docs',\n", + " 'https://cloud.google.com/shell/docs',\n", + " 'https://cloud.google.com/source-repositories/docs',\n", + " 'https://cloud.google.com/tasks/docs',\n", + " 'https://cloud.google.com/trace/docs',\n", + " 'https://cloud.google.com/transcoder/docs',\n", + " 'https://cloud.google.com/transfer-appliance/docs/4.0/overview',\n", + " 'https://cloud.google.com/video-stitcher/docs',\n", + " 'https://knative.dev/docs/',\n", + " 'https://www.opencue.io/docs/getting-started/'}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc_links" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "6e088ecd-c6fc-46ab-8e09-612509fc15c1", + "metadata": {}, + "outputs": [], + "source": [ + "eventarc_page = Website('https://cloud.google.com/eventarc/docs')" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "d46411f1-6443-4876-b9ea-5492c6d7392e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n \\n \\n \\n \\n \\n \\n \\n\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n Eventarc overview  |  Google Cloud\\n\\n\\n \\n \\n \\n \\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n \\n \\n \\n \\n \\n \\n \\n\\n \\n \\n \\n \\n \\n \\n \\n Skip to main content\\n \\n
\\n \\n\\n\\n\\n \\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n
\\n
\\n
\\n
\\n \\n
\\n\\n \\n \\n \\n \\n \"Google\\n \\n \\n\\n\\n\\n\\n \\n \\n \\n
    \\n \\n
  • \\n \\n \\n \\n \\n \\n \\n
  • \\n \\n
\\n
\\n\\n
\\n
\\n
\\n \\n \\n \\n \\n\\n \\n\\n \\n\\n \\n
\\n \\n\\n
\\n
\\n \\n
\\n \\n
\\n \\n
\\n
\\n /\\n
\\n
\\n
\\n
\\n \\n\\n\\n
\\n\\n \\n\\n \\n\\n \\n\\n \\n\\n \\n\\n \\n\\n \\n\\n\\n\\n \\n \\n Console\\n\\n \\n\\n\\n\\n \\n \\n \\n \\n Sign in\\n \\n \\n \\n
\\n
\\n
\\n\\n\\n\\n
\\n
\\n \\n \\n \\n \\n \\n \\n \\n
\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n\\n \\n\\n \\n\\n \\n \\n
\\n \\n\\n \\n Contact Us\\n\\n \\n Start free\\n\\n
\\n \\n
\\n \\n
\\n
\\n\\n
\\n\\n\\n\\n \\n\\n \\n
\\n \\n \\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n
\\n \\n \\n \\n \\n
\\n\\n\\n \\n
\\n
\\n
\\n
\\n
\\n \\n \\n \\n \\n
\\n
\\n \\n \\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n
\\n \\n \\n \\n \\n \\n\\n \\n \\n \\n\\n \\n\\n \\n

\\n Eventarc overview\\n
\\n\\n \\n \\n\\n \\n \\n Stay organized with collections\\n \\n \\n \\n Save and categorize content based on your preferences.\\n \\n
\\n \\n

\\n
\\n \\n\\n \\n \\n \\n \\n \\n\\n
\\n\\n \\n \\n \\n \\n
\\n \\n \\n \\n \\n \\n \\n\\n \\n Advanced\\n\\n \\n\\n \\n \\n \\n \\n \\n \\n \\n\\n \\n Standard\\n\\n \\n\\n \\n \\n
\\n \\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\n \\n\\n

Eventarc lets you build event-driven architectures without having to implement,\\n customize, or maintain the underlying infrastructure.

\\n\\n\\n\\n

Eventarc is offered in two editions: Eventarc Advanced and\\n Eventarc Standard.

\\n\\n

Both editions offer a scalable, serverless, and fully managed eventing solution that lets you\\n asynchronously route messages from sources to targets using loosely coupled services that are\\n triggered by and react to state changes known as events. Both editions support a range of\\n event providers and destinations\\xe2\\x80\\x94including Google Cloud services, custom applications, SaaS\\n applications, and third-party services\\xe2\\x80\\x94while managing delivery, security, authorization,\\n observability, and error-handling for you.

\\n\\n

Note that the underlying data model for both editions of Eventarc is the same. As\\n a use case grows in complexity, you have the option of seamlessly transitioning from using\\n Eventarc Standard to using Eventarc Advanced.

\\n\\n\\n

Editions overview

\\n

The following is an overview of both editions. For more detailed information, see the\\n Eventarc Advanced overview and the\\n Eventarc Standard overview.\\n

\\n\\n
\\n
Eventarc Advanced
\\n

Eventarc Advanced is a fully managed platform for building event-driven\\n architectures. It lets you collect events that occur in a system and publish them to a central\\n bus. Interested services can subscribe to specific messages by creating enrollments. You\\n can use the bus to route events from multiple sources in real time and publish them to\\n multiple destinations, and optionally transform events prior to delivery to a target.\\n Eventarc Advanced is feature rich and is ideal for organizations with\\n complex eventing and messaging needs, particularly those grappling with managing numerous\\n Pub/Sub topics, Kafka queues, or other third-party messaging systems. By providing\\n administrators with enhanced and centralized visibility and control,\\n Eventarc Advanced enables organizations to connect multiple teams across\\n different projects.

\\n
\\n \\n \"Eventarc\\n
Eventarc Advanced lets you receive, filter,\\n transform, route, and deliver messages
between different event providers and\\n destinations (click diagram to enlarge).
\\n
\\n
\\n
Eventarc Standard
\\n

Eventarc Standard is recommended for applications where the focus is on simply\\n delivering events from event provider to event destination. It lets you quickly and easily\\n consume Google events by defining triggers that filter inbound events according to their source,\\n type, and other attributes, and then route them to a specified destination.

\\n
\\n \\n \"Eventarc\\n
Eventarc Standard lets you filter and\\n route events
from event providers to event destinations (click diagram to enlarge).
\\n
\\n
\\n
\\n\\n\\n

Features comparison table

\\n\\nThe following table can help you choose between Eventarc Advanced and\\nEventarc Standard. It assumes your familiarity with the basic concepts of\\n\\n event-driven architectures.\\n\\n\\n\\n\\n\\n \\n \\n \\n\\n\\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n \\n \\n \\n\\n\\n
FeatureEventarc AdvancedEventarc Standard
Access controlPer message access control and central governance with IAM
\\n See Access control with IAM
See Access control with IAM
CapacityAutomatically provisionedAutomatically provisioned
Client library languagesJava, Python, Go, Node.js, C++, C#, PHP, Ruby
See\\n Eventarc client libraries
Java, Python, Go, Node.js, C++, C#, PHP, Ruby
See\\n Eventarc client libraries
Compliance standardsDoesn\\'t apply to any feature in PreviewSee Compliance standards\\n
Cross-project event deliverySupported
See\\n Publish events\\n from Google sources
Not supported
Customer managed encryption keysYes
See Use customer-managed encryption keys
Yes
See Use customer-managed encryption keys
Dead letter queues supportedNoYes, through Pub/Sub dead letter topic
See\\n Retry events
Event formatEvents are delivered to the destination in a CloudEvents format
See\\n Event format
\\n Optionally, you can override this behavior by\\n defining an HTTP binding
Events are delivered to the destination in a CloudEvents format
\\n See Event format
Event size1 MB maximum
See\\n Quotas and limits
512 KB maximum
See\\n Quotas and limits
LocationsSee Eventarc Advanced locationsSee Eventarc Standard locations
Message filteringFiltering on any and all event attributesFiltering on event type and specific attributes
Message routingMany providers to many destinations
Provider to destination
Message schema conversionYes
See\\n Convert the format of\\n received events
No
Message transformationYes, through CEL expressions
See\\n Transform received events
No
ObservabilityThrough Google Cloud Observability such as Cloud Logging\\n and Cloud Monitoring
See\\n Eventarc audit logging
Through Google Cloud Observability such as Cloud Logging\\n and Cloud Monitoring
See\\n Eventarc audit logging
Ordered deliveryThere is no in-order, first-in-first-out delivery guaranteeThere is no in-order, first-in-first-out delivery guarantee
PricingSee Eventarc pricingSee Eventarc pricing
RegionalityRegional
See Understand regionality
Regional, Global
See Understand\\n Eventarc locations
REST endpointshttps://eventarc.googleapis.com
See\\n Eventarc API
\\n https://eventarcpublishing.googleapis.com
See\\n Eventarc Publishing API
https://eventarc.googleapis.com
See\\n Eventarc API
Retry and retentionAt-least-once event delivery to targets; default message retention duration is 24 hours with\\n an exponential backoff delay
\\n See Retry events
At-least-once event delivery to targets; default message retention duration is 24 hours with\\n an exponential backoff delay
\\n See Retry events
Service limitsOne bus per Google Cloud project
100 pipelines per Google Cloud project per\\n region
See Quotas and limits
500 triggers per location per Google Cloud project
See\\n Quotas and limits
Service perimeter using VPC Service ControlsYes
See\\n Set up a service perimeter using\\n VPC Service Controls
Yes
See\\n Set up a service perimeter using\\n VPC Service Controls
Supported sourcesGoogle providers
Direct publishers using the Eventarc Publishing API
See\\n Event providers and destinations
Google providers
Google providers through audit logs
Third-party providers
See\\n Event providers and destinations
Supported targetsCloud Run functions (including 1st gen)
Cloud Run jobs and services
\\n Eventarc Advanced buses
Internal HTTP endpoints in\\n VPC networks
Pub/Sub topics
Workflows
See\\n Event providers and destinations
Cloud Run functions
Cloud Run services
Internal HTTP endpoints in\\n VPC networks
Public endpoints of private and public GKE\\n services
Workflows
See\\n Event providers and destinations
\\n\\n\\n\\n \\n \\n\\n \\n \\n \\n\\n \\n
\\n\\n \\n\\n \\n \\n \\n \\n \\n \\n \\n \\n \\n\\n \\n\\n \\n \\n \\n \\n\\n \\n
\\n \\n \\n
\\n
\\n\\n\\n\\n

Except as otherwise noted, the content of this page is licensed under the Creative Commons Attribution 4.0 License, and code samples are licensed under the Apache 2.0 License. For details, see the Google Developers Site Policies. Java is a registered trademark of Oracle and/or its affiliates.

\\n

Last updated 2025-05-08 UTC.

\\n
\\n\\n\\n\\n\\n\\n\\n \\n
\\n \\n \\n \\n \\n \\n \\n \\n \\n
\\n \\n
\\n
\\n \\n \\n \\n \\n \\n \\n \\n \\n\\n \\n \\n \\n \\n \\n\\n\\n \\n \\n \\n \\n \\n \\n
\\n \\n \\n \\n \\n \\n \\n \\n\\n \\n \\n \\n \\n \\n \\n\\n\\n \\n\\n\\n\\n\\n\\n\\n \\n\\n\\n \\n \\n'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eventarc_page.body" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9c95041-c470-4ee1-bf77-c1019edd29b2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sample.ipynb b/sample.ipynb new file mode 100644 index 0000000..3eb4a4d --- /dev/null +++ b/sample.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "771cb720-1a7c-4c63-aeba-503ee926fce9", + "metadata": {}, + "outputs": [], + "source": [ + "from openai import OpenAI\n", + "import gradio as gr\n", + "import requests\n", + "import json\n", + "from typing import List\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import Markdown, display, update_display" + ] + }, + { + "cell_type": "markdown", + "id": "f67efb4a-efc3-483e-84aa-60a31b888305", + "metadata": {}, + "source": [ + "The next 2 code blocks from [Ollama website](https://ollama.com/blog/openai-compatibility)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1d8b2464-352f-4df9-8ea9-796d1f886ddd", + "metadata": {}, + "outputs": [], + "source": [ + "from openai import OpenAI\n", + "\n", + "client = OpenAI(\n", + " base_url = 'http://localhost:11434/v1',\n", + " api_key='ollama', # required, but unused\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d629773a-c8cd-4ea9-a4c7-17e3d52f3415", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The 2020 World Series was played between the Los Angeles Dodgers and the Tampa Bay Rays at Globe Life Field, as well as Rogers Centre due to COVID-19 restrictions in Texas\n" + ] + } + ], + "source": [ + "response = client.chat.completions.create(\n", + " model=\"llama3.2\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\"role\": \"user\", \"content\": \"Who won the world series in 2020?\"},\n", + " {\"role\": \"assistant\", \"content\": \"The LA Dodgers won in 2020.\"},\n", + " {\"role\": \"user\", \"content\": \"Where was it played?\"}\n", + " ]\n", + ")\n", + "\n", + "print(response.choices[0].message.content)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c7d1eea8-cf7a-4f26-891f-f649e3ab6f03", + "metadata": {}, + "outputs": [], + "source": [ + "def get_chat_response(client, model, user_message, system_message=\"You are a helpful assistant.\"):\n", + " response = client.chat.completions.create(\n", + " model=model,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ]\n", + " )\n", + "\n", + " return response.choices[0].message.content" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "744ae925-fd10-48f7-ba5f-28b9c2467582", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "I'm not aware of the current date, as I'm a text-based AI model and do not have real-time access to the current date or time. My training data only goes up until December 2023, but I don't know what day of the week it is or what month it currently is.\n", + "\n", + "If you need to know the current date or time, I recommend checking your device's clock or searching online for \"current date\" or \"current time.\"\n" + ] + } + ], + "source": [ + "print(get_chat_response(client, \"llama3.2\", \"What is today's date?\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "70e1608f-976c-4257-9a66-bb4cc19dec93", + "metadata": {}, + "outputs": [], + "source": [ + "#print(get_chat_response(client, \"gemma3\", \"What is your cutoff date?\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9f9ca64a-c43d-4f7d-8fea-87a8d0a38835", + "metadata": {}, + "outputs": [], + "source": [ + "#print(get_chat_response(client, \"gemma3\", \"Are you able to generate images?\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4b7f8415-783b-4ebd-abec-852443abd87c", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "#\n", + "# from Ed Donner's github repo for LLM Engineering course on Udemy: \n", + "# https://github.com/ed-donner/llm_engineering/blob/main/week1/day5.ipynb\n", + "#\n", + "headers = {\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", + "}\n", + "\n", + "class Website:\n", + " \"\"\"\n", + " A utility class to represent a Website that we have scraped, now with links\n", + " \"\"\"\n", + "\n", + " def __init__(self, url):\n", + " self.url = url\n", + " response = requests.get(url, headers=headers)\n", + " self.body = response.content\n", + " soup = BeautifulSoup(self.body, 'html.parser')\n", + " self.title = soup.title.string if soup.title else \"No title found\"\n", + " if soup.body:\n", + " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", + " irrelevant.decompose()\n", + " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", + " else:\n", + " self.text = \"\"\n", + " links = [link.get('href') for link in soup.find_all('a')]\n", + " self.links = [link for link in links if link]\n", + "\n", + " def get_contents(self):\n", + " return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "badd4eac-8b1c-4068-ba46-01ef60948c59", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of links: 194\n" + ] + } + ], + "source": [ + "eventarc_website = Website(\"https://cloud.google.com/eventarc/docs\")\n", + "eventarc_links = eventarc_website.links\n", + "print('Number of links: {}'.format(len(eventarc_links)))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "6b0b2f4c-027f-48aa-875a-24939a5120c1", + "metadata": {}, + "outputs": [], + "source": [ + "system_message = \"You are an helpful assistant and an expert in determining if links are related to \"\n", + "system_message += \"Google Cloud eventarcs. Decide which links are relevant to the Google Cloud Professional \"\n", + "system_message += \"Developer certification and return only those links. respond with the full https URL in JSON format. \"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f9f9b6d5-04cd-465f-a16a-6b8757ab7ec9", + "metadata": {}, + "outputs": [], + "source": [ + "def get_links_user_prompt(website):\n", + " user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n", + " user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n", + "Do not include Terms of Service, Privacy, email links.\\n\"\n", + " user_prompt += \"Links (some might be relative links):\\n\"\n", + " user_prompt += \"\\n\".join(website.links)\n", + " return user_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "6ceccbf7-156e-40bd-ac85-a84125c850bb", + "metadata": {}, + "outputs": [], + "source": [ + "def get_links(client, url, model, user_prompt, system_prompt):\n", + " website = Website(url)\n", + " response = client.chat.completions.create(\n", + " model=model,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}\n", + " ],\n", + " response_format={\"type\": \"json_object\"}\n", + " )\n", + " result = response.choices[0].message.content\n", + " return json.loads(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "99fd93c8-6443-4b98-9e1f-5f1b24e92940", + "metadata": {}, + "outputs": [], + "source": [ + "user_prompt = get_links_user_prompt(eventarc_website)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "a733e5cc-0e22-47e0-84eb-0d6a4bbbf4f4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'links': ['https://cloud.google.com/eventarc/docs', 'https://cloud.google.com/eventarcStandard/docs/overview', 'https://cloud.google.com/eventarcAdvanced/docs/overview', 'https://cloud.google.com/eventarcStandard/docs/apis', 'https://cloud.google.com/eventarcStandard/docs/samples', 'https://cloud.google.com/eventarcaAdvanced/docs/resources', 'https://www.apache.org/licenses/LICENSE-2.0', 'https://github.com/googlecloudPlatform']}\n" + ] + } + ], + "source": [ + "llama32_relevant_links = get_links(client, eventarc_website.url, \"llama3.2\", user_prompt, system_message)\n", + "print(llama32_relevant_links)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "423642fe-fb0e-4c8a-96bc-11e6416048dd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of relevant links found by llama3.2: 8\n" + ] + } + ], + "source": [ + "print(\"Number of relevant links found by llama3.2: {}\".format(len(llama32_relevant_links['links'])))" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "16511679-7b47-451e-b469-4ca9b23d3576", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{}\n" + ] + } + ], + "source": [ + "gemma3_relevant_links = get_links(client, eventarc_website.url, \"gemma3\", user_prompt, system_message)\n", + "print(gemma3_relevant_links)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66db5fe2-6a51-48d0-80d0-85abb6afec21", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}