{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "771cb720-1a7c-4c63-aeba-503ee926fce9", "metadata": {}, "outputs": [], "source": [ "from openai import OpenAI\n", "import gradio as gr\n", "import requests\n", "import json\n", "from typing import List\n", "from bs4 import BeautifulSoup\n", "from IPython.display import Markdown, display, update_display" ] }, { "cell_type": "markdown", "id": "f67efb4a-efc3-483e-84aa-60a31b888305", "metadata": {}, "source": [ "The next 2 code blocks from [Ollama website](https://ollama.com/blog/openai-compatibility)" ] }, { "cell_type": "code", "execution_count": 2, "id": "1d8b2464-352f-4df9-8ea9-796d1f886ddd", "metadata": {}, "outputs": [], "source": [ "from openai import OpenAI\n", "\n", "client = OpenAI(\n", " base_url = 'http://localhost:11434/v1',\n", " api_key='ollama', # required, but unused\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "id": "d629773a-c8cd-4ea9-a4c7-17e3d52f3415", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The 2020 World Series was played between the Los Angeles Dodgers and the Tampa Bay Rays at Globe Life Field, as well as Rogers Centre due to COVID-19 restrictions in Texas\n" ] } ], "source": [ "response = client.chat.completions.create(\n", " model=\"llama3.2\",\n", " messages=[\n", " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", " {\"role\": \"user\", \"content\": \"Who won the world series in 2020?\"},\n", " {\"role\": \"assistant\", \"content\": \"The LA Dodgers won in 2020.\"},\n", " {\"role\": \"user\", \"content\": \"Where was it played?\"}\n", " ]\n", ")\n", "\n", "print(response.choices[0].message.content)" ] }, { "cell_type": "code", "execution_count": 4, "id": "c7d1eea8-cf7a-4f26-891f-f649e3ab6f03", "metadata": {}, "outputs": [], "source": [ "def get_chat_response(client, model, user_message, system_message=\"You are a helpful assistant.\"):\n", " response = client.chat.completions.create(\n", " model=model,\n", " messages=[\n", " {\"role\": \"system\", \"content\": system_message},\n", " {\"role\": \"user\", \"content\": user_message}\n", " ]\n", " )\n", "\n", " return response.choices[0].message.content" ] }, { "cell_type": "code", "execution_count": 5, "id": "744ae925-fd10-48f7-ba5f-28b9c2467582", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "I'm not aware of the current date, as I'm a text-based AI model and do not have real-time access to the current date or time. My training data only goes up until December 2023, but I don't know what day of the week it is or what month it currently is.\n", "\n", "If you need to know the current date or time, I recommend checking your device's clock or searching online for \"current date\" or \"current time.\"\n" ] } ], "source": [ "print(get_chat_response(client, \"llama3.2\", \"What is today's date?\"))" ] }, { "cell_type": "code", "execution_count": 6, "id": "70e1608f-976c-4257-9a66-bb4cc19dec93", "metadata": {}, "outputs": [], "source": [ "#print(get_chat_response(client, \"gemma3\", \"What is your cutoff date?\"))" ] }, { "cell_type": "code", "execution_count": 7, "id": "9f9ca64a-c43d-4f7d-8fea-87a8d0a38835", "metadata": {}, "outputs": [], "source": [ "#print(get_chat_response(client, \"gemma3\", \"Are you able to generate images?\"))" ] }, { "cell_type": "code", "execution_count": 8, "id": "4b7f8415-783b-4ebd-abec-852443abd87c", "metadata": { "editable": true, "slideshow": { "slide_type": "" }, "tags": [] }, "outputs": [], "source": [ "#\n", "# from Ed Donner's github repo for LLM Engineering course on Udemy: \n", "# https://github.com/ed-donner/llm_engineering/blob/main/week1/day5.ipynb\n", "#\n", "headers = {\n", " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n", "}\n", "\n", "class Website:\n", " \"\"\"\n", " A utility class to represent a Website that we have scraped, now with links\n", " \"\"\"\n", "\n", " def __init__(self, url):\n", " self.url = url\n", " response = requests.get(url, headers=headers)\n", " self.body = response.content\n", " soup = BeautifulSoup(self.body, 'html.parser')\n", " self.title = soup.title.string if soup.title else \"No title found\"\n", " if soup.body:\n", " for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n", " irrelevant.decompose()\n", " self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n", " else:\n", " self.text = \"\"\n", " links = [link.get('href') for link in soup.find_all('a')]\n", " self.links = [link for link in links if link]\n", "\n", " def get_contents(self):\n", " return f\"Webpage Title:\\n{self.title}\\nWebpage Contents:\\n{self.text}\\n\\n\"\n" ] }, { "cell_type": "code", "execution_count": 12, "id": "badd4eac-8b1c-4068-ba46-01ef60948c59", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of links: 194\n" ] } ], "source": [ "eventarc_website = Website(\"https://cloud.google.com/eventarc/docs\")\n", "eventarc_links = eventarc_website.links\n", "print('Number of links: {}'.format(len(eventarc_links)))" ] }, { "cell_type": "code", "execution_count": 13, "id": "6b0b2f4c-027f-48aa-875a-24939a5120c1", "metadata": {}, "outputs": [], "source": [ "system_message = \"You are an helpful assistant and an expert in determining if links are related to \"\n", "system_message += \"Google Cloud eventarcs. Decide which links are relevant to the Google Cloud Professional \"\n", "system_message += \"Developer certification and return only those links. respond with the full https URL in JSON format. \"\n" ] }, { "cell_type": "code", "execution_count": 14, "id": "f9f9b6d5-04cd-465f-a16a-6b8757ab7ec9", "metadata": {}, "outputs": [], "source": [ "def get_links_user_prompt(website):\n", " user_prompt = f\"Here is the list of links on the website of {website.url} - \"\n", " user_prompt += \"please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \\\n", "Do not include Terms of Service, Privacy, email links.\\n\"\n", " user_prompt += \"Links (some might be relative links):\\n\"\n", " user_prompt += \"\\n\".join(website.links)\n", " return user_prompt" ] }, { "cell_type": "code", "execution_count": 22, "id": "6ceccbf7-156e-40bd-ac85-a84125c850bb", "metadata": {}, "outputs": [], "source": [ "def get_links(client, url, model, user_prompt, system_prompt):\n", " website = Website(url)\n", " response = client.chat.completions.create(\n", " model=model,\n", " messages=[\n", " {\"role\": \"system\", \"content\": system_prompt},\n", " {\"role\": \"user\", \"content\": user_prompt}\n", " ],\n", " response_format={\"type\": \"json_object\"}\n", " )\n", " result = response.choices[0].message.content\n", " return json.loads(result)" ] }, { "cell_type": "code", "execution_count": 23, "id": "99fd93c8-6443-4b98-9e1f-5f1b24e92940", "metadata": {}, "outputs": [], "source": [ "user_prompt = get_links_user_prompt(eventarc_website)" ] }, { "cell_type": "code", "execution_count": 28, "id": "a733e5cc-0e22-47e0-84eb-0d6a4bbbf4f4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'links': ['https://cloud.google.com/eventarc/docs', 'https://cloud.google.com/eventarcStandard/docs/overview', 'https://cloud.google.com/eventarcAdvanced/docs/overview', 'https://cloud.google.com/eventarcStandard/docs/apis', 'https://cloud.google.com/eventarcStandard/docs/samples', 'https://cloud.google.com/eventarcaAdvanced/docs/resources', 'https://www.apache.org/licenses/LICENSE-2.0', 'https://github.com/googlecloudPlatform']}\n" ] } ], "source": [ "llama32_relevant_links = get_links(client, eventarc_website.url, \"llama3.2\", user_prompt, system_message)\n", "print(llama32_relevant_links)" ] }, { "cell_type": "code", "execution_count": 29, "id": "423642fe-fb0e-4c8a-96bc-11e6416048dd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of relevant links found by llama3.2: 8\n" ] } ], "source": [ "print(\"Number of relevant links found by llama3.2: {}\".format(len(llama32_relevant_links['links'])))" ] }, { "cell_type": "code", "execution_count": 31, "id": "16511679-7b47-451e-b469-4ca9b23d3576", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{}\n" ] } ], "source": [ "gemma3_relevant_links = get_links(client, eventarc_website.url, \"gemma3\", user_prompt, system_message)\n", "print(gemma3_relevant_links)" ] }, { "cell_type": "code", "execution_count": null, "id": "66db5fe2-6a51-48d0-80d0-85abb6afec21", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.17" } }, "nbformat": 4, "nbformat_minor": 5 }