In [1]:
from openai import OpenAI
import gradio as gr
import requests
import json
from typing import List
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display

The next two code blocks are taken from the [Ollama website](https://ollama.com/blog/openai-compatibility).

In [2]:
from openai import OpenAI

# Point the OpenAI SDK at the OpenAI-compatible endpoint served on localhost:11434.
# The SDK requires an api_key value, but it is not used by this endpoint.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

In [3]:
# Multi-turn example: the earlier user/assistant exchange is sent along with the
# follow-up question so the model has the context needed to answer it.
response = client.chat.completions.create(
    messages=[
        dict(role=role, content=content)
        for role, content in (
            ("system", "You are a helpful assistant."),
            ("user", "Who won the world series in 2020?"),
            ("assistant", "The LA Dodgers won in 2020."),
            ("user", "Where was it played?"),
        )
    ],
    model="llama3.2",
)

# Only the text of the first returned choice is shown.
print(response.choices[0].message.content)

The 2020 World Series was played between the Los Angeles Dodgers and the Tampa Bay Rays at Globe Life Field, as well as Rogers Centre due to COVID-19 restrictions in Texas


In [4]:
def get_chat_response(client, model, user_message, system_message="You are a helpful assistant."):
    """
    Send a single-turn chat request and return the text of the reply.

    Args:
        client: object exposing `chat.completions.create(model=..., messages=...)`.
        model: model name forwarded unchanged to the client.
        user_message: content of the user turn.
        system_message: content of the system turn that precedes the user turn.

    Returns:
        The `message.content` of the first choice in the response.
    """
    conversation = [
        dict(role="system", content=system_message),
        dict(role="user", content=user_message),
    ]
    completion = client.chat.completions.create(model=model, messages=conversation)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [5]:
# Single-turn query using the default system message.
print(get_chat_response(client, model="llama3.2", user_message="What is today's date?"))

I'm not aware of the current date, as I'm a text-based AI model and do not have real-time access to the current date or time. My training data only goes up until December 2023, but I don't know what day of the week it is or what month it currently is.

If you need to know the current date or time, I recommend checking your device's clock or searching online for "current date" or "current time."


In [6]:
#print(get_chat_response(client, "gemma3", "What is your cutoff date?"))

In [7]:
#print(get_chat_response(client, "gemma3", "Are you able to generate images?"))

In [8]:
#
# from Ed Donner's github repo for LLM Engineering course on Udemy: 
# https://github.com/ed-donner/llm_engineering/blob/main/week1/day5.ipynb
#
# Request headers sent with every page fetch; only a browser-style User-Agent is set.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    )
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links

    Attributes set by the constructor:
        url:   the URL that was requested
        body:  raw bytes of the HTTP response
        title: string content of the <title> tag, or "No title found"
        text:  text of <body> with script/style/img/input elements removed
               ("" when the page has no <body>)
        links: every non-empty href found on an <a> tag, exactly as written
               in the page (so relative links stay relative)
    """

    def __init__(self, url, timeout=30):
        """
        Fetch `url` and extract its title, visible text and links.

        Args:
            url: address of the page to download.
            timeout: seconds handed to `requests.get`. Defaults to 30.

        Raises:
            Whatever `requests.get` raises, including a timeout error when the
            server does not respond within `timeout` seconds.
        """
        self.url = url
        # Without a timeout requests waits indefinitely on an unresponsive
        # host, which would hang the notebook cell.
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # .string is None when <title> is empty or holds nested markup; use the
        # same placeholder as for a missing tag instead of storing None.
        title_tag = soup.title
        title_text = title_tag.string if title_tag else None
        self.title = title_text if title_text else "No title found"

        if soup.body:
            # Drop elements that contribute no readable text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Anchors without an href (or with an empty one) are skipped.
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and text formatted as one prompt-ready string."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"


In [12]:
# Scrape the Eventarc docs page once and keep its links for the filtering steps below.
eventarc_website = Website("https://cloud.google.com/eventarc/docs")
eventarc_links = eventarc_website.links
print(f"Number of links: {len(eventarc_links)}")

Number of links: 194


In [13]:
# System prompt for the link-filtering requests; the text is assembled from
# adjacent literals into a single string.
system_message = (
    "You are an helpful assistant and an expert in determining if links are related to "
    "Google Cloud eventarcs. Decide which links are relevant to the Google Cloud Professional "
    "Developer certification and return only those links. respond with the full https URL in JSON format. "
)


In [14]:
def get_links_user_prompt(website):
    """
    Build the user prompt asking the model to pick relevant links from a page.

    Args:
        website: object with a `url` attribute and a `links` attribute holding
            an iterable of strings.

    Returns:
        The instruction text followed by the links, one per line.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    request = (
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    return "".join([intro, request, heading, "\n".join(website.links)])

In [22]:
def get_links(client, url, model, user_prompt, system_prompt):
    """
    Ask a chat model to select relevant links and return its JSON answer.

    Args:
        client: object exposing `chat.completions.create(...)`.
        url: kept so existing calls keep working; it is not used, because the
            prompt text is supplied ready-made through `user_prompt`.
        model: model name forwarded unchanged to the client.
        user_prompt: content of the user turn (the list of candidate links).
        system_prompt: content of the system turn.

    Returns:
        The reply content of the first choice, parsed with `json.loads`.

    Raises:
        json.JSONDecodeError: if the reply content is not valid JSON.
    """
    # The page is deliberately not fetched here: the earlier version built a
    # Website(url) and discarded it, costing one HTTP request per call.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        # Request a JSON object so the reply can be parsed directly.
        response_format={"type": "json_object"},
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [23]:
# Build the user prompt once from the already-scraped page; the model calls below reuse it.
user_prompt = get_links_user_prompt(eventarc_website)

In [28]:
# Run the link filter with llama3.2 and show the parsed JSON reply.
llama32_relevant_links = get_links(
    client=client,
    url=eventarc_website.url,
    model="llama3.2",
    user_prompt=user_prompt,
    system_prompt=system_message,
)
print(llama32_relevant_links)

{'links': ['https://cloud.google.com/eventarc/docs', 'https://cloud.google.com/eventarcStandard/docs/overview', 'https://cloud.google.com/eventarcAdvanced/docs/overview', 'https://cloud.google.com/eventarcStandard/docs/apis', 'https://cloud.google.com/eventarcStandard/docs/samples', 'https://cloud.google.com/eventarcaAdvanced/docs/resources', 'https://www.apache.org/licenses/LICENSE-2.0', 'https://github.com/googlecloudPlatform']}


In [29]:
# Count the entries under the 'links' key of the llama3.2 reply.
print(f"Number of relevant links found by llama3.2: {len(llama32_relevant_links['links'])}")

Number of relevant links found by llama3.2: 8


In [31]:
# Same request as the llama3.2 run, sent to gemma3 for comparison.
gemma3_relevant_links = get_links(
    client=client,
    url=eventarc_website.url,
    model="gemma3",
    user_prompt=user_prompt,
    system_prompt=system_message,
)
print(gemma3_relevant_links)

{}
