diff --git a/README.md b/README.md
index 5f27338..bf7981f 100644
--- a/README.md
+++ b/README.md
@@ -14,12 +14,19 @@
 
 
 ```bash
-# Ch 12 Q2
+# Exam 12 - Question 2
 
 curl -X POST http://localhost:8080/api/ai/chat-with-sources  -H "Content-Type: application/json"  -d '{"question": "You are an IT Specialist at a technology company, and your Dataproc cluster runs in a single Virtual Private Cloud (VPC) network in a single subnetwork with range 172.16.20.128/25. The subnetwork runs out of private IP addresses. Your manager asks you to find a way to add new VMs for communication with the cluster while minimizing the steps involved. What should you do? A. Create a new subnetwork in the existing VPC with a range of 172.16.21.0/24 and configure the VMs to use that subnetwork. B. Create a new VPC network for the VMs with a subnet of 172.32.0.0/16. Enable VPC network Peering between the Dataproc VPC network and the VMs VPC network. Configure a custom Route exchange. C. Configure Shared VPC for the existing VPC and add the VMs to a new subnetwork in the Shared VPC. D. Modify the existing subnet range to 172.16.20.0/24."}'
 ```
 
 ```bash
+# Exam 12 - Question 2
+# OPEN AI
+
+curl -X POST http://localhost:8080/openai/chat-with-sources  -H "Content-Type: application/json"  -d '{"question": "You are an IT Specialist at a technology company, and your Dataproc cluster runs in a single Virtual Private Cloud (VPC) network in a single subnetwork with range 172.16.20.128/25. The subnetwork runs out of private IP addresses. Your manager asks you to find a way to add new VMs for communication with the cluster while minimizing the steps involved. What should you do? A. Create a new subnetwork in the existing VPC with a range of 172.16.21.0/24 and configure the VMs to use that subnetwork. B. Create a new VPC network for the VMs with a subnet of 172.32.0.0/16. Enable VPC network Peering between the Dataproc VPC network and the VMs VPC network. Configure a custom Route exchange. C. Configure Shared VPC for the existing VPC and add the VMs to a new subnetwork in the Shared VPC. D. Modify the existing subnet range to 172.16.20.0/24."}'
+```
+
+```bash
 curl -X POST http://localhost:8080/api/ai/chat-with-sources  -H "Content-Type: application/json"  -d '{"question": "As a developer at a software company, you have been working on a project that utilizes Google Cloud services. Initially, you used your personal credit card for the expenses and later got reimbursed by your company. However, your company now wants to directly handle the billing for these services in their monthly invoice. What should you do to make this happen? A. Use Google Cloud Pub/Sub to send billing notifications to your finance team. B. Change the billing account of your projects to the billing account of your company. C. Enable Google Cloud Monitoring alerts for billing thresholds to notify your financial team. D. Share your credit card details with your financial team and have them add it to a new billing account."}'
 ```
 
@@ -31,4 +38,19 @@
 #e4q1
 
 curl -X POST http://localhost:8080/api/ai/chat-with-sources  -H "Content-Type: application/json"  -d '{"question": "You are working as a network administrator for a company with two subnets (subnet-a and subnet-b) in their default VPC. The company’s database servers are located in subnet-a, while the application servers and web servers operate in subnet-b. Your task is to configure a firewall rule that permits database traffic exclusively from the application servers to the database servers. What steps should be taken to accomplish this? A. • Create service accounts sa-app and sa-db. • Associate service account sa- app with the application servers and the service account sa-db with the database servers. • Create an ingress firewall rule to allow network traffic from source service account sa-app to target service account sa-db. B. Create network tags db-server and app-server. • Add the db-server tag to the application servers and the app-server tag to the database servers. • Create an egress firewall rule to allow network traffic from source network tag db-server to target network tag app-server. C. Create a service account sa-app and a network tag db-server. • Associate the service account sa-app with the database servers and the network tag db-server with the application servers. • Create an ingress firewall rule to allow network traffic from source service account sa-app to target network tag db-server. D. Create a service account sa-app and a network tag app-server. • Add the service account sa-app to the application servers and the network tag app-server to the database servers. • Create an ingress firewall rule to allow network traffic from source VPC IP addresses and target the subnet-b IP addresses"}'
+```
+
+```bash
+#e1q6
+
+curl -X POST http://localhost:8080/api/ai/chat-with-sources  -H "Content-Type: application/json"  -d '{"question": "Question 6: As a software engineer at a data analytics company, you are tasked with setting up permissions for a group of Compute Engine instances to enable them to write data into a specific Cloud Storage bucket, while adhering to Google-recommended practices. What is the most appropriate action to take? A. Create a service account and add it to the IAM role ‘compute.admin’ for that bucket. B. Create a service account and add it to the IAM role ‘storage.legacyBucketReader’ for that bucket. C. Create a service account and add it to the IAM role ‘storage.objectCreator’ for that bucket. D. Create a service account with an access scope. Use the access scope ‘https://www.googleapis.com/auth/cloud-platform’."}'
+
+```
+
+
+```bash
+#e1q4
+
+curl -X POST http://localhost:8080/api/ai/chat-with-sources  -H "Content-Type: application/json"  -d '{"question": "As a developer working for a software company specializing in the finance industry, you are running Linux workloads on Compute Engine instances. Your company is planning to collaborate with a new operations partner that does not have Google Accounts. In order to maintain the installed tools on these instances, you need to grant the operations partner access. How should you proceed? A. Configure Compute Engine instances to use an external metadata server and grant the operations partner access to that server. B. Ask the operations partner to generate SSH key pairs, and add the public keys to the VM instances. C. Enable Cloud NAT and grant the operations partner access to the Cloud NAT gateway to allow traffic redirection. D. Enable Cloud IAP for the Compute Engine instances, and add the operations partner as a Cloud IAP Tunnel User."}'
+
 ```
\ No newline at end of file
diff --git a/src/main/java/net/curtlewis/gcprag/ai/AiController.java b/src/main/java/net/curtlewis/gcprag/ai/AiController.java
index b163e42..ead5523 100644
--- a/src/main/java/net/curtlewis/gcprag/ai/AiController.java
+++ b/src/main/java/net/curtlewis/gcprag/ai/AiController.java
@@ -46,11 +46,18 @@
                 request.containsKey("threshold") ? ((Number) request.get("threshold")).doubleValue()
                         : 0.7;
 
-        RagService.RagResponse ragResponse = ragService.chatWithSources(question, topK, threshold);
+        long startTime = System.nanoTime();
+        RagService.RagResponse ragResponse = ragService.ragWithOllama(question);
+        // RagService.RagResponse ragResponse = ragService.chatWithSources(question, topK, threshold);
+        long endTime = System.nanoTime();
+        double durationInSeconds = (endTime - startTime) / 1_000_000_000.0;
+        String duration = String.format("%.3f", durationInSeconds);
 
-        return ResponseEntity.ok(Map.of("response", ragResponse.getResponse(), "sources",
-                ragResponse.getSources(), "context", ragResponse.getContext(), "sourceCount",
-                ragResponse.getSources().size()));
+        // return ResponseEntity.ok(Map.of("response", ragResponse.getResponse(), "sources",
+        //         ragResponse.getSources(), "context", ragResponse.getContext(), "sourceCount",
+        //         ragResponse.getSources().size(), "durationInSeconds", duration));
+
+        return ResponseEntity.ok(Map.of("response", ragResponse.shortToString(), "durationInSeconds", duration));
     }
 
 
diff --git a/src/main/java/net/curtlewis/gcprag/ai/openai/OpenAiController.java b/src/main/java/net/curtlewis/gcprag/ai/openai/OpenAiController.java
index eee4650..1e27e46 100644
--- a/src/main/java/net/curtlewis/gcprag/ai/openai/OpenAiController.java
+++ b/src/main/java/net/curtlewis/gcprag/ai/openai/OpenAiController.java
@@ -5,23 +5,58 @@
 import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.chat.prompt.Prompt;
 import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.http.ResponseEntity;
 import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
 import org.springframework.web.bind.annotation.RequestParam;
 import org.springframework.web.bind.annotation.RestController;
+import net.curtlewis.gcprag.service.RagService;
 
 @RestController
+@RequestMapping("/openai")
 public class OpenAiController {
 
     
     private final ChatModel chatModel;
+    private final RagService ragService;
 
-    public OpenAiController(@Qualifier("openAiChatModel") ChatModel chatModel) {
+    public OpenAiController(@Qualifier("openAiChatModel") ChatModel chatModel, RagService ragService) {
         this.chatModel = chatModel;
+        this.ragService = ragService;
     }
 
-    @GetMapping("/ai/generate")
+    @GetMapping("/generate")
     public Map<String,ChatResponse> generate(@RequestParam(value = "message", defaultValue = "Tell me a joke") String message) {
         return Map.of("generation", chatModel.call(new Prompt(message)));
     }
 
+    /**
+     * Answers a question via RAG using this controller's chat model.
+     *
+     * @param request JSON body: "question" (required) plus optional numeric "topK" (default 5)
+     *        and "threshold" (default 0.7); a missing or non-numeric option uses its default
+     * @return 400 with "error" if the question is absent or blank; else 200 with "response"
+     *         and "durationInSeconds"
+     */
+    @PostMapping("/chat-with-sources")
+    public ResponseEntity<Map<String, Object>> ragChatWithSources(
+            @RequestBody Map<String, Object> request) {
+        // Type-check before casting so a malformed body is not reported as a server error.
+        Object rawQuestion = request.get("question");
+        if (!(rawQuestion instanceof String) || ((String) rawQuestion).trim().isEmpty()) {
+            return ResponseEntity.badRequest().body(Map.of("error", "Question is required"));
+        }
+        String question = (String) rawQuestion;
+        Object rawTopK = request.get("topK");
+        Object rawThreshold = request.get("threshold");
+        int topK = rawTopK instanceof Number ? ((Number) rawTopK).intValue() : 5;
+        double threshold = rawThreshold instanceof Number ? ((Number) rawThreshold).doubleValue() : 0.7;
+        long startTime = System.nanoTime();
+        RagService.RagResponse ragResponse = ragService.ragWithSources(question, topK, threshold, chatModel);
+        String duration = String.format("%.3f", (System.nanoTime() - startTime) / 1_000_000_000.0);
+        return ResponseEntity.ok(Map.of("response", ragResponse.shortToString(), "durationInSeconds", duration));
+    }
+
 }
diff --git a/src/main/java/net/curtlewis/gcprag/service/RagService.java b/src/main/java/net/curtlewis/gcprag/service/RagService.java
index 34c250f..5b18b0d 100644
--- a/src/main/java/net/curtlewis/gcprag/service/RagService.java
+++ b/src/main/java/net/curtlewis/gcprag/service/RagService.java
@@ -5,7 +5,10 @@
 import java.util.UUID;
 import java.util.stream.Collectors;
 import org.springframework.ai.chat.client.ChatClient;
+import org.springframework.ai.chat.client.ChatClient.CallResponseSpec;
+import org.springframework.ai.chat.metadata.ChatResponseMetadata;
 import org.springframework.ai.chat.model.ChatModel;
+import org.springframework.ai.chat.model.ChatResponse;
 import org.springframework.ai.embedding.EmbeddingModel;
 import org.springframework.ai.embedding.EmbeddingRequest;
 import org.springframework.ai.embedding.EmbeddingResponse;
@@ -21,7 +24,8 @@
 
     private final ChatModel openAiChatModel;
     private final ChatModel ollamaChatModel;
-    private final ChatClient chatClient;
+    // private final ChatClient chatClient;
+    // private final ChatClient openAiChatClient;
     private final EmbeddingModel embeddingModel;
     private final DocumentRepository documentRepository;
     private final ChunkRepository chunkRepository;
@@ -40,11 +44,11 @@
         
         this.openAiChatModel = chatModel;
         this.ollamaChatModel = ollamaChatModel;
-        this.chatClient = ChatClient.create(this.ollamaChatModel);
+        // this.chatClient = ChatClient.create(this.ollamaChatModel);
+        // this.openAiChatClient = ChatClient.create(this.openAiChatModel);
         this.embeddingModel = embeddingModel;
         this.documentRepository = documentRepository;
         this.chunkRepository = chunkRepository;
-        // this.openAiChatModel = (OpenAiChatModel) model;
     }
 
     // public RagService(
@@ -66,9 +70,13 @@
 
     /**
      * RAG chat with custom parameters
+     * 
+     * TODO:
+     * Make generic by adding ChatModel as a method parameter.
      */
     public String chat(String userQuestion, int topK, double similarityThreshold) {
         // 1. Generate embedding for the user question
+
         List<Double> queryEmbedding = generateEmbedding(userQuestion);
 
         // 2. Retrieve relevant documents
@@ -81,17 +89,28 @@
         // 4. Generate response using the context
         String prompt = buildRagPrompt(userQuestion, context);
 
-        return chatClient.prompt().user(prompt).call().content();
+        ChatClient cc = ChatClient.create(ollamaChatModel);
+        return cc.prompt().user(prompt).call().content();
     }
 
     /**
      * RAG chat with detailed response including sources
      */
-    public RagResponse chatWithSources(String userQuestion) {
-        return chatWithSources(userQuestion, DEFAULT_TOP_K, DEFAULT_SIMILARITY_THRESHOLD);
+    // public RagResponse chatWithSources(String userQuestion) {
+    //     return chatWithSources(userQuestion, DEFAULT_TOP_K, DEFAULT_SIMILARITY_THRESHOLD);
+    // }
+
+    public RagResponse ragWithOpenAi(String userQuestion) {
+        return ragWithSources(userQuestion, DEFAULT_TOP_K, DEFAULT_SIMILARITY_THRESHOLD, openAiChatModel);
     }
 
-    public RagResponse chatWithSources(String userQuestion, int topK, double similarityThreshold) {
+    public RagResponse ragWithOllama(String userQuestion) {
+        return ragWithSources(userQuestion, DEFAULT_TOP_K, DEFAULT_SIMILARITY_THRESHOLD, ollamaChatModel);
+    }    
+
+    public RagResponse ragWithSources(String userQuestion, int topK, double similarityThreshold, ChatModel cm) {
+
+        ChatClient chatClient = ChatClient.create(cm);
         // Generate embedding for the user question
         List<Double> queryEmbedding = generateEmbedding(userQuestion);
 
@@ -104,12 +123,50 @@
 
         // Generate response using the context
         String prompt = buildRagPrompt(userQuestion, context);
-        String response = chatClient.prompt().user(prompt).call().content();
+        CallResponseSpec callResponseSpec = chatClient.prompt().user(prompt).call();
+        ChatResponse chatResponse = callResponseSpec.chatResponse();
 
+        ChatResponseMetadata metaData = chatResponse == null ? null : chatResponse.getMetadata();
+        Integer promptTokens = metaData.getUsage().getPromptTokens();
+        Integer responseTokens = metaData.getUsage().getCompletionTokens();
+        String model = metaData.getModel();
+        
+        // DefaultUsage defaultUsage = metaData.get("usage");
+        // String response = chatClient.prompt().user(prompt).call().content();
+        System.out.println("*************************************");
+        System.out.println(String.format("Model: %s", metaData.getModel()));
+        System.out.println(String.format("Input tokens: %s | Output Tokens: %s", promptTokens, responseTokens));
+        System.out.println("*************************************");
+        
         // Return detailed response with sources
-        return new RagResponse(response, relevantDocs, context);
+        return new RagResponse(chatResponse.getResult().getOutput().getText(), 
+                    relevantDocs, context, promptTokens, responseTokens, model);
     }
 
+    /*
+     * TODO:
+     * Make generic by passing in ChatModel as parameter and then get / create ChatClient
+     */
+    // public RagResponse chatWithSources(String userQuestion, int topK, double similarityThreshold) {
+    //     // Generate embedding for the user question
+    //     List<Double> queryEmbedding = generateEmbedding(userQuestion);
+
+    //     // Retrieve relevant documents
+    //     List<ChunkEntity> relevantDocs =
+    //             retrieveRelevantDocuments(queryEmbedding, topK, similarityThreshold);
+
+    //     // Create context from retrieved documents
+    //     String context = createContext(relevantDocs);
+
+    //     // Generate response using the context
+    //     String prompt = buildRagPrompt(userQuestion, context);
+    //     ChatClient cc = ChatClient.create(ollamaChatModel);
+    //     String response = cc.prompt().user(prompt).call().content();
+
+    //     // Return detailed response with sources
+    //     return new RagResponse(response, relevantDocs, context);
+    // }
+
     // public RagResponse openAiChatWithSources(String userQuestion, int topK, double similarityThreshold) {
     //     // Generate embedding for the user question
     //     List<Double> queryEmbedding = generateEmbedding(userQuestion);
@@ -254,13 +311,21 @@
         private final String response;
         private final List<ChunkEntity> sources;
         private final String context;
+        private final Integer promptTokens;
+        private final Integer responseTokens;
+        private final String model;
 
-        public RagResponse(String response, List<ChunkEntity> sources, String context) {
+        public RagResponse(String response, List<ChunkEntity> sources, 
+            String context, Integer promptTokens, Integer responseTokens, String model) {
             this.response = response;
             this.sources = sources;
             this.context = context;
+            this.promptTokens = promptTokens;
+            this.responseTokens = responseTokens;
+            this.model = model;
         }
 
+
         public String getResponse() {
             return response;
         }
@@ -272,6 +337,23 @@
         public String getContext() {
             return context;
         }
+
+        public String getModel() {
+            return model;
+        }
+
+        public Integer getPromptTokens() {
+            return promptTokens;
+        }
+        
+        public Integer getResponseTokens() {
+            return responseTokens;
+        }
+
+        public String shortToString() {
+            return String.format("%s || PromptTokens: %s || ResponseTokens: %s || model: %s",
+                    response, promptTokens, responseTokens, model);
+        }
     }
 
     public static class DatabaseStats {