{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a91a0116-3eca-4a85-b211-2e55361f653e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from pypdf import PdfReader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "269ece5f-6de7-4ca5-87a0-436396a041ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the practice-question PDF. The default is the original local\n",
    "# copy; set the GCP_PRACTICE_PDF environment variable to use another file.\n",
    "pdf_path = os.environ.get(\n",
    "    \"GCP_PRACTICE_PDF\",\n",
    "    \"/home/clewis/hdd1tb/docs/gcp/Aldovelio_Castremonte_1000_Practice_Questions_to_Master_the_GCP_Google_Cloud_Certified_Associate_Cloud_Engineer_Exam-ipgdou.pdf\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "d77bfee7-3890-447f-8403-79ef7e4d2a43",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of pages: 1039\n",
      "33\n"
     ]
    }
   ],
   "source": [
    "# Zero-based indices into reader.pages for the solutions to the first 50\n",
    "# questions. The end index is exclusive: it is passed straight to range().\n",
    "first_page_first_50_answers = 20\n",
    "last_page_first_50_answers = 53\n",
    "answer_phrase = \"Solution to Question\"\n",
    "\n",
    "try:\n",
    "    reader = PdfReader(pdf_path)\n",
    "except FileNotFoundError:\n",
    "    # Keep the friendly message but re-raise, so a failed load stops the\n",
    "    # notebook instead of leaving an `answers` list from an earlier run.\n",
    "    print(f\"Error: The file '{pdf_path}' was not found.\")\n",
    "    raise\n",
    "\n",
    "print(f\"Number of pages: {len(reader.pages)}\")\n",
    "\n",
    "pages = [\n",
    "    reader.pages[page_number]\n",
    "    for page_number in range(first_page_first_50_answers, last_page_first_50_answers)\n",
    "]\n",
    "print(len(pages))\n",
    "\n",
    "# One list of matching lines per page, kept in page order.\n",
    "page_answers = [\n",
    "    [line for line in page.extract_text().split(\"\\n\") if answer_phrase in line]\n",
    "    for page in pages\n",
    "]\n",
    "\n",
    "# Flatten the per-page lists into a single list of solution lines.\n",
    "answers = [line for lines in page_answers for line in lines]"
   ]
  },
  {
   "cell_type":
"code", "execution_count": 21, "id": "c73cdb8f-808f-4968-894c-f89a52f4df5a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Solution to Question 1: D',\n", " 'Solution to Question 2: B',\n", " 'Solution to Question 3: C',\n", " 'Solution to Question 4: D',\n", " 'Solution to Question 5: C',\n", " 'Solution to Question 6: C',\n", " 'Solution to Question 7: C',\n", " 'Solution to Question 8: D',\n", " 'Solution to Question 9: D',\n", " 'Solution to Question 10: C',\n", " 'Solution to Question 11: B',\n", " 'Solution to Question 12: C',\n", " 'Solution to Question 13: D',\n", " 'Solution to Question 14: A',\n", " 'Solution to Question 15: A',\n", " 'Solution to Question 16: D',\n", " 'Solution to Question 17: D',\n", " 'Solution to Question 18: D',\n", " 'Solution to Question 19: A',\n", " 'Solution to Question 20: D',\n", " 'Solution to Question 21: C',\n", " 'Solution to Question 22: C',\n", " 'Solution to Question 23: B',\n", " 'Solution to Question 24: A',\n", " 'Solution to Question 25: D',\n", " 'Solution to Question 26: A',\n", " 'Solution to Question 27: C',\n", " 'Solution to Question 28: D',\n", " 'Solution to Question 29: D',\n", " 'Solution to Question 30: A',\n", " 'Solution to Question 31: D',\n", " 'Solution to Question 32: A',\n", " 'Solution to Question 33: D',\n", " 'Solution to Question 34: B',\n", " 'Solution to Question 35: D',\n", " 'Solution to Question 36: B',\n", " 'Solution to Question 37: A',\n", " 'Solution to Question 38: D',\n", " 'Solution to Question 39: D',\n", " 'Solution to Question 40: C',\n", " 'Solution to Question 41: B',\n", " 'Solution to Question 42: A',\n", " 'Solution to Question 43: C',\n", " 'Solution to Question 44: C',\n", " 'Solution to Question 45: D',\n", " 'Solution to Question 46: C',\n", " 'Solution to Question 47: D',\n", " 'Solution to Question 48: B',\n", " 'Solution to Question 49: C',\n", " 'Solution to Question 50: A']" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } 
], "source": [ "answers" ] }, { "cell_type": "code", "execution_count": null, "id": "7cf9bf24-dbfa-437f-9625-1f5d6571394a", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.2" } }, "nbformat": 4, "nbformat_minor": 5 }