diff --git a/quiz_pdf.ipynb b/quiz_pdf.ipynb
new file mode 100644
index 0000000..8fe02e7
--- /dev/null
+++ b/quiz_pdf.ipynb
@@ -0,0 +1,175 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "a91a0116-3eca-4a85-b211-2e55361f653e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pypdf import PdfReader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "269ece5f-6de7-4ca5-87a0-436396a041ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pdf_path = \"/home/clewis/hdd1tb/docs/gcp/Aldovelio_Castremonte_1000_Practice_Questions_to_Master_the_GCP_Google_Cloud_Certified_Associate_Cloud_Engineer_Exam-ipgdou.pdf\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "d77bfee7-3890-447f-8403-79ef7e4d2a43",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of pages: 1039\n",
+      "33\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Zero-based page-index window scanned for answer lines. The two bounds are\n",
+    "# passed straight to range(), so the start is inclusive and the end exclusive.\n",
+    "first_page_first_50_answers = 20\n",
+    "last_page_first_50_answers = 53\n",
+    "# Only lines containing this phrase are kept as answers.\n",
+    "answer_phrase = \"Solution to Question\"\n",
+    "\n",
+    "try:\n",
+    "    reader = PdfReader(pdf_path)\n",
+    "except FileNotFoundError:\n",
+    "    # Keep the friendly hint, but re-raise so a Run All stops here instead of\n",
+    "    # letting later cells show a missing or stale `answers`.\n",
+    "    print(f\"Error: The file '{pdf_path}' was not found.\")\n",
+    "    raise\n",
+    "\n",
+    "print(f\"Number of pages: {len(reader.pages)}\")\n",
+    "\n",
+    "# Page objects inside the answer window.\n",
+    "pages = [\n",
+    "    reader.pages[page_number]\n",
+    "    for page_number in range(first_page_first_50_answers, last_page_first_50_answers)\n",
+    "]\n",
+    "print(len(pages))\n",
+    "\n",
+    "# One list of matching lines per page, in page order.\n",
+    "page_answers = [\n",
+    "    [line for line in page.extract_text().split(\"\\n\") if answer_phrase in line]\n",
+    "    for page in pages\n",
+    "]\n",
+    "\n",
+    "# Flatten the per-page lists into a single list of answer lines.\n",
+    "answers = [item for sublist in page_answers for item in sublist]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "c73cdb8f-808f-4968-894c-f89a52f4df5a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['Solution to Question 1: D',\n",
+       " 'Solution to Question 2: B',\n",
+       " 'Solution to Question 3: C',\n",
+       " 'Solution to Question 4: D',\n",
+       " 'Solution to Question 5: C',\n",
+       " 'Solution to Question 6: C',\n",
+       " 'Solution to Question 7: C',\n",
+       " 'Solution to Question 8: D',\n",
+       " 'Solution to Question 9: D',\n",
+       " 'Solution to Question 10: C',\n",
+       " 'Solution to Question 11: B',\n",
+       " 'Solution to Question 12: C',\n",
+       " 'Solution to Question 13: D',\n",
+       " 'Solution to Question 14: A',\n",
+       " 'Solution to Question 15: A',\n",
+       " 'Solution to Question 16: D',\n",
+       " 'Solution to Question 17: D',\n",
+       " 'Solution to Question 18: D',\n",
+       " 'Solution to Question 19: A',\n",
+       " 'Solution to Question 20: D',\n",
+       " 'Solution to Question 21: C',\n",
+       " 'Solution to Question 22: C',\n",
+       " 'Solution to Question 23: B',\n",
+       " 'Solution to Question 24: A',\n",
+       " 'Solution to Question 25: D',\n",
+       " 'Solution to Question 26: A',\n",
+       " 'Solution to Question 27: C',\n",
+       " 'Solution to Question 28: D',\n",
+       " 'Solution to Question 29: D',\n",
+       " 'Solution to Question 30: A',\n",
+       " 'Solution to Question 31: D',\n",
+       " 'Solution to Question 32: A',\n",
+       " 'Solution to Question 33: D',\n",
+       " 'Solution to Question 34: B',\n",
+       " 'Solution to Question 35: D',\n",
+       " 'Solution to Question 36: B',\n",
+       " 'Solution to Question 37: A',\n",
+       " 'Solution to Question 38: D',\n",
+       " 'Solution to Question 39: D',\n",
+       " 'Solution to Question 40: C',\n",
+       " 'Solution to Question 41: B',\n",
+       " 'Solution to Question 42: A',\n",
+       " 'Solution to Question 43: C',\n",
+       " 'Solution to Question 44: C',\n",
+       " 'Solution to Question 45: D',\n",
+       " 'Solution to Question 46: C',\n",
+       " 'Solution to Question 47: D',\n",
+       " 'Solution to Question 48: B',\n",
+       " 'Solution to Question 49: C',\n",
+       " 'Solution to Question 50: A']"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "answers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7cf9bf24-dbfa-437f-9625-1f5d6571394a",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}