|
| 1 | +import pandas as pd |
| 2 | +import json |
| 3 | +import streamlit as st |
| 4 | +import tempfile |
| 5 | +import base64 |
| 6 | +import io |
| 7 | +import os |
| 8 | + |
| 9 | +from langchain.chains.llm import LLMChain |
| 10 | +from langchain_core.prompts import PromptTemplate |
| 11 | +from langchain_community.chat_models.oci_generative_ai import ChatOCIGenAI |
| 12 | +from langchain_core.messages import HumanMessage, SystemMessage |
| 13 | + |
| 14 | +from pdf2image import convert_from_bytes, convert_from_path |
| 15 | +from docx import Document |
| 16 | +from collections import OrderedDict |
| 17 | + |
| 18 | +# Function to extract text from a Word (.docx) document |
def extract_text_from_docx(file_path):
    """
    Read a Word (.docx) document and flatten its paragraphs into one string.

    Paragraphs whose text is empty or whitespace-only are skipped. The
    remaining paragraphs are stripped of surrounding whitespace and joined
    with a single space.

    Parameters:
    file_path (str): The path to the .docx file.

    Returns:
    str: The joined paragraph text; an empty string when no paragraph
         contains any text.
    """
    document = Document(file_path)

    fragments = []
    for paragraph in document.paragraphs:
        cleaned = paragraph.text.strip()
        if cleaned:
            fragments.append(cleaned)

    return ' '.join(fragments)
| 32 | + |
| 33 | +# Function to save extracted data to a CSV file |
def save_to_csv(data, file_name="extracted_data.csv"):
    """
    Write extracted records to a CSV file, without the index column.

    Parameters:
    data (list or dict): Extracted data. A single dictionary is treated as
                         one row; a list of dictionaries yields one row each.
    file_name (str): Name of the CSV file (default: 'extracted_data.csv').
    """
    # A bare dict is wrapped in a list so it is written as a single row.
    records = [data] if isinstance(data, dict) else data
    frame = pd.DataFrame(records)
    frame.to_csv(file_name, index=False)
| 46 | + |
| 47 | +# Streamlit UI for Patient Triage Extraction |
def patientTriage():
    """
    Streamlit app function for processing patient triage letters.

    Lets the user upload referral letters from the sidebar, extracts the text
    of each .docx letter, asks the OCI Generative AI chat model for a JSON
    summary (referring doctor, condition, recommended clinic, brief summary),
    then saves every parsed result with save_to_csv and shows them in a table.

    Notes:
    - The uploader accepts both .pdf and .docx, but only .docx text extraction
      is implemented here; any other type is reported as unsupported and
      skipped.
    - A letter whose model response cannot be parsed as JSON is reported and
      skipped, so one bad response does not abort the remaining letters.
    - The temporary copy of each upload is removed once its text is extracted.
    """
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    st.title("🏥 Patient Triage Extraction")

    # Sidebar for file upload
    with st.sidebar:
        st.title("Upload your letters here")
        uploaded_files = st.file_uploader(
            "Upload your patient file(s) here",
            type=["pdf", "docx"],
            accept_multiple_files=True,
        )

    # System message that instructs the model to answer with JSON only.
    # It is identical for every letter, so it is built once up front.
    # NOTE(review): the "Referring Doctor" line of the example is not itself
    # valid JSON (two alternatives separated by "/"); kept verbatim.
    system_message = SystemMessage(
        content="""
        You are an AI system processing patient referral letters to the Dermatology Department. Your task is to extract key details from each letter and return the information strictly in valid JSON format.

        Extract the following details:
        - Referring Doctor: Extract the full name of the doctor if mentioned.
        - Condition: Identify the primary dermatological condition mentioned.
        - Recommended Clinic: Assign the relevant dermatology clinic EXCLUSIVELY from this list:
        (Acne, Eczema and Dermatitis, Psoriasis, Skin Cancer, Hair and Nail Disorders, Laser Skin Treatments, Male Genital and Vulval Skin Disorders, Patch Testing for Contact Dermatitis, Leg Ulcers, Cosmetic Camouflage)
        - Brief Summary: A concise summary of the condition and reason for referral.

        Ensure the response is formatted strictly as JSON, without additional text.

        Example Response:
        {
        "Referring Doctor": "Dr. Sarah Thompson"/ "Not mentioned",
        "Condition": "Severe plaque psoriasis",
        "Recommended Clinic": "Psoriasis",
        "Brief Summary": "Patient has been experiencing severe plaque psoriasis unresponsive to topical treatments. Referral requested for specialist evaluation and potential systemic therapy."
        }
        """
    )

    extracted_data = []  # Parsed model responses, one dict per processed letter
    llm = None  # Created on first use so no client is built when nothing is processable

    # Process each uploaded file
    for uploaded_file in uploaded_files or []:
        # Check the type first so unsupported uploads never touch the disk.
        if uploaded_file.type != docx_mime:
            st.error("Unsupported file format!")
            continue

        # extract_text_from_docx takes a path, so spill the upload to a
        # temporary file and always remove it afterwards (delete=False would
        # otherwise leave one file behind per upload).
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_file.write(uploaded_file.read())
            temp_path = temp_file.name
        try:
            text_pages = extract_text_from_docx(temp_path)
        finally:
            os.remove(temp_path)

        with st.spinner(f"Processing {uploaded_file.name}..."):
            if llm is None:
                # TO-DO: Initialize OCI AI model
                llm = ChatOCIGenAI(
                    model_id="Add your model name",
                    compartment_id="Add your compartment ID",
                    model_kwargs={"temperature": 0, "max_tokens": 2000},
                )

            # Construct human message with extracted text
            human_message = HumanMessage(
                content=f"Patient triage letter content: {text_pages}"
            )

            # Invoke the LLM with system and human messages
            ai_response = llm.invoke(input=[system_message, human_message])

            # The model is asked for JSON but may not comply; skip this letter
            # instead of crashing the whole batch.
            try:
                response_dict = json.loads(ai_response.content)
            except (json.JSONDecodeError, TypeError):
                st.error(
                    f"Could not parse the model response for {uploaded_file.name} as JSON."
                )
                continue

            extracted_data.append(response_dict)

    # Save extracted data and display in Streamlit
    if extracted_data:
        save_to_csv(extracted_data)
        st.dataframe(pd.DataFrame(extracted_data))
| 126 | + |
| 127 | +# Run the Streamlit app |
if __name__ == "__main__":
    # Launch the triage UI only when this file is executed as a script,
    # not when it is imported as a module.
    patientTriage()
0 commit comments