Skip to content

Commit 7481c8f

Browse files
authored
Merge pull request #1670 from oracle-devrel/ao-patient-triaging
Patient letter triaging solution
2 parents 22a5f44 + 945323c commit 7481c8f

File tree

3 files changed

+212
-0
lines changed

3 files changed

+212
-0
lines changed
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import pandas as pd
2+
import json
3+
import streamlit as st
4+
import tempfile
5+
import base64
6+
import io
7+
import os
8+
9+
from langchain.chains.llm import LLMChain
10+
from langchain_core.prompts import PromptTemplate
11+
from langchain_community.chat_models.oci_generative_ai import ChatOCIGenAI
12+
from langchain_core.messages import HumanMessage, SystemMessage
13+
14+
from pdf2image import convert_from_bytes, convert_from_path
15+
from docx import Document
16+
from collections import OrderedDict
17+
18+
# Function to extract text from a Word (.docx) document
19+
def extract_text_from_docx(file_path):
    """
    Read a Word (.docx) document and flatten its paragraphs into one string.

    Parameters:
        file_path (str): The path to the .docx file.

    Returns:
        str: The text of every non-blank paragraph, stripped of surrounding
        whitespace and joined with single spaces.
    """
    document = Document(file_path)

    fragments = []
    for paragraph in document.paragraphs:
        cleaned = paragraph.text.strip()
        if cleaned:  # ignore empty / whitespace-only paragraphs
            fragments.append(cleaned)

    return ' '.join(fragments)
32+
33+
# Function to save extracted data to a CSV file
34+
def save_to_csv(data, file_name="extracted_data.csv"):
    """
    Write extracted records to a CSV file (without the index column).

    Parameters:
        data (list or dict): Extracted data; a single dictionary is treated
            as one row, a list of dictionaries as one row per item.
        file_name (str): Name of the CSV file (default: 'extracted_data.csv').
    """
    # A lone record is wrapped so pandas builds a one-row frame from it.
    records = [data] if isinstance(data, dict) else data
    pd.DataFrame(records).to_csv(file_name, index=False)
46+
47+
# Streamlit UI for Patient Triage Extraction
48+
def patientTriage():
    """
    Streamlit app function for processing patient triage letters.

    Lets the user upload patient referral letters from the sidebar, extracts
    the text of each .docx letter, asks an OCI Generative AI chat model to
    return the key referral details as JSON, and shows the collected results
    in a table. The results are also written to a CSV file via save_to_csv.

    Files whose MIME type is not .docx are reported as unsupported and
    skipped. A letter whose model response is not a JSON object is reported
    with an error and skipped, so one bad response does not abort the batch.

    Returns:
        None
    """
    st.title("🏥 Patient Triage Extraction")

    # Sidebar for file upload
    with st.sidebar:
        st.title("Upload your letters here")
        uploaded_files = st.file_uploader(
            "Upload your patient file(s) here",
            type=["pdf", "docx"],
            accept_multiple_files=True,
        )

    # Nothing uploaded yet: nothing to process or display.
    if not uploaded_files:
        return

    docx_mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    # The system prompt is identical for every letter, so build it once.
    # NOTE(review): the example's "Referring Doctor" value lists two
    # alternatives separated by "/", which is not itself valid JSON - consider
    # rewording if the model echoes that form.
    system_message = SystemMessage(
        content="""
        You are an AI system processing patient referral letters to the Dermatology Department. Your task is to extract key details from each letter and return the information strictly in valid JSON format.

        Extract the following details:
        - Referring Doctor: Extract the full name of the doctor if mentioned.
        - Condition: Identify the primary dermatological condition mentioned.
        - Recommended Clinic: Assign the relevant dermatology clinic EXCLUSIVELY from this list:
        (Acne, Eczema and Dermatitis, Psoriasis, Skin Cancer, Hair and Nail Disorders, Laser Skin Treatments, Male Genital and Vulval Skin Disorders, Patch Testing for Contact Dermatitis, Leg Ulcers, Cosmetic Camouflage)
        - Brief Summary: A concise summary of the condition and reason for referral.

        Ensure the response is formatted strictly as JSON, without additional text.

        Example Response:
        {
        "Referring Doctor": "Dr. Sarah Thompson"/ "Not mentioned",
        "Condition": "Severe plaque psoriasis",
        "Recommended Clinic": "Psoriasis",
        "Brief Summary": "Patient has been experiencing severe plaque psoriasis unresponsive to topical treatments. Referral requested for specialist evaluation and potential systemic therapy."
        }
        """
    )

    llm = None  # created lazily, once, when the first supported file is seen
    extracted_data = []  # List to store extracted data from all uploaded files

    # Process each uploaded file
    for uploaded_file in uploaded_files:

        # Reject unsupported formats before anything is written to disk.
        if uploaded_file.type != docx_mime_type:
            st.error("Unsupported file format!")
            continue

        # Save uploaded file temporarily. delete=False lets the file be
        # reopened by path after this handle is closed; it is removed
        # explicitly below so patient letters do not linger on disk.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_file.write(uploaded_file.read())
            temp_path = temp_file.name

        try:
            text_pages = extract_text_from_docx(temp_path)
        finally:
            os.remove(temp_path)

        with st.spinner(f"Processing {uploaded_file.name}..."):
            if llm is None:
                # TO-DO: Initialize OCI AI model
                llm = ChatOCIGenAI(
                    model_id="Add your model name",
                    compartment_id="Add your compartment ID",
                    model_kwargs={"temperature": 0, "max_tokens": 2000},
                )

            # Construct human message with extracted text
            human_message = HumanMessage(
                content=f"Patient triage letter content: {text_pages}"
            )

            # Invoke the LLM with system and human messages
            ai_response = llm.invoke(input=[system_message, human_message])

            # Convert response to dictionary; the model is only *asked* to
            # return JSON, so a malformed reply must not crash the batch.
            try:
                response_dict = json.loads(ai_response.content)
            except (json.JSONDecodeError, TypeError):
                st.error(
                    f"Could not parse the model response for {uploaded_file.name} as JSON."
                )
                continue

            # Each table row must be a JSON object (field name -> value).
            if not isinstance(response_dict, dict):
                st.error(
                    f"Unexpected response structure for {uploaded_file.name}; expected a JSON object."
                )
                continue

            extracted_data.append(response_dict)

    # Save extracted data and display in Streamlit
    if extracted_data:
        save_to_csv(extracted_data)
        st.dataframe(pd.DataFrame(extracted_data))
126+
127+
# Run the Streamlit app
128+
if __name__ == "__main__":
    # Guarded so that importing this module does not launch the UI.
    patientTriage()
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Patient Referral Extraction
2+
3+
This is a Generative AI-powered application that extracts key information from patient referral letters (in `.docx` format) and processes them using the OCI Generative AI services. This application allows users to simply and efficiently extract key data from patient referrals for expedited patient handling.
4+
5+
## Features
6+
- Upload `.docx` files containing patient referral letters.
7+
- Process documents using Oracle Cloud Infrastructure (OCI) Generative AI.
8+
- Extract structured data and display it in a table.
9+
- Save extracted data as a CSV file for further use.
10+
11+
## Prerequisites
12+
Before running the application, ensure you have:
13+
- Python 3.8 or later installed
14+
- An active Oracle Cloud Infrastructure (OCI) account
15+
- Required Python dependencies installed
16+
- OCI Generative AI model name and compartment ID
17+
18+
## How It Works
19+
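1. **Upload Files:** Users upload patient referral letters in `.docx` format. (The uploader also accepts `.pdf` files, but PDF text extraction is not implemented, so those files are rejected as unsupported.)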
20+
2. **Processing:**
21+
- Extracts text from uploaded documents.
22+
- Sends the text to an OCI AI model for structured data extraction.
23+
- Extracted data includes:
24+
- Referring Doctor
25+
- Condition
26+
- Recommended Clinic (chosen from predefined options)
27+
- Brief Summary
28+
3. **Results Display:**
29+
- Extracted details are displayed in a table.
30+
- Data is saved to a CSV file (`extracted_data.csv`) in the directory the app is run from.
31+
32+
## Example Output
33+
```json
34+
{
35+
"Referring Doctor": "Dr. Sarah Thompson",
36+
"Condition": "Severe plaque psoriasis",
37+
"Recommended Clinic": "Psoriasis",
38+
"Brief Summary": "Patient has been experiencing severe plaque psoriasis unresponsive to topical treatments. Referral requested for specialist evaluation and potential systemic therapy."
39+
}
40+
```
41+
42+
## Installation
43+
Clone this repository and navigate to the project directory:
44+
```bash
45+
git clone <repository-url>
46+
cd <repository-folder>
47+
```
48+
49+
Install the required dependencies:
50+
```bash
51+
pip install -r requirements.txt
52+
```
53+
54+
## Configuration
55+
To integrate with OCI Generative AI, update the following parameters in the code:
56+
```python
57+
llm = ChatOCIGenAI(
58+
model_id="Add your model name",
59+
compartment_id="Add your compartment ID",
60+
model_kwargs={"temperature": 0, "max_tokens": 2000},
61+
)
62+
```
63+
Replace `model_id` and `compartment_id` with the appropriate values.
64+
65+
## Running the Application
66+
Run the Streamlit app with:
67+
```bash
68+
streamlit run <script-name>.py
69+
```
70+
71+
Replace `<script-name>.py` with the filename of the main script (e.g., `patient_triage_extraction.py`).
72+
73+
74+
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
pandas==2.1.4
2+
jsonschema==4.19.2
3+
langchain==0.1.10
4+
streamlit==1.30.0
5+
langchain-community==0.0.32
6+
pdf2image==1.16.3
7+
python-docx==0.8.11
8+
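# poppler-utils==22.12.0  # NOTE(review): Poppler appears to be a system-level dependency of pdf2image (apt/brew/conda), not a PyPI release at this version - install it with your OS package manager; confirm before re-enabling this pin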
9+
oci==2.112.0

0 commit comments

Comments
 (0)