import os
import streamlit as st
from langchain.chains import create_sql_query_chain
from langchain_google_genai import GoogleGenerativeAI
from sqlalchemy import create_engine
from sqlalchemy.exc import ProgrammingError
from langchain_community.utilities import SQLDatabase
import google.generativeai as genai
import pymysql
import pandas as pd
import numpy as np
import random
from io import StringIO
import json
import re
from pathlib import Path
import mimetypes

# Load environment variables (e.g. GOOGLE_API_KEY) from a local .env file
from dotenv import load_dotenv
load_dotenv() 


#SET API KEY
# Look the key up once and hand that same value to the GenAI client;
# `api_key` stays available at module level.
api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)

#LIST OF MODELS
# Print the name of every model whose supported generation methods
# include 'generateContent'; all other models are skipped.
for m in genai.list_models():
    if 'generateContent' not in m.supported_generation_methods:
        continue
    print(m.name)

# Model Configuration
# Generation parameters handed to the Gemini model as `generation_config`.
MODEL_CONFIG = dict(
    temperature=0.2,
    top_p=1,
    top_k=32,
    max_output_tokens=4096,
)

## Safety Settings of Model
# One entry per harm category, all sharing the same blocking threshold.
safety_settings = [
    {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

#LOAD GEMINI MODEL WITH MODEL CONFIGURATIONS
# Single module-level model instance, built from the generation and safety
# settings defined above.
model = genai.GenerativeModel(
    model_name="gemini-2.5-flash-lite",
    generation_config=MODEL_CONFIG,
    safety_settings=safety_settings,
)

#DEFINE IMAGE FORMAT TO INPUT IN GEMINI
def image_format(image_path):
    """Load an image file as a one-element list of inline-data parts.

    The MIME type is guessed from the file name's extension (for example
    ``.png`` -> ``image/png``, ``.jpg`` -> ``image/jpeg``) instead of always
    being reported as PNG. When the extension is unknown, or maps to a
    non-image type, ``image/png`` is used as the fallback.

    Args:
        image_path: Path to the image file (``str`` or path-like).

    Returns:
        list: A single-element list holding a dictionary with the keys
        ``mime_type`` (str) and ``data`` (the file's raw bytes).

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    img = Path(image_path)

    if not img.exists():
        raise FileNotFoundError(f"Could not find image: {img}")

    # Guess from the extension only; the file content is not inspected.
    guessed_type, _ = mimetypes.guess_type(img.name)
    if guessed_type is None or not guessed_type.startswith("image/"):
        # Keep the historical label when no image type can be determined.
        guessed_type = "image/png"

    return [
        {
            "mime_type": guessed_type,
            "data": img.read_bytes(),
        }
    ]

#DEFINE PDF FORMAT TO INPUT IN GEMINI
def read_pdf_bytes(pdf_path: str) -> list:
    """
    Reads a file's raw bytes and wraps them in a dictionary tagged with the
    'application/pdf' MIME type.

    The file content is not validated. A file without a ``.pdf`` extension
    only triggers a printed warning and is still read and labelled as a PDF.

    Args:
        pdf_path (str): The path to the input PDF file.

    Returns:
        list: A single-element list containing a dictionary with
              'mime_type' ("application/pdf") and 'data' (bytes) keys.

    Raises:
        FileNotFoundError: If ``pdf_path`` does not exist.
        IOError: If the path exists but its bytes cannot be read. The
                 underlying exception is attached as ``__cause__``.
    """
    pdf_file = Path(pdf_path)

    if not pdf_file.exists():
        raise FileNotFoundError(f"Could not find PDF file: {pdf_file}")

    # The MIME type is fixed; it is not derived from the file itself.
    mime_type = "application/pdf"

    # Extension check is advisory only: warn, then carry on.
    if pdf_file.suffix.lower() != ".pdf":
        print(f"Warning: The file '{pdf_file.name}' does not have a .pdf extension. "
              f"Proceeding assuming it's a PDF, but this might indicate an issue.")

    try:
        # Read the entire content of the file as bytes
        pdf_data = pdf_file.read_bytes()
    except Exception as e:
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise IOError(f"Error reading bytes from PDF file '{pdf_path}': {e}") from e

    return [
        {
            "mime_type": mime_type,
            "data": pdf_data,
        }
    ]

#GEMINI MODEL OUTPUT
def gemini_output(image_path, system_prompt, user_prompt):
    """Send a document plus two prompts to the Gemini model and return its text.

    Despite the parameter name, the file at ``image_path`` is always loaded
    through ``read_pdf_bytes`` and therefore sent as 'application/pdf'.
    ``image_format`` is the alternative loader for image inputs but is not
    used here.

    Args:
        image_path: Path of the file to send to the model.
        system_prompt: Text placed first in the request.
        user_prompt: Text placed after the document part.

    Returns:
        The ``text`` attribute of the model's response.
    """
    document_part = read_pdf_bytes(image_path)[0]
    reply = model.generate_content([system_prompt, document_part, user_prompt])
    return reply.text

#EXTRACTING PART OF THE INFORMATION FROM INVOICE
#system_prompt = """
#               You are a specialist in comprehending receipts.
#               Input images in the form of receipts will be provided to you,
#               and your task is to respond to questions based on the content of the input image.
#               """
#
#image_path = "one_bill.png"
#
#user_prompt = "What is the balance amount in the image?"
#
#result=gemini_output(image_path, system_prompt, user_prompt)
#st.write("Result:")
#st.write(result)


#EXTRACTING WHOLE DATA IN JSON FROM INVOICE
# Text sent ahead of the document in the model request.
system_prompt = """
               You are a specialist in comprehending receipts.
               Input images in the form of receipts will be provided to you,
               and your task is to respond to questions based on the content of the input image.
               """
# Alternative system prompt, currently disabled:
#system_prompt = "Convert Invoice data into json format with appropriate json tags as required for the data in image "
image_path = "one_bill.pdf"
user_prompt = "Convert Invoice data into json format with appropriate json tags as required for the data in image "

# `output` holds the text returned by gemini_output for the invoice.
# There is deliberately no `return` here: this is module-level code, where a
# `return` statement is a SyntaxError that stops the whole file from loading.
# NOTE(review): the result is not displayed anywhere yet; e.g. st.write(output)
# would show it in the Streamlit page - confirm the intended behavior.
output = gemini_output(image_path, system_prompt, user_prompt)



