from flask import Flask, request, jsonify
import os
import streamlit as st
from langchain.chains import create_sql_query_chain
from langchain_google_genai import GoogleGenerativeAI
from sqlalchemy import create_engine
from sqlalchemy.exc import ProgrammingError
from langchain_community.utilities import SQLDatabase
import google.generativeai as genai
import pymysql
import pandas as pd
import numpy as np
import random
from io import StringIO
import json
import re
from pathlib import Path
import mimetypes
from flask import request
import wget

# Load environment variables from a .env file (GOOGLE_API_KEY is read below)
from dotenv import load_dotenv
load_dotenv() 


# Read the Gemini API key from the environment and register it with the SDK.
api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)

# Generation parameters handed to the Gemini model as its generation_config.
MODEL_CONFIG = dict(
    temperature=0.2,
    top_p=1,
    top_k=32,
    max_output_tokens=4096,
)

## Safety settings of the model
# Every harm category listed here is given the same blocking threshold.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# Create the module-wide Gemini model from MODEL_CONFIG and safety_settings.
model = genai.GenerativeModel(
    model_name="gemini-2.5-flash-lite",
    safety_settings=safety_settings,
    generation_config=MODEL_CONFIG,
)


# Flask application object; the @app.route decorators below register handlers on it.
app = Flask(__name__)

# POST is accepted in addition to GET because many HTTP clients and proxies
# drop the body of a GET request, and this handler reads a JSON body.
@app.route('/process_data', methods=['GET', 'POST'])
def process_data():
    """Double the ``input_value`` field of the JSON request body.

    The request body must be a JSON object containing ``input_value``. The
    value is multiplied by two with Python's ``*`` operator, so numbers are
    doubled and strings/lists are repeated.

    Returns:
        ``{"status": "success", "result": <message>}`` on success, or
        ``{"status": "error", "message": <reason>}`` with HTTP 400 when the
        body is not a JSON object, ``input_value`` is missing or null, or the
        value does not support multiplication.
    """
    # silent=True returns None instead of aborting when the body is missing,
    # has a non-JSON content type, or is malformed, so a structured error can
    # be returned below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"status": "error",
                        "message": "Request body must be a JSON object"}), 400

    input_value = data.get('input_value')
    if input_value is None:
        return jsonify({"status": "error",
                        "message": "'input_value' is required"}), 400

    try:
        doubled = input_value * 2
    except TypeError:
        # e.g. a nested JSON object, which cannot be multiplied.
        return jsonify({"status": "error",
                        "message": "'input_value' cannot be doubled"}), 400

    result = f"Python processed: {input_value} and doubled it to {doubled}"

    return jsonify({"status": "success", "result": result})

@app.route('/home', methods=['GET'])
def home():
    """Return a fixed JSON payload reporting success."""
    payload = {"status": "success"}
    return jsonify(payload)

def _json_reply(payload, status=200):
    """Serialize *payload* with json.dumps and return it as an application/json response."""
    # default=str is deliberate: besides plain strings and already-parsed JSON,
    # the only value placed in a payload is Gemini's block reason, which may
    # be an object the json module cannot encode by itself.
    body = json.dumps(payload, default=str)
    return app.response_class(body, status=status, mimetype="application/json")


@app.route('/read-pdf', methods=['GET'])
def read_pdf():
    """Extract a shipping-line invoice PDF into JSON using Gemini.

    Query parameters:
        file_name: http(s) URL of the invoice PDF. It is passed to
            ``gemini_output``, which downloads the file before sending it to
            the model.

    Returns:
        An ``application/json`` response. On success the body is the JSON
        produced by the model. A missing or non-http(s) ``file_name`` yields
        HTTP 400 with an ``{"error": ...}`` body. All other failures keep the
        default HTTP 200 status and report the problem in an ``"error"`` key,
        so clients that already inspect that key are unaffected.
    """
    # Imported locally so this handler is self-contained with respect to the
    # module's import block.
    from urllib.parse import urlparse

    image_path = request.args.get('file_name')
    if not image_path:
        return _json_reply(
            {"error": "Missing required query parameter 'file_name'"}, status=400)

    # SECURITY: file_name comes straight from the query string and is fetched
    # server-side. Only http/https is allowed so schemes such as file:// cannot
    # be used to read local files. NOTE(review): this does not stop requests
    # to internal hosts; consider a host allow-list.
    if urlparse(image_path).scheme not in ("http", "https"):
        return _json_reply(
            {"error": "Query parameter 'file_name' must be an http(s) URL"}, status=400)

    #EXTRACTING WHOLE DATA IN JSON FROM INVOICE
    system_prompt = """
                You are a specialist in comprehending shipping line's invoice.
                Input PDF in the form of shipping line invoice will be provided to you,
                and your task is to respond to questions based on the content of the input PDF.
                """
    user_prompt = """Convert Shipping Line Invoice data into json format with appropriate json tags like shipping line, customer 
                and the charges"""

    try:
        response = gemini_output(image_path, system_prompt, user_prompt)

        feedback = response.prompt_feedback
        if feedback and feedback.block_reason:
            # For simplicity, a blocked prompt is reported as an error JSON.
            return _json_reply({"error": "Content blocked", "reason": feedback.block_reason})

        raw_gemini_output = response.text
        if not raw_gemini_output:
            return _json_reply({"error": "No text content received from Gemini"})

        print(f"Raw Gemini output (before extraction):\n{raw_gemini_output}")
        # Try to extract JSON from markdown fences
        json_string_extracted = extract_json_from_markdown(raw_gemini_output)

        if json_string_extracted:
            try:
                # Parse to validate; the reply re-serializes the parsed data.
                parsed_data = json.loads(json_string_extracted)
            except json.JSONDecodeError as e:
                return _json_reply({"error": "JSON decoding failed after extraction", "message": str(e), "extracted_json": json_string_extracted, "raw_output": raw_gemini_output})
            return _json_reply(parsed_data)

        # No fenced block found: the model may have skipped the fences, so
        # try to parse the whole output as JSON.
        try:
            parsed_data = json.loads(raw_gemini_output)
        except json.JSONDecodeError as e:
            return _json_reply({"error": "No JSON markdown fences found and direct JSON parsing failed", "message": str(e), "raw_output": raw_gemini_output})
        return _json_reply(parsed_data)

    except Exception as e:
        # Request boundary: any download, SDK, or response-access failure is
        # reported to the client instead of surfacing as an HTTP 500.
        return _json_reply({"error": "API call failed", "message": str(e)})


#USER METHODS
#DEFINE PDF FORMAT TO INPUT IN GEMINI
def read_pdf_bytes(pdf_path: str, download_dir: str = "/var/gemini-ai/pdf-upload/") -> list:
    """
    Download a PDF, read its raw bytes, and format them into a dictionary
    with the 'application/pdf' MIME type. This format is often used for APIs
    that accept PDF file inputs.

    Args:
        pdf_path (str): URL of the PDF to download.
        download_dir (str): Directory the downloaded file is saved into.
            Defaults to the directory this service has always used.

    Returns:
        list: A single-element list containing a dictionary with
              'mime_type' and 'data' keys.

    Raises:
        FileNotFoundError: If the downloaded file is not present on disk.
        IOError: If the downloaded file cannot be read.
    """
    # wget.download returns the path of the file it saved; read that file
    # rather than a fixed name, so the PDF that was requested is the one
    # that gets processed.
    save_path = wget.download(pdf_path, download_dir)
    pdf_file = Path(save_path)

    if not pdf_file.exists():
        raise FileNotFoundError(f"Could not find PDF file: {pdf_file}")

    # Explicitly set MIME type for PDF
    mime_type = "application/pdf"

    # The extension is only a hint: reading bytes works for any file, so a
    # mismatch is reported but does not stop processing.
    if pdf_file.suffix.lower() != ".pdf":
        print(f"Warning: The file '{pdf_file.name}' does not have a .pdf extension. "
              f"Proceeding assuming it's a PDF, but this might indicate an issue.")

    try:
        # Read the entire content of the PDF file as bytes
        pdf_data = pdf_file.read_bytes()
    except OSError as e:
        # Chain the original error so the root cause stays in the traceback.
        raise IOError(f"Error reading bytes from PDF file '{pdf_file}': {e}") from e

    return [
        {
            "mime_type": mime_type,
            "data": pdf_data,
        }
    ]

#GEMINI MODEL OUTPUT
def gemini_output(image_path, system_prompt, user_prompt):
    """
    Send the PDF at *image_path* to the Gemini model together with both prompts.

    The PDF part produced by read_pdf_bytes is placed between the system
    prompt and the user prompt. The model's response object is returned
    unchanged, so callers read its text or feedback themselves.
    """
    pdf_part = read_pdf_bytes(image_path)[0]
    return model.generate_content([system_prompt, pdf_part, user_prompt])

#EXTRACT JSON FROM MARKDOWN FENCES (```json ... ```).
def extract_json_from_markdown(text):
    """
    Pull out the text wrapped in a ```json ... ``` markdown fence.

    Only the first fenced block is considered. Its content is returned with
    surrounding whitespace removed, or None when *text* contains no such
    fence. The content is not checked for being valid JSON.
    """
    # DOTALL lets the lazy group span multiple lines inside the fence.
    fence_pattern = r'```json\s*(.*?)\s*```'
    found = re.search(fence_pattern, text, re.DOTALL)
    return found.group(1).strip() if found is not None else None

#END USER METHODS

if __name__ == '__main__':
    # Listen on all network interfaces, port 5000.
    app.run(port=5000, host='0.0.0.0')

