from openai import OpenAI
from groq import Groq
from typing import List
from fastapi import HTTPException

def matched_score_llm(system_message: str, resume_text: str, job_description: str, api_key: str, model: str = "gpt-4.1-mini"):
    """
    Score a resume against a job description via OpenAI's chat completions API.

    Args:
        system_message (str): The system/developer message (e.g., base_prompt()).
        resume_text (str): The candidate's resume text.
        job_description (str): The job description to match against.
        api_key (str): The OpenAI API key.
        model (str): The OpenAI model to use (default: "gpt-4.1-mini").

    Returns:
        str: The assistant's response message.
    """
    client = OpenAI(api_key=api_key)
    
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": f"resume text: {resume_text}, Job_description: {job_description}"}
        ]
    )
    return completion.choices[0].message.content
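
# Usage example (a minimal sketch; the key and inputs below are placeholders):
# raw = matched_score_llm(
#     system_message="You are a resume-matching assistant.",
#     resume_text="Senior Python developer; FastAPI, PostgreSQL, AWS.",
#     job_description="Backend engineer: Python, PostgreSQL, CI/CD.",
#     api_key="sk-...",  # hypothetical OpenAI key
# )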




def get_groq_llm(system_message: str, resume_text: str, job_description: str, api_key: str, model: str = "llama-3.1-8b-instant"):
    """
    Score a resume against a job description via Groq's chat completions API.

    Args:
        system_message (str): The system/developer message (e.g., base_prompt()).
        resume_text (str): The candidate's resume text.
        job_description (str): The job description to match against.
        api_key (str): The Groq API key.
        model (str): The Groq model to use (default: "llama-3.1-8b-instant").

    Returns:
        str: The assistant's response message.

    Raises:
        Exception: If the API returns an empty response.
    """
    client = Groq(api_key=api_key)
    messages = [
        {"role": "system", "content": system_message},
        {
            "role": "user", 
            "content": f"Resume:\n{resume_text}\n\nJob Description:\n{job_description}"
        }
    ]
    
    # Prepare completion parameters
    completion_params = {
        "model": model,
        "messages": messages,
        "temperature": 0.2,
        "timeout": 10
    }
    
    # Make API call
    completion = client.chat.completions.create(**completion_params)
    
    # Extract and return response
    response_content = completion.choices[0].message.content
    
    if not response_content:
        raise Exception("Received empty response from Groq API")
        
    return response_content 
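
# Usage example (a sketch; the Groq key is a placeholder). The call raises if
# the completion comes back empty, so wrap it accordingly:
# try:
#     raw = get_groq_llm(
#         system_message="You are a resume-matching assistant.",
#         resume_text="Senior Python developer; FastAPI, PostgreSQL.",
#         job_description="Backend engineer: Python, PostgreSQL.",
#         api_key="gsk_...",  # hypothetical Groq key
#     )
# except Exception as exc:
#     logging.error("Groq scoring failed: %s", exc)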


def base_prompt():
    prompt_match = f"""
    You are an intelligent matching engine that performs detailed comparative analysis between a job description (JD) and a candidate's resume. Your goal is to assess job-to-candidate fit by evaluating multiple dimensions; if any input data is empty or sparse, lower the affected scores accordingly and generate a JSON output in the exact format below.
    
    Use the following dimensions for scoring:

    - hardSkillsAndTechStack: Programming languages, databases, cloud platforms, frameworks, etc.  
    - experienceAndProjects: Relevance, depth, and scope of past roles and projects.  
    - toolsAndMethodologies: CI/CD, testing frameworks, Agile/Scrum, etc.  
    - softSkills: Communication, collaboration, leadership, adaptability.  
    - educationAndBackground: Degrees, certifications, academic credentials.  
    - culturalAndTeamFit: Collaboration, alignment with stated company/team culture.

    Assign a score (0–100) to each dimension and compute a weighted average to generate the overallMatchScore.

    Identify specific skill overlaps (matchedSkills) and gaps (missingOrWeakSkills). Then summarize the alignment in roleAlignment, and provide a clear, actionable recommendation.
    
    [Instructions]
     - Ensure all key names in the JSON structure remain unchanged, preserving exact casing (e.g., 'jobFitAnalysis' must retain its original capitalization). Maintain strict adherence to provided JSON format—do not alter the key names under any circumstances.

    [output format]
    Return the result in this **exact provided JSON format**:
    {{
      "jobFitAnalysis": {{
        "overallMatchScore": 0-100,
        "categoryScores": {{
          "hardSkillsAndTechStack": 0-100,
          "hardSkillsAndTechStack_commentary": "string",
          "experienceAndProjects": 0-100,
          "toolsAndMethodologies": 0-100,
          "softSkills": 0-100,
          "educationAndBackground": 0-100,
          "culturalAndTeamFit": 0-100,
          "company_fit": 0-100,
          "company_fit_commentary": "string",
          "job_knowledge": 0-100,
          "leadership": 0-100,
          "leadership_commentary": "string"
        }},
        "matchedSkills": [
          "string"
        ],
        "missingOrWeakSkills": [
          "string"
        ],
        "roleAlignment": "string",
        "recommendation": "string",
        "strength": [
          "string"
        ],
        "gap": [
          "string"
        ],
        "profile_summary": "string",
        "role_benchmark_comparison": 0-100,
        "role_benchmark_comparison_commentary": "string",
        "profile_match_score": 0-100,
        "jd_match_score": 0-100
      }}
    }}

   
    """
    return prompt_match
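
# Example wiring (a sketch; placeholders throughout): base_prompt() supplies
# the system message, and safe_parse_json() below recovers the dict:
# raw = matched_score_llm(
#     system_message=base_prompt(),
#     resume_text="...",
#     job_description="...",
#     api_key="sk-...",  # hypothetical key
# )
# analysis = safe_parse_json(raw)["jobFitAnalysis"]
# print(analysis["overallMatchScore"])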


def base_batch_prompt():
    prompt_match = """
    You are an intelligent matching engine that performs detailed comparative analysis between a job description (JD) and multiple candidate profiles. 
    Your goal is to assess job-to-candidate fit by evaluating multiple dimensions. 

    [Scoring Rules]
    - If no clear evidence exists for a dimension, assign a low score (10–30) if there is weak or indirect indication, otherwise assign 0. 
    - Do not infer or assume unrelated skills. Only use skills explicitly present in the candidate text.
    - All other candidate skills not appearing in the JD must be listed under `extraStrengths`.  

    [Evaluation Dimensions]
    - hardSkillsAndTechStack: Programming languages, databases, cloud platforms, frameworks, etc.  
    - experienceAndProjects: Relevance, depth, and scope of past roles and projects.  
    - toolsAndMethodologies: CI/CD, testing frameworks, Agile/Scrum, etc.  
    - softSkills: Communication, collaboration, leadership, adaptability.  
    - educationAndBackground: Degrees, certifications, academic credentials.  
    - culturalAndTeamFit: Collaboration, alignment with stated company/team culture.

    Compute a weighted average to generate the overallMatchScore.

    Identify skill overlaps (`matchedSkills`) and explicit gaps (`missingOrWeakSkills`).  
    Candidate-only skills must always go under `extraStrengths`.  

    Commentary must be direct. If data is missing, state it clearly (e.g., "Candidate lacks leadership experience").

    [Instructions]
    - Ensure all key names in the JSON structure remain unchanged (e.g., 'jobFitAnalysis').  
    - Maintain strict adherence to JSON format — do not alter key names.  
    - Always return results as a JSON array of objects, one per candidate.
    - Do not award matches based on industry/sector overlap alone.
    - `matchedSkills` field MUST include the exact intersection of skills that appear in BOTH the `Skills:` section of the candidate resume AND the `Skills:` section of the JD. Do not include synonyms, related technologies, inferred matches, or skills from project descriptions or free text.

    [Input Format]
    You will receive:
    - A job description (JD).
    - A list of candidate objects containing:
    - candidate_id (integer, copy exactly),
    - candidate_combined_text (resume text and skills),
    - milvus_hybrid_score (float similarity score, context only, do not output).

    [Output Format]
    Always return a JSON array (list) of objects. Example:

    [
    {
        "candidate_id": 123,
        "jobFitAnalysis": {
        "overallMatchScore": 0-100,
        "categoryScores": {
            "hardSkillsAndTechStack": 0-100,
            "hardSkillsAndTechStack_commentary": "string",
            "experienceAndProjects": 0-100,
            "toolsAndMethodologies": 0-100,
            "softSkills": 0-100,
            "educationAndBackground": 0-100,
            "culturalAndTeamFit": 0-100,
            "company_fit": 0-100,
            "company_fit_commentary": "string",
            "job_knowledge": 0-100,
            "leadership": 0-100,
            "leadership_commentary": "string"
        },
        "matchedSkills": ["string"],            // DO not include the Speaking Language(Exanple: English), ONLY include skills and soft skills present in both JD and resume
        "missingOrWeakSkills": ["string"],      // Skills required in JD but absent in resume
        "extraStrengths": ["string"],           // Candidate-only skills not in JD
        "roleAlignment": "string",
        "recommendation": "string",
        "strength": ["string"],
        "gap": ["string"],
        "profile_summary": "string",
        "role_benchmark_comparison": 0-100,
        "role_benchmark_comparison_commentary": "string",
        "profile_match_score": 0-100,
        "jd_match_score": 0-100
        }
    }
    ]
"""
 
    return prompt_match
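
# Example batch call (a sketch; values are placeholders). The candidate
# payload mirrors the [Input Format] section above and is passed through the
# resume_text parameter; safe_parse_batch_json() below recovers the list:
# import json
# candidates = [
#     {"candidate_id": 1, "candidate_combined_text": "...", "milvus_hybrid_score": 0.82},
#     {"candidate_id": 2, "candidate_combined_text": "...", "milvus_hybrid_score": 0.61},
# ]
# raw = get_groq_llm(
#     system_message=base_batch_prompt(),
#     resume_text=json.dumps(candidates),
#     job_description="Backend engineer: Python, PostgreSQL.",
#     api_key="gsk_...",  # hypothetical Groq key
# )
# results = safe_parse_batch_json(raw)  # one dict per candidate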








def prompt_analysis(transcript: str):
    prompt = f"""
        You are a senior communication intelligence consultant with deep expertise in executive discourse evaluation and behavioral signal analysis. Your task is to audit the following executive interview transcript, augmented by synchronized real-time facial sentiment analytics.

Conduct a multi-dimensional assessment to extract professionalism indicators—such as body language efficacy and grooming standards—as well as communication metrics, including verbal pacing, articulation clarity, and coherence.

Synthesize your analysis into a structured JSON format aligned with the schema provided below. Ensure adherence to the schema and rigor in your scoring logic, drawing only from the transcript and the embedded sentiment data. The required structured output format follows.
        {{
            "presentation_score": {{
                "score": "0-100"
            }},
            "bussiness_acuman": {{
                "overall": "0-100"
            }},
            "objection_handling": {{
                "score": "0-100"
            }},
            "opportunistic": {{
                "score": 0-100
            }},
            "clossing_technique": {{
                "score": 0-100
            }},
            "individual_quetion_score": [
                {{
                    "question_id": "",
                    "score": 0-100,
                    "commentary": ""
                }}
            ],
            "professionalism": {{
                "professionalism_score": 0-100,
                "dressing": 0-100,
                "grooming": 0-100,
                "body_language": 0-100
            }},
            "positive_attitude": {{
                "positive_attitude_score": 0-100,
                "energy_level": 0-100
            }},
            "communication": {{
                "communication_score": 0-100,
                "pace_and_clarity_of_spech": 0-100,
                "articulation": 0-100,
                "sentiments": 0-100
            }},
            "sociality": {{
                "sociality_score": 0-100,
                "emotions": 0-100,
                "energy_level": 0-100,
                "sentiments": 0-100
            }},
            "red_flag_&_observation": {{
                "risk_level": "Low/Medium/High",
                "indicators": [],
                "mitigation_recommendation": ""
            }},
            "ai_video_score": "Average score of professionalism, positive_attitude, communication, sociality",
            "recomended_actions": [],
            "overall_facial sentiments_score": "str",
            "overall_score": 0-100
        }}

        ----------------
        transcript:{transcript}
        """
    return prompt
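
# Usage example (a sketch; transcript and key are placeholders):
# prompt = prompt_analysis(transcript="Interviewer: ...\nCandidate: ...")
# raw = generate_ai_content(api_key="sk-...", prompt=prompt)
# scores = safe_parse_json(raw)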


def generate_ai_content(api_key: str, prompt: str, model: str = "gpt-5-nano") -> str:
    """
    Generates content using OpenAI's Responses API.

    Args:
        api_key (str): The OpenAI API key.
        prompt (str): The input prompt for content generation.
        model (str): The OpenAI model to use (default: "gpt-5-nano").

    Returns:
        str: The generated content as a string.

    Raises:
        Exception: If there's an error with the API call or empty output.
    """
    try:
        client = OpenAI(api_key=api_key)
        response = client.responses.create(
            model=model,
            input=prompt,
            reasoning={ "effort": "minimal" },
        )

        # Prefer high-level convenience property when available
        text_output = getattr(response, "output_text", None)
        if not text_output:
            # Fallback: extract first text part from the structured output
            try:
                parts = response.output[0].content  # type: ignore[attr-defined]
                text_chunks = [p.text for p in parts if hasattr(p, "text") and p.text]
                text_output = "".join(text_chunks)
            except Exception:
                text_output = None

        if not text_output or not text_output.strip():
            raise Exception("Empty response from OpenAI API")

        return text_output.strip()
    except Exception as e:
        raise Exception(f"OpenAI API error: {str(e)}")
    


async def generate_interview_questions(job_description: str, candidate_profile: str, num_questions: int, skills: List, OPENAI_API_KEY: str, LLM_MODEL: str = "gpt-5-nano") -> dict:
    """
    Generate interview questions based on job description and candidate profile using OpenAI.
    """
    try:
        system_prompt = """You are an expert interviewer. Given a job description, candidate profile, and skill set, generate high-signal interview questions.
        
        Each question should include:
        - question: string
        - question_type: technical / behavioral / experience
        - skill_id: string
        - skill_assessed: string
        - difficulty: easy / medium / hard
        """

        user_prompt = f"""
        [Job Description]
        {job_description}

        [Candidate Profile]
        {candidate_profile}

        [Skills]
        {skills}

        Please generate exactly {num_questions} interview questions in the following structured JSON format:
        {{
            "questions": [
                {{
                    "question": "string",
                    "question_type": "string",
                    "skill_id": "string",
                    "skill_assessed": "string",
                    "difficulty": "string"
                }}
            ]
        }}
        """

        full_prompt = f"{system_prompt}\n\n{user_prompt}"

        response_text = generate_ai_content(api_key=OPENAI_API_KEY, prompt=full_prompt, model=LLM_MODEL)

        # Cautious deserialization (safe_parse_json is defined below in this module)
        questions_json = safe_parse_json(response_text)
        return questions_json

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating questions: {str(e)}")
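
# Usage example (a sketch; the coroutine must be awaited or driven by asyncio,
# and the key, JD, and profile are placeholders):
# import asyncio
# questions = asyncio.run(generate_interview_questions(
#     job_description="Backend engineer: Python, PostgreSQL.",
#     candidate_profile="Senior Python developer; FastAPI.",
#     num_questions=5,
#     skills=["Python", "SQL"],
#     OPENAI_API_KEY="sk-...",  # hypothetical key
# ))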




async def generate_interview_questions_general(job_description: str, candidate_profile: str, num_questions: int, skills: List, OPENAI_API_KEY: str, options: str, topic: str, level: str, LLM_MODEL: str = "gpt-5-nano") -> dict:
    """
    Generate interview questions (aptitude or general) based on topic, level, and skills using OpenAI.
    """
    try:
        
        if options == "aptitude":
            # breakpoint()

            system_prompt = """You are an expert interviewer. Generate multiple choice questions for aptitude testing.
            
            Each question should include:
            - question: string
            - options: {{"A": "string", "B": "string", "C": "string", "D": "string"}}
            - correct_option: "string" (must be one of A, B, C, or D)
            - difficulty: "string"
            - skill_id: "string"
            - skill_assessed: "string"
            
            Return ONLY valid JSON without any additional text or formatting.
            """

            user_prompt = f"""
            topic: {topic}
            level: {level}
            skills: {skills}

            Please generate exactly {num_questions} aptitude questions in the following structured JSON format:
            {{
                "questions": [
                    {{
                        "question": "string",
                        "options": {{"A": "string", "B": "string", "C": "string", "D": "string"}},
                        "correct_option": "string",
                        "difficulty": "string",
                        "skill_id": "string",
                        "skill_assessed": "string"
                    }}
                ]
            }}
            """

            full_prompt = f"{system_prompt}\n\n{user_prompt}"

            response_text = generate_ai_content(api_key=OPENAI_API_KEY, prompt=full_prompt, model=LLM_MODEL)

            # Cautious deserialization
            questions_json = safe_parse_json(response_text)
            return questions_json
        elif options == "general":
            system_prompt = """You are an expert interviewer. Given a job description, candidate profile, and skill set, generate high-signal interview questions.
            
            Each question should include:
            - question: string
            """

            user_prompt = f"""
            topic: {topic}
            level: {level}
            skills: {skills}
            generate basic questions for the candidate profile and job description.
            Please generate exactly {num_questions} interview questions in the following structured JSON format:
            {{
                "questions": [
                    {{
                        "question": "string",
                        "skill_id": "string",
                        "skill_assessed": "string"
                    }}
                ]
            }}
            """

            full_prompt = f"{system_prompt}\n\n{user_prompt}"

            response_text = generate_ai_content(api_key=OPENAI_API_KEY, prompt=full_prompt, model=LLM_MODEL)


            # Cautious deserialization (safe_parse_json is defined below in this module)
            questions_json = safe_parse_json(response_text)

            # General questions carry no per-skill metadata, so apply the defaults
            # expected downstream: a placeholder skill_id and an "Easy" difficulty.
            for question in questions_json["questions"]:
                question["skill_id"] = 0
                question["question_difficulty"] = "Easy"
 
            return questions_json
        else:
            raise HTTPException(status_code=400, detail=f"Invalid options parameter: {options}. Expected 'aptitude' or 'general'")
    except HTTPException:
        # Propagate intentional HTTP errors (e.g., the 400 above) unchanged
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating questions: {str(e)}")







import json
import re
import logging

def safe_parse_json(llm_output: str) -> dict:
    """
    Parses and sanitizes a JSON-like string from LLM output, tolerating formatting anomalies
    like trailing commas, escaped characters, or extra non-JSON text.
    Returns a normalized dict or raises a detailed error.
    """
    try:
        # Log the raw output for debugging
        logging.info(f"Raw LLM output: {llm_output[:500]}..." if len(llm_output) > 500 else f"Raw LLM output: {llm_output}")
        
        # Check if the output is an error message from generate_ai_content
        if llm_output.startswith("An error occurred:") or llm_output.startswith("OpenAI API error:"):
            raise ValueError(f"API Error: {llm_output}")

        # Try to extract JSON from code blocks first (common in Gemini responses)
        code_block_match = re.search(r'```(?:json)?\s*({.*?})\s*```', llm_output, re.DOTALL)
        if code_block_match:
            json_str = code_block_match.group(1)
        else:
            # Extract potential JSON block
            match = re.search(r'\{.*\}', llm_output, re.DOTALL)
            if not match:
                raise ValueError("No JSON object found in response.")
            json_str = match.group(0)

        # --- SANITIZATION PHASE ---

        # Fix trailing commas before } or ]
        json_str = re.sub(r',\s*(\}|\])', r'\1', json_str)
        
        # Remove any markdown formatting that might be present
        json_str = re.sub(r'```(?:json)?', '', json_str)
        json_str = re.sub(r'```', '', json_str)

        # Unescape characters if the LLM returns a double-escaped string.
        # Note: unicode_escape can mangle multi-byte characters, so this is
        # best-effort; the original string is kept if decoding fails.
        try:
            json_str = bytes(json_str, "utf-8").decode("unicode_escape")
        except UnicodeDecodeError:
            pass  # Fall back to original if decode fails

        # --- PARSING PHASE ---
        raw_data = json.loads(json_str)

        # For this specific use case, don't normalize keys to snake_case
        # as the API expects specific key names
        return raw_data

    except json.JSONDecodeError as jde:
        logging.error(f"JSON parsing error: {jde}")
        logging.error(f"Failed to parse JSON: {llm_output}")
        raise ValueError(f"The response was not valid JSON even after sanitization. Original response: {llm_output[:200]}...") from jde

    except Exception as e:
        logging.error(f"Unexpected error during JSON parsing: {e}")
        logging.error(f"Original output: {llm_output}")
        raise ValueError(f"Failed to parse LLM response: {str(e)}. Original response: {llm_output[:200]}...")




def safe_parse_batch_json(llm_output: str):
    """
    Parses and sanitizes a JSON-like string from LLM output, tolerating formatting anomalies
    like trailing commas, escaped characters, or extra non-JSON text.
    Returns a normalized Python object (list or dict) or raises a detailed error.
    """
    try:
        # Log the raw output for debugging
        logging.info(
            f"Raw LLM output: {llm_output[:500]}..."
            if len(llm_output) > 500
            else f"Raw LLM output: {llm_output}"
        )

        # Check if the output is an error message
        if llm_output.startswith("An error occurred:") or llm_output.startswith("OpenAI API error:"):
            raise ValueError(f"API Error: {llm_output}")

        # Try to extract JSON from code blocks first
        code_block_match = re.search(r'(```(?:json)?\s*)([\s\S]*?)(```)', llm_output)
        if code_block_match:
            json_str = code_block_match.group(2).strip()
        else:
            # Extract array or object
            match = re.search(r'(\[.*\]|\{.*\})', llm_output, re.DOTALL)
            if not match:
                raise ValueError("No JSON object or array found in response.")
            json_str = match.group(1)

        # --- SANITIZATION PHASE ---

        # Fix trailing commas before } or ]
        json_str = re.sub(r',\s*(\}|\])', r'\1', json_str)

        # Remove stray markdown language hints
        json_str = re.sub(r'^\s*json\s*', '', json_str, flags=re.IGNORECASE)

        # Unescape characters if double-escaped
        try:
            json_str = bytes(json_str, "utf-8").decode("unicode_escape")
        except UnicodeDecodeError:
            pass  # Keep original if decode fails

        # --- PARSING PHASE ---
        raw_data = json.loads(json_str)

        # Return as-is (could be list or dict)
        return raw_data

    except json.JSONDecodeError as jde:
        logging.error(f"JSON parsing error: {jde}")
        logging.error(f"Failed to parse JSON: {llm_output}")
        raise ValueError(
            f"The response was not valid JSON even after sanitization. "
            f"Original response: {llm_output[:200]}..."
        ) from jde

    except Exception as e:
        logging.error(f"Unexpected error during JSON parsing: {e}")
        logging.error(f"Original output: {llm_output}")
        raise ValueError(
            f"Failed to parse LLM response: {str(e)}. "
            f"Original response: {llm_output[:200]}..."
        )



import os

def translate_audio_file(file_path: str, api_key: str, model: str = "whisper-1") -> str:
    """
    Translates a given audio file to English using OpenAI's Whisper model.

    Args:
        file_path (str): The full path to the audio file.
        api_key (str): The OpenAI API key.
        model (str): The model identifier for Whisper (default is "whisper-1").

    Returns:
        str: The translated text from the audio file.

    Raises:
        FileNotFoundError: If the audio file does not exist.
        Exception: For any issues encountered during API communication.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Audio file not found: {file_path}")

    client = OpenAI(api_key=api_key)

    try:
        with open(file_path, "rb") as audio_file:
            response = client.audio.translations.create(
                model=model,
                file=audio_file
            )
        logging.debug(f"Whisper translation response: {response}")
        return response.text
    except Exception as e:
        raise Exception(f"Translation failed due to: {str(e)}")

# Usage Example (replace with an actual path and key)
# translated_text = translate_audio_file("/path/to/file/german.mp3", api_key="sk-...")
# print(translated_text)



