NOTE(review): this patch was reconstructed from a copy whose line breaks and
indentation had been collapsed. Python indentation was re-derived from syntax.
Whitespace-only context/removed lines in the hunks against pre-existing files
(app/config.py, transcriber.py) are best-effort; if they do not match, apply
with `git apply --ignore-whitespace --recount`. The `index` lines are carried
over from the original patch and no longer match the revised file contents.
NOTE(review): no hunk here attaches the new `correct_transcript` click option
to the `transcribe` / `postprocess` commands that now take a `correct`
parameter; confirm the decorators are added, otherwise click will not pass it.

diff --git a/.gitignore b/.gitignore
index c24424b..aa777b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -133,4 +133,6 @@ dmypy.json
 # Pyre type checker
 .pyre/
 .username
-.idea
\ No newline at end of file
+.idea
+*.json
+output/
\ No newline at end of file
diff --git a/app/commands/__init__.py b/app/commands/__init__.py
index bc2c992..c21841e 100644
--- a/app/commands/__init__.py
+++ b/app/commands/__init__.py
@@ -1,3 +1,5 @@
 from .curator import commands as curator
 from .server import server
 from .media import commands as media
+from .summary import summary
+__all__ = ["media", "curator", "server", "summary"]
\ No newline at end of file
diff --git a/app/commands/summary.py b/app/commands/summary.py
new file mode 100644
index 0000000..7807521
--- /dev/null
+++ b/app/commands/summary.py
@@ -0,0 +1,160 @@
+import os
+import click
+from ..services.summary import correct_transcript, summarize_transcript, configure_api
+from ..config import settings
+
+# config.ini option names to try for each provider's API key. The Claude key
+# is stored as "anthropic_api_key", so the provider name alone cannot be used
+# to build the option name.
+_API_KEY_OPTIONS = {
+    "gemini": ("gemini_api_key",),
+    "openai": ("openai_api_key",),
+    "claude": ("anthropic_api_key", "claude_api_key"),
+}
+
+
+def _resolve_api_key(provider, api_key):
+    """Return ``api_key`` if given, else the first key found in config.ini.
+
+    Returns None when nothing is configured; ``configure_api`` then falls
+    back to the provider's environment variable.
+    """
+    if api_key:
+        return api_key
+    for option in _API_KEY_OPTIONS.get(provider, (f"{provider}_api_key",)):
+        value = settings.config.get(option, None)
+        if value:
+            return value
+    return None
+
+
+def _default_output_path(input_path, suffix):
+    """Return the default output path: input stem plus ``_suffix.txt``."""
+    base_name = os.path.splitext(os.path.basename(input_path))[0]
+    input_dir = os.path.dirname(input_path) or "."
+    return os.path.join(input_dir, f"{base_name}_{suffix}.txt")
+
+
+def _save_text(path, text):
+    """Write ``text`` to ``path`` as UTF-8, creating parent directories."""
+    # os.path.dirname() is "" for a bare filename and os.makedirs("") raises
+    # FileNotFoundError, so fall back to the current directory.
+    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
+    with open(path, 'w', encoding='utf-8') as f:
+        f.write(text)
+
+
+def do_correct(input, output, provider, model, api_key):
+    """Correct the transcript stored at ``input`` and save the result.
+
+    Args:
+        input: Path of the transcript file to read (UTF-8).
+        output: Destination path; defaults to the input stem plus
+            ``_corrected.txt`` next to the input file.
+        provider: AI provider name ("gemini", "openai" or "claude").
+        model: Model name, or None for the provider's default.
+        api_key: API key; when empty it is looked up in config.ini.
+
+    Nothing is written when the provider returns no text.
+    """
+    configure_api(provider=provider, api_key=_resolve_api_key(provider, api_key))
+
+    with open(input, 'r', encoding='utf-8') as f:
+        original_text = f.read()
+
+    corrected_text = correct_transcript(original_text, provider=provider, model_name=model)
+    if not corrected_text:
+        click.echo("Correction produced no text; nothing was written.", err=True)
+        return
+
+    output = output or _default_output_path(input, "corrected")
+    _save_text(output, corrected_text)
+    click.echo(f"Corrected transcript saved to: {output}")
+
+
+def do_summarize(input, output, provider, model, api_key, correct_first):
+    """Summarize the transcript stored at ``input`` and save the result.
+
+    Args:
+        input: Path of the transcript file to read (UTF-8).
+        output: Destination path; defaults to the input stem plus
+            ``_summary.txt`` next to the input file.
+        provider: AI provider name ("gemini", "openai" or "claude").
+        model: Model name, or None for the provider's default.
+        api_key: API key; when empty it is looked up in config.ini.
+        correct_first: Run the correction pass before summarizing.
+
+    Nothing is written when the provider returns no text.
+    """
+    configure_api(provider=provider, api_key=_resolve_api_key(provider, api_key))
+
+    with open(input, 'r', encoding='utf-8') as f:
+        original_text = f.read()
+
+    text_to_summarize = original_text
+
+    if correct_first:
+        click.echo("Correcting transcript before summarization...")
+        corrected_text = correct_transcript(original_text, provider=provider, model_name=model)
+        if corrected_text:
+            text_to_summarize = corrected_text
+        else:
+            # correct_transcript() returns None on failure; formatting None
+            # into the prompt would ask the model to summarize "None".
+            click.echo("Correction failed; summarizing the original transcript.", err=True)
+
+    summary_text = summarize_transcript(text_to_summarize, provider=provider, model_name=model)
+    if not summary_text:
+        click.echo("Summarization produced no text; nothing was written.", err=True)
+        return
+
+    output = output or _default_output_path(input, "summary")
+    _save_text(output, summary_text)
+    click.echo(f"Summary saved to: {output}")
+
+@click.group()
+def summary():
+    """Transcript correction and summarization commands."""
+    pass
+
+@summary.command()
+@click.option("--input", "-i", required=True, help="Path to input transcript file.")
+@click.option("--output", "-o", help="Path to save corrected transcript.")
+@click.option("--provider", default="gemini", type=click.Choice(["gemini", "openai", "claude"]), help="AI provider to use.")
+@click.option("--model", help="Model name to use with the selected provider.")
+@click.option("--api-key", help="Custom API key for the selected provider.")
+def correct(input, output, provider, model, api_key):
+    """Correct transcription errors in the transcript."""
+    do_correct(input, output, provider, model, api_key)
+
+@summary.command()
+@click.option("--input", "-i", required=True, help="Path to input transcript file.")
+@click.option("--output", "-o", help="Path to save summary.")
+@click.option("--provider", default="gemini", type=click.Choice(["gemini", "openai", "claude"]), help="AI provider to use.")
+@click.option("--model", help="Model name to use with the selected provider.")
+@click.option("--api-key", help="Custom API key for the selected provider.")
+@click.option("--correct-first/--no-correct", default=False, help="Correct transcript before summarizing.")
+def summarize(input, output, provider, model, api_key, correct_first):
+    """Generate a summary of the transcript."""
+    do_summarize(input, output, provider, model, api_key, correct_first)
+
+@click.command(name="correct")
+@click.option("--input", "-i", required=True, help="Path to input transcript file.")
+@click.option("--output", "-o", help="Path to save corrected transcript.")
+@click.option("--provider", default="gemini", type=click.Choice(["gemini", "openai", "claude"]), help="AI provider to use.")
+@click.option("--model", help="Model name to use with the selected provider.")
+@click.option("--api-key", help="Custom API key for the selected provider.")
+def correct_command(input, output, provider, model, api_key):
+    """Correct transcription errors in the transcript."""
+    do_correct(input, output, provider, model, api_key)
+
+@click.command(name="summarize")
+@click.option("--input", "-i", required=True, help="Path to input transcript file.")
+@click.option("--output", "-o", help="Path to save summary.")
+@click.option("--provider", default="gemini", type=click.Choice(["gemini", "openai", "claude"]), help="AI provider to use.")
+@click.option("--model", help="Model name to use with the selected provider.")
+@click.option("--api-key", help="Custom API key for the selected provider.")
+@click.option("--correct-first/--no-correct", default=False, help="Correct transcript before summarizing.")
+def summarize_command(input, output, provider, model, api_key, correct_first):
+    """Generate a summary of the transcript."""
+    do_summarize(input, output, provider, model, api_key, correct_first)
\ No newline at end of file
diff --git a/app/config.py b/app/config.py
index 576c077..14975cc 100644
--- a/app/config.py
+++ b/app/config.py
@@ -36,7 +36,14 @@ def get_config_overview(self):
         overview += f"GITHUB_METADATA_REPO_NAME: {self.GITHUB_METADATA_REPO_NAME}\n"
         overview += f"TRANSCRIPTION_SERVER_URL: {self.TRANSCRIPTION_SERVER_URL}\n"
         overview += f"BTC_TRANSCRIPTS_URL: {self.BTC_TRANSCRIPTS_URL}\n"
-
+        overview += f"GEMINI_API_KEY: {'[SET]' if self.GEMINI_API_KEY else '[NOT SET]'}\n"
+        overview += f"OPENAI_API_KEY: {'[SET]' if self.OPENAI_API_KEY else '[NOT SET]'}\n"
+        overview += f"ANTHROPIC_API_KEY: {'[SET]' if self.ANTHROPIC_API_KEY else '[NOT SET]'}\n"
+        overview += f"DEFAULT_SUMMARY_PROVIDER: {self.DEFAULT_SUMMARY_PROVIDER}\n"
+        overview += f"DEFAULT_GEMINI_MODEL: {self.DEFAULT_GEMINI_MODEL}\n"
+        overview += f"DEFAULT_OPENAI_MODEL: {self.DEFAULT_OPENAI_MODEL}\n"
+        overview += f"DEFAULT_CLAUDE_MODEL: {self.DEFAULT_CLAUDE_MODEL}\n"
+
         # Add config.ini settings
         overview += "\nSettings from config.ini:\n"
         for key, value in self.config.items():
@@ -80,7 +87,35 @@ def GITHUB_PRIVATE_KEY(self):
     def GITHUB_INSTALLATION_ID(self):
         return self._get_env_variable('GITHUB_INSTALLATION_ID', "To use GitHub App integration, you need to define a 'GITHUB_INSTALLATION_ID' in your .env file")
 
+    @property
+    def GEMINI_API_KEY(self):
+        # First check environment, then config file
+        return os.getenv('GEMINI_API_KEY') or self.config.get('gemini_api_key', '')
+
+    @property
+    def OPENAI_API_KEY(self):
+        return os.getenv('OPENAI_API_KEY') or self.config.get('openai_api_key', '')
+
+    @property
+    def ANTHROPIC_API_KEY(self):
+        return os.getenv('ANTHROPIC_API_KEY') or self.config.get('anthropic_api_key', '')
+
+    @property
+    def DEFAULT_SUMMARY_PROVIDER(self):
+        return os.getenv('DEFAULT_SUMMARY_PROVIDER') or self.config.get('default_summary_provider', 'gemini')
+
+    @property
+    def DEFAULT_GEMINI_MODEL(self):
+        return os.getenv('DEFAULT_GEMINI_MODEL') or self.config.get('default_gemini_model', 'gemma-3-27b-it')
+
+    @property
+    def DEFAULT_OPENAI_MODEL(self):
+        return os.getenv('DEFAULT_OPENAI_MODEL') or self.config.get('default_openai_model', 'gpt-4o')
+
+    @property
+    def DEFAULT_CLAUDE_MODEL(self):
+        return os.getenv('DEFAULT_CLAUDE_MODEL') or self.config.get('default_claude_model', 'claude-3-7-sonnet-20250219')
 
 
 # Initialize the Settings class and expose an instance
 settings = Settings()
diff --git a/app/services/__init__.py b/app/services/__init__.py
index 358cca7..d8a7a4f 100644
--- a/app/services/__init__.py
+++ b/app/services/__init__.py
@@ -1,2 +1,7 @@
 from .whisper import Whisper
-from .deepgram import Deepgram
\ No newline at end of file
+from .deepgram import Deepgram
+from .summary import (
+    correct_transcript,
+    summarize_transcript,
+    configure_api
+)
\ No newline at end of file
diff --git a/app/services/summary.py b/app/services/summary.py
new file mode 100644
index 0000000..64b372a
--- /dev/null
+++ b/app/services/summary.py
@@ -0,0 +1,320 @@
+import os
+import google.generativeai as genai
+import argparse
+import getpass
+from dotenv import load_dotenv
+import openai
+import anthropic
+load_dotenv()
+
+# Output-token budgets for Claude requests. The previous hard-coded 100000
+# exceeds the documented output limit of both default Claude models, so the
+# request is rejected. NOTE(review): limits are per model - confirm against
+# Anthropic's model table when changing the default models.
+CLAUDE_CORRECTION_MAX_TOKENS = 64000
+CLAUDE_SUMMARY_MAX_TOKENS = 4096
+
+# Key chosen by configure_api(provider="claude"). None lets the Anthropic
+# client fall back to the ANTHROPIC_API_KEY environment variable.
+_anthropic_api_key = None
+
+
+def configure_api(provider="gemini", api_key=None):
+    """Configure the client library for ``provider`` and return the key used.
+
+    Args:
+        provider: "gemini", "openai" or "claude".
+        api_key: Explicit key. When empty, GEMINI_API_KEY, OPENAI_API_KEY or
+            ANTHROPIC_API_KEY is read from the environment instead.
+
+    Returns:
+        The API key that was configured.
+
+    Raises:
+        ValueError: If the provider is unsupported or no key is available.
+    """
+    global _anthropic_api_key
+    if provider == "gemini":
+        if not api_key:
+            api_key = os.getenv("GEMINI_API_KEY")
+        if not api_key:
+            raise ValueError("Gemini API key not provided and not found in environment.")
+        genai.configure(api_key=api_key)
+        print("Gemini API configured successfully.")
+    elif provider == "openai":
+        if not api_key:
+            api_key = os.getenv("OPENAI_API_KEY")
+        if not api_key:
+            raise ValueError("OpenAI API key not provided and not found in environment.")
+        openai.api_key = api_key
+        print("OpenAI API configured successfully.")
+    elif provider == "claude":
+        if not api_key:
+            api_key = os.getenv("ANTHROPIC_API_KEY")
+        if not api_key:
+            raise ValueError("Claude API key not provided and not found in environment.")
+        # Nothing is configured at module level for anthropic here, so keep
+        # the key for the clients created by the *_with_claude functions.
+        _anthropic_api_key = api_key
+        print("Claude API configured successfully.")
+    else:
+        raise ValueError(f"Unsupported provider: {provider}")
+    return api_key
+
+
+def create_correction_prompt(transcript_text):
+    """Return the correction prompt with ``transcript_text`` embedded."""
+    return f"""
+You are an expert in Bitcoin, Monero, and blockchain technologies.
+The transcript below was generated from a conference audio recording using automatic speech recognition (ASR) and may contain misrecognized or misspelled technical terms.
+
+Your task is to correct all errors in the transcript while strictly following these rules:
+1. Correct all misspelled or misrecognized Bitcoin and Monero-specific terms.
+2. Capitalize technical terms and proper nouns correctly.
+3. Remove ASR artifacts like stutters and false starts.
+4. Remove nonsensical or duplicated lines.
+5. Preserve the original sentence structure and meaning.
+6. Only output the corrected transcript text.
+
+--- START OF TRANSCRIPT TO CORRECT ---
+
+{transcript_text}
+
+--- END OF TRANSCRIPT TO CORRECT ---
+"""
+
+
+def create_summary_prompt(text):
+    """Return the summarization prompt with ``text`` embedded."""
+    return f"""
+You are an expert technical summarizer.
+
+Summarize the following transcript of a conference talk about Bitcoin and Monero. Your summary should be concise and cover all the key points mentioned by the speaker. Avoid repeating unnecessary phrases or filler words. Output only the summary, without introduction or conclusion lines.
+
+--- START OF TRANSCRIPT ---
+
+{text}
+
+--- END OF TRANSCRIPT ---
+"""
+
+
+def correct_transcript(text_to_correct, provider="gemini", model_name=None):
+    """Correct ``text_to_correct`` with the chosen provider.
+
+    Returns the corrected text, or None if the provider call failed.
+    Raises ValueError for an unsupported provider.
+    """
+    if provider == "gemini":
+        return correct_transcript_with_gemini(text_to_correct, model_name)
+    elif provider == "openai":
+        return correct_transcript_with_openai(text_to_correct, model_name)
+    elif provider == "claude":
+        return correct_transcript_with_claude(text_to_correct, model_name)
+    else:
+        raise ValueError(f"Unsupported provider: {provider}")
+
+
+def correct_transcript_with_gemini(text_to_correct, model_name=None):
+    """Correct a transcript with Gemini; returns None on any API error."""
+    model = genai.GenerativeModel(model_name or 'gemma-3-27b-it')
+    prompt = create_correction_prompt(text_to_correct)
+    try:
+        print("Sending correction request to Gemini API...")
+        response = model.generate_content(prompt)
+        corrected_text = response.text.strip().strip("`").strip()
+        print("Correction received.")
+        return corrected_text
+    except Exception as e:
+        print(f"Error during Gemini correction: {e}")
+        return None
+
+
+def correct_transcript_with_openai(text_to_correct, model_name=None):
+    """Correct a transcript with OpenAI; returns None on any API error."""
+    model = model_name or "gpt-4o"
+    prompt = create_correction_prompt(text_to_correct)
+    try:
+        print(f"Sending correction request to OpenAI API using {model}...")
+        response = openai.chat.completions.create(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=0.0
+        )
+        corrected_text = response.choices[0].message.content.strip()
+        print("Correction received.")
+        return corrected_text
+    except Exception as e:
+        print(f"Error during OpenAI correction: {e}")
+        return None
+
+
+def _claude_client():
+    """Create an Anthropic client with the key chosen in configure_api()."""
+    return anthropic.Anthropic(api_key=_anthropic_api_key)
+
+
+def correct_transcript_with_claude(text_to_correct, model_name=None, max_tokens=CLAUDE_CORRECTION_MAX_TOKENS):
+    """Correct a transcript with Claude; returns None on any API error.
+
+    The reply is streamed and the text deltas are joined into one string.
+    """
+    model = model_name or "claude-3-7-sonnet-20250219"
+    prompt = create_correction_prompt(text_to_correct)
+    try:
+        print(f"Sending correction request to Claude API using {model} (streaming)...")
+        # messages.stream() yields the decoded reply text; iterating the raw
+        # with_streaming_response body returns undecoded SSE event data.
+        with _claude_client().messages.stream(
+            model=model,
+            max_tokens=max_tokens,
+            temperature=0.0,
+            messages=[{"role": "user", "content": prompt}]
+        ) as stream:
+            corrected_text = "".join(stream.text_stream)
+        print("Correction received.")
+        return corrected_text.strip()
+    except Exception as e:
+        print(f"Error during Claude correction: {e}")
+        return None
+
+
+def summarize_transcript(text, provider="gemini", model_name=None):
+    """Summarize ``text`` with the chosen provider.
+
+    Returns the summary, or None if the provider call failed.
+    Raises ValueError for an unsupported provider.
+    """
+    if provider == "gemini":
+        return summarize_transcript_with_gemini(text, model_name)
+    elif provider == "openai":
+        return summarize_transcript_with_openai(text, model_name)
+    elif provider == "claude":
+        return summarize_transcript_with_claude(text, model_name)
+    else:
+        raise ValueError(f"Unsupported provider: {provider}")
+
+
+def summarize_transcript_with_gemini(text, model_name=None):
+    """Summarize a transcript with Gemini; returns None on any API error."""
+    model = genai.GenerativeModel(model_name or 'gemma-3-27b-it')
+    prompt = create_summary_prompt(text)
+    try:
+        print("Sending summarization request to Gemini API...")
+        response = model.generate_content(prompt)
+        summary_text = response.text.strip().strip("`").strip()
+        print("Summary received.")
+        return summary_text
+    except Exception as e:
+        print(f"Error during Gemini summarization: {e}")
+        return None
+
+
+def summarize_transcript_with_openai(text, model_name=None):
+    """Summarize a transcript with OpenAI; returns None on any API error."""
+    model = model_name or "gpt-4.1-2025-04-14"
+    prompt = create_summary_prompt(text)
+    try:
+        print(f"Sending summarization request to OpenAI API using {model}...")
+        response = openai.chat.completions.create(
+            model=model,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=0.0
+        )
+        summary_text = response.choices[0].message.content.strip()
+        print("Summary received.")
+        return summary_text
+    except Exception as e:
+        print(f"Error during OpenAI summarization: {e}")
+        return None
+
+
+def summarize_transcript_with_claude(text, model_name=None, max_tokens=CLAUDE_SUMMARY_MAX_TOKENS):
+    """Summarize a transcript with Claude; returns None on any API error."""
+    model = model_name or "claude-3-5-sonnet-20240620"
+    prompt = create_summary_prompt(text)
+    try:
+        print(f"Sending summarization request to Claude API using {model}...")
+        response = _claude_client().messages.create(
+            model=model,
+            max_tokens=max_tokens,
+            temperature=0.0,
+            messages=[{"role": "user", "content": prompt}]
+        )
+        summary_text = response.content[0].text.strip()
+        print("Summary received.")
+        return summary_text
+    except Exception as e:
+        print(f"Error during Claude summarization: {e}")
+        return None
+
+
+def main():
+    """Command-line entry point for correcting and/or summarizing a file."""
+    parser = argparse.ArgumentParser(description="Transcript correction and summarization using AI models.")
+    parser.add_argument("-i", "--input", required=True, help="Path to input transcript file.")
+    parser.add_argument("-c", "--correct", action="store_true", help="Perform correction of the transcript.")
+    parser.add_argument("-s", "--summarize", action="store_true", help="Perform summarization of the transcript.")
+    parser.add_argument("--output", help="Path to save corrected transcript.")
+    parser.add_argument("--summary", help="Path to save summary.")
+
+    parser.add_argument("--provider", choices=["gemini", "openai", "claude"], default="gemini",
+                        help="AI provider to use (default: gemini)")
+    parser.add_argument("--api-key", help="API key for the selected provider")
+    parser.add_argument("--model", help="Model name to use with the selected provider")
+
+    args = parser.parse_args()
+
+    if not args.correct and not args.summarize:
+        parser.print_help()
+        print("\nError: You must specify at least one of --correct or --summarize.")
+        return
+
+    try:
+        configure_api(provider=args.provider, api_key=args.api_key)
+    except ValueError as e:
+        print(f"Configuration Error: {e}")
+        return
+
+    try:
+        with open(args.input, 'r', encoding='utf-8') as f:
+            original_text = f.read()
+        print(f"Loaded transcript from {args.input}")
+    except Exception as e:
+        print(f"Failed to read input file: {e}")
+        return
+
+    base_name = os.path.splitext(os.path.basename(args.input))[0]
+    input_dir = os.path.dirname(args.input) or "."
+
+    corrected_text = None
+
+    if args.correct:
+        corrected_text = correct_transcript(original_text, provider=args.provider, model_name=args.model)
+        if corrected_text:
+            out_path = args.output or os.path.join(input_dir, f"{base_name}_corrected.txt")
+            try:
+                # dirname is "" for a bare filename; os.makedirs("") raises.
+                os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
+                with open(out_path, 'w', encoding='utf-8') as f:
+                    f.write(corrected_text)
+                print(f"Corrected transcript saved to: {out_path}")
+            except Exception as e:
+                print(f"Failed to write corrected transcript: {e}")
+
+    if args.summarize:
+        text_to_summarize = corrected_text if corrected_text else original_text
+        summary_text = summarize_transcript(text_to_summarize, provider=args.provider, model_name=args.model)
+        if summary_text:
+            sum_path = args.summary or os.path.join(input_dir, f"{base_name}_summary.txt")
+            try:
+                os.makedirs(os.path.dirname(sum_path) or ".", exist_ok=True)
+                with open(sum_path, 'w', encoding='utf-8') as f:
+                    f.write(summary_text)
+                print(f"Summary saved to: {sum_path}")
+            except Exception as e:
+                print(f"Failed to write summary: {e}")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/routes/__init__.py b/routes/__init__.py
index e69de29..bcedb01 100644
--- a/routes/__init__.py
+++ b/routes/__init__.py
@@ -0,0 +1 @@
+from .summary import summary_bp
diff --git a/routes/summary.py b/routes/summary.py
new file mode 100644
index 0000000..dade838
--- /dev/null
+++ b/routes/summary.py
@@ -0,0 +1,60 @@
+from flask import Blueprint, request, jsonify
+from app.services.summary import correct_transcript, summarize_transcript, configure_api
+
+summary_bp = Blueprint('summary', __name__)
+
+@summary_bp.route('/correct', methods=['POST'])
+def correct():
+    """API endpoint to correct transcript text.
+
+    Expects a JSON object with "text" and optional "provider" and "model".
+    Responds 400 for a missing or malformed body, 500 when configuration or
+    the provider call raises, and 502 when the provider returns no text.
+    """
+    # silent=True makes a missing or malformed JSON body come back as None,
+    # so it is reported through the JSON 400 response below.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or 'text' not in data:
+        return jsonify({"error": "Missing text in request"}), 400
+
+    provider = data.get('provider', 'gemini')
+    model = data.get('model', None)
+
+    try:
+        configure_api(provider=provider)
+        corrected_text = correct_transcript(data['text'], provider=provider, model_name=model)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+    # The service layer signals a failed provider call by returning None.
+    if corrected_text is None:
+        return jsonify({"error": "Transcript correction failed"}), 502
+    return jsonify({"corrected_text": corrected_text})
+
+@summary_bp.route('/summarize', methods=['POST'])
+def summarize():
+    """API endpoint to summarize transcript text.
+
+    Expects a JSON object with "text" and optional "provider" and "model".
+    Responds 400 for a missing or malformed body, 500 when configuration or
+    the provider call raises, and 502 when the provider returns no text.
+    """
+    # silent=True makes a missing or malformed JSON body come back as None,
+    # so it is reported through the JSON 400 response below.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or 'text' not in data:
+        return jsonify({"error": "Missing text in request"}), 400
+
+    provider = data.get('provider', 'gemini')
+    model = data.get('model', None)
+
+    try:
+        configure_api(provider=provider)
+        summary_text = summarize_transcript(data['text'], provider=provider, model_name=model)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+    # The service layer signals a failed provider call by returning None.
+    if summary_text is None:
+        return jsonify({"error": "Transcript summarization failed"}), 502
+    return jsonify({"summary": summary_text})
\ No newline at end of file
diff --git a/transcriber.py b/transcriber.py
index 4d6c745..b13e561 100644
--- a/transcriber.py
+++ b/transcriber.py
@@ -6,6 +6,7 @@
 
 from app import __app_name__, __version__, commands, utils
 from app.api_client import APIClient
+from app.commands.summary import summary, correct_command, summarize_command
 from app.commands.cli_utils import (
     get_transcription_url,
     auto_start_server
@@ -119,6 +120,13 @@ def print_help(ctx, param, value):
     show_default=True,
     help="Summarize the transcript [only available with deepgram]",
 )
+correct_transcript = click.option(
+    "--correct",
+    is_flag=True,
+    default=settings.config.getboolean("correct_transcript", False),
+    show_default=True,
+    help="Correct the transcript using AI models [works with any transcription method]",
+)
 cutoff_date = click.option(
     "--cutoff-date",
     type=str,
@@ -283,6 +291,7 @@ def transcribe(
     username: str,
     github: bool,
     deepgram: bool,
+    correct: bool,
     summarize: bool,
     diarize: bool,
     upload: bool,
@@ -320,6 +329,7 @@ def transcribe(
         "category": list(category),
         "username": username,
         "github": github,
+        "correct": correct,
         "deepgram": deepgram,
         "summarize": summarize,
         "diarize": diarize,
@@ -449,6 +459,7 @@ def postprocess(
     username: str,
     github: bool,
     upload: bool,
+    correct: bool,
     markdown: bool,
     text: bool,
     json: bool,
@@ -524,6 +535,9 @@ def postprocess(
 cli.add_command(commands.media)
 cli.add_command(commands.curator)
 cli.add_command(commands.server)
+cli.add_command(summary)
+cli.add_command(correct_command)
+cli.add_command(summarize_command)
 
 if __name__ == "__main__":
     cli()