From 5f90f26e0e1ac951e79f0f788f712d546e38c9ba Mon Sep 17 00:00:00 2001 From: staru09 Date: Thu, 26 Jun 2025 02:02:26 +0530 Subject: [PATCH 1/2] summariser using custom models added --- .gitignore | 3 +- app/commands/__init__.py | 1 + app/commands/summary.py | 75 +++++++++++ app/config.py | 38 +++++- app/services/__init__.py | 7 +- app/services/summary.py | 269 +++++++++++++++++++++++++++++++++++++++ routes/__init__.py | 1 + routes/summary.py | 38 ++++++ transcriber.py | 13 ++ 9 files changed, 442 insertions(+), 3 deletions(-) create mode 100644 app/commands/summary.py create mode 100644 app/services/summary.py create mode 100644 routes/summary.py diff --git a/.gitignore b/.gitignore index c24424b..01d1ba3 100644 --- a/.gitignore +++ b/.gitignore @@ -133,4 +133,5 @@ dmypy.json # Pyre type checker .pyre/ .username -.idea \ No newline at end of file +.idea +*.json \ No newline at end of file diff --git a/app/commands/__init__.py b/app/commands/__init__.py index bc2c992..fae6194 100644 --- a/app/commands/__init__.py +++ b/app/commands/__init__.py @@ -1,3 +1,4 @@ from .curator import commands as curator from .server import server from .media import commands as media +from .summary import summary \ No newline at end of file diff --git a/app/commands/summary.py b/app/commands/summary.py new file mode 100644 index 0000000..64f9bb4 --- /dev/null +++ b/app/commands/summary.py @@ -0,0 +1,75 @@ +import os +import click +from ..services.summary import correct_transcript, summarize_transcript, configure_api +from .cli_utils import get_config + +# Add these at the end of app/commands/summary.py + +@click.command(name="correct") +@click.option("--input", "-i", required=True, help="Path to input transcript file.") +@click.option("--output", "-o", help="Path to save corrected transcript.") +@click.option("--provider", default="gemini", type=click.Choice(["gemini", "openai", "claude"]), help="AI provider to use.") +@click.option("--model", help="Model name to use with the selected provider.") +@click.option("--api-key", help="Custom API key for the selected provider.") +def correct_command(input, output, provider, model, api_key): + """Correct transcription errors in the transcript.""" + config = get_config() + + if not api_key: + api_key = config.get(f"{provider}_api_key", None) + + configure_api(provider=provider, api_key=api_key) + + with open(input, 'r', encoding='utf-8') as f: + original_text = f.read() + + corrected_text = correct_transcript(original_text, provider=provider, model_name=model) + + if corrected_text: + if not output: + base_name = os.path.splitext(os.path.basename(input))[0] + input_dir = os.path.dirname(input) or "." + output = os.path.join(input_dir, f"{base_name}_corrected.txt") + + os.makedirs(os.path.dirname(output), exist_ok=True) + with open(output, 'w', encoding='utf-8') as f: + f.write(corrected_text) + click.echo(f"Corrected transcript saved to: {output}") + +@click.command(name="summarize") +@click.option("--input", "-i", required=True, help="Path to input transcript file.") +@click.option("--output", "-o", help="Path to save summary.") +@click.option("--provider", default="gemini", type=click.Choice(["gemini", "openai", "claude"]), help="AI provider to use.") +@click.option("--model", help="Model name to use with the selected provider.") +@click.option("--api-key", help="Custom API key for the selected provider.") +@click.option("--correct-first/--no-correct", default=False, help="Correct transcript before summarizing.") +def summarize_command(input, output, provider, model, api_key, correct_first): + """Generate a summary of the transcript.""" + config = get_config() + + if not api_key: + api_key = config.get(f"{provider}_api_key", None) + + configure_api(provider=provider, api_key=api_key) + + with open(input, 'r', encoding='utf-8') as f: + original_text = f.read() + + text_to_summarize = original_text + + if correct_first: + click.echo("Correcting transcript before summarization...") + text_to_summarize = correct_transcript(original_text, provider=provider, model_name=model) + + summary_text = summarize_transcript(text_to_summarize, provider=provider, model_name=model) + + if summary_text: + if not output: + base_name = os.path.splitext(os.path.basename(input))[0] + input_dir = os.path.dirname(input) or "." + output = os.path.join(input_dir, f"{base_name}_summary.txt") + + os.makedirs(os.path.dirname(output), exist_ok=True) + with open(output, 'w', encoding='utf-8') as f: + f.write(summary_text) + click.echo(f"Summary saved to: {output}") \ No newline at end of file diff --git a/app/config.py b/app/config.py index 576c077..14975cc 100644 --- a/app/config.py +++ b/app/config.py @@ -36,7 +36,14 @@ def get_config_overview(self): overview += f"GITHUB_METADATA_REPO_NAME: {self.GITHUB_METADATA_REPO_NAME}\n" overview += f"TRANSCRIPTION_SERVER_URL: {self.TRANSCRIPTION_SERVER_URL}\n" overview += f"BTC_TRANSCRIPTS_URL: {self.BTC_TRANSCRIPTS_URL}\n" - + overview += f"GEMINI_API_KEY: {'[SET]' if self.GEMINI_API_KEY else '[NOT SET]'}\n" + overview += f"OPENAI_API_KEY: {'[SET]' if self.OPENAI_API_KEY else '[NOT SET]'}\n" + overview += f"ANTHROPIC_API_KEY: {'[SET]' if self.ANTHROPIC_API_KEY else '[NOT SET]'}\n" + overview += f"DEFAULT_SUMMARY_PROVIDER: {self.DEFAULT_SUMMARY_PROVIDER}\n" + overview += f"DEFAULT_GEMINI_MODEL: {self.DEFAULT_GEMINI_MODEL}\n" + overview += f"DEFAULT_OPENAI_MODEL: {self.DEFAULT_OPENAI_MODEL}\n" + overview += f"DEFAULT_CLAUDE_MODEL: {self.DEFAULT_CLAUDE_MODEL}\n" + # Add config.ini settings overview += "\nSettings from config.ini:\n" for key, value in self.config.items(): @@ -80,7 +87,36 @@ def GITHUB_PRIVATE_KEY(self): def GITHUB_INSTALLATION_ID(self): return self._get_env_variable('GITHUB_INSTALLATION_ID', "To use GitHub App integration, you need to define a 'GITHUB_INSTALLATION_ID' in your .env file") + # Add these properties to the Settings class in app/config.py + + @property + def GEMINI_API_KEY(self): + # First check environment, then config file + return os.getenv('GEMINI_API_KEY') or self.config.get('gemini_api_key', '') + + @property + def OPENAI_API_KEY(self): + return os.getenv('OPENAI_API_KEY') or self.config.get('openai_api_key', '') + + @property + def ANTHROPIC_API_KEY(self): + return os.getenv('ANTHROPIC_API_KEY') or self.config.get('anthropic_api_key', '') + @property + def DEFAULT_SUMMARY_PROVIDER(self): + return os.getenv('DEFAULT_SUMMARY_PROVIDER') or self.config.get('default_summary_provider', 'gemini') + + @property + def DEFAULT_GEMINI_MODEL(self): + return os.getenv('DEFAULT_GEMINI_MODEL') or self.config.get('default_gemini_model', 'gemma-3-27b-it') + + @property + def DEFAULT_OPENAI_MODEL(self): + return os.getenv('DEFAULT_OPENAI_MODEL') or self.config.get('default_openai_model', 'gpt-4o') + + @property + def DEFAULT_CLAUDE_MODEL(self): + return os.getenv('DEFAULT_CLAUDE_MODEL') or self.config.get('default_claude_model', 'claude-3-7-sonnet-20250219') # Initialize the Settings class and expose an instance settings = Settings() diff --git a/app/services/__init__.py b/app/services/__init__.py index 358cca7..d8a7a4f 100644 --- a/app/services/__init__.py +++ b/app/services/__init__.py @@ -1,2 +1,7 @@ from .whisper import Whisper -from .deepgram import Deepgram \ No newline at end of file +from .deepgram import Deepgram +from .summary import ( + correct_transcript, + summarize_transcript, + configure_api +) \ No newline at end of file diff --git a/app/services/summary.py b/app/services/summary.py new file mode 100644 index 0000000..64b372a --- /dev/null +++ b/app/services/summary.py @@ -0,0 +1,269 @@ +import os +import google.generativeai as genai +import argparse +import getpass +from dotenv import load_dotenv +import openai +import anthropic +load_dotenv() + + +def configure_api(provider="gemini", api_key=None): + if provider == "gemini": + if not api_key: + api_key = os.getenv("GEMINI_API_KEY") + if not api_key: + raise ValueError("Gemini API key not provided and not found in environment.") + genai.configure(api_key=api_key) + print("Gemini API configured successfully.") + elif provider == "openai": + if not api_key: + api_key = os.getenv("OPENAI_API_KEY") + if not api_key: + raise ValueError("OpenAI API key not provided and not found in environment.") + openai.api_key = api_key + print("OpenAI API configured successfully.") + elif provider == "claude": + if not api_key: + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + raise ValueError("Claude API key not provided and not found in environment.") + print("Claude API configured successfully.") + else: + raise ValueError(f"Unsupported provider: {provider}") + return api_key + + +def create_correction_prompt(transcript_text): + return f""" +You are an expert in Bitcoin, Monero, and blockchain technologies. +The transcript below was generated from a conference audio recording using automatic speech recognition (ASR) and may contain misrecognized or misspelled technical terms. + +Your task is to correct all errors in the transcript while strictly following these rules: +1. Correct all misspelled or misrecognized Bitcoin and Monero-specific terms. +2. Capitalize technical terms and proper nouns correctly. +3. Remove ASR artifacts like stutters and false starts. +4. Remove nonsensical or duplicated lines. +5. Preserve the original sentence structure and meaning. +6. Only output the corrected transcript text. + +--- START OF TRANSCRIPT TO CORRECT --- + +{transcript_text} + +--- END OF TRANSCRIPT TO CORRECT --- +""" + + +def create_summary_prompt(text): + return f""" +You are an expert technical summarizer. + +Summarize the following transcript of a conference talk about Bitcoin and Monero. Your summary should be concise and cover all the key points mentioned by the speaker. Avoid repeating unnecessary phrases or filler words. Output only the summary, without introduction or conclusion lines. + +--- START OF TRANSCRIPT --- + +{text} + +--- END OF TRANSCRIPT --- +""" + + +def correct_transcript(text_to_correct, provider="gemini", model_name=None): + prompt = create_correction_prompt(text_to_correct) + + if provider == "gemini": + return correct_transcript_with_gemini(text_to_correct, model_name) + elif provider == "openai": + return correct_transcript_with_openai(text_to_correct, model_name) + elif provider == "claude": + return correct_transcript_with_claude(text_to_correct, model_name) + else: + raise ValueError(f"Unsupported provider: {provider}") + + +def correct_transcript_with_gemini(text_to_correct, model_name=None): + model = genai.GenerativeModel(model_name or 'gemma-3-27b-it') + prompt = create_correction_prompt(text_to_correct) + try: + print("Sending correction request to Gemini API...") + response = model.generate_content(prompt) + corrected_text = response.text.strip().strip("`").strip() + print("Correction received.") + return corrected_text + except Exception as e: + print(f"Error during Gemini correction: {e}") + return None + + +def correct_transcript_with_openai(text_to_correct, model_name=None): + model = model_name or "gpt-4o" + prompt = create_correction_prompt(text_to_correct) + try: + print(f"Sending correction request to OpenAI API using {model}...") + response = openai.chat.completions.create( + model=model, + messages=[{"role": "user", "content": prompt}], + temperature=0.0 + ) + corrected_text = response.choices[0].message.content.strip() + print("Correction received.") + return corrected_text + except Exception as e: + print(f"Error during OpenAI correction: {e}") + return None + + +def correct_transcript_with_claude(text_to_correct, model_name=None): + model = model_name or "claude-3-7-sonnet-20250219" + prompt = create_correction_prompt(text_to_correct) + try: + print(f"Sending correction request to Claude API using {model} (streaming)...") + client = anthropic.Anthropic() + with client.messages.with_streaming_response.create( + model=model, + max_tokens=100000, + temperature=0.0, + stream = True, + messages=[{"role": "user", "content": prompt}] + ) as response: + corrected_text = "" + for chunk in response.iter_text(): + corrected_text += chunk + print("Correction received.") + return corrected_text.strip() + except Exception as e: + print(f"Error during Claude correction: {e}") + return None + + +def summarize_transcript(text, provider="gemini", model_name=None): + if provider == "gemini": + return summarize_transcript_with_gemini(text, model_name) + elif provider == "openai": + return summarize_transcript_with_openai(text, model_name) + elif provider == "claude": + return summarize_transcript_with_claude(text, model_name) + else: + raise ValueError(f"Unsupported provider: {provider}") + + +def summarize_transcript_with_gemini(text, model_name=None): + model = genai.GenerativeModel(model_name or 'gemma-3-27b-it') + prompt = create_summary_prompt(text) + try: + print("Sending summarization request to Gemini API...") + response = model.generate_content(prompt) + summary_text = response.text.strip().strip("`").strip() + print("Summary received.") + return summary_text + except Exception as e: + print(f"Error during Gemini summarization: {e}") + return None + + +def summarize_transcript_with_openai(text, model_name=None): + model = model_name or "gpt-4.1-2025-04-14" + prompt = create_summary_prompt(text) + try: + print(f"Sending summarization request to OpenAI API using {model}...") + response = openai.chat.completions.create( + model=model, + messages=[{"role": "user", "content": prompt}], + temperature=0.0 + ) + summary_text = response.choices[0].message.content.strip() + print("Summary received.") + return summary_text + except Exception as e: + print(f"Error during OpenAI summarization: {e}") + return None + + +def summarize_transcript_with_claude(text, model_name=None): + model = model_name or "claude-3-5-sonnet-20240620" + prompt = create_summary_prompt(text) + try: + print(f"Sending summarization request to Claude API using {model}...") + client = anthropic.Anthropic() + response = client.messages.create( + model=model, + max_tokens=100000, + temperature=0.0, + messages=[{"role": "user", "content": prompt}] + ) + summary_text = response.content[0].text.strip() + print("Summary received.") + return summary_text + except Exception as e: + print(f"Error during Claude summarization: {e}") + return None + + +def main(): + parser = argparse.ArgumentParser(description="Transcript correction and summarization using AI models.") + parser.add_argument("-i", "--input", required=True, help="Path to input transcript file.") + parser.add_argument("-c", "--correct", action="store_true", help="Perform correction of the transcript.") + parser.add_argument("-s", "--summarize", action="store_true", help="Perform summarization of the transcript.") + parser.add_argument("--output", help="Path to save corrected transcript.") + parser.add_argument("--summary", help="Path to save summary.") + + parser.add_argument("--provider", choices=["gemini", "openai", "claude"], default="gemini", + help="AI provider to use (default: gemini)") + parser.add_argument("--api-key", help="API key for the selected provider") + parser.add_argument("--model", help="Model name to use with the selected provider") + + args = parser.parse_args() + + if not args.correct and not args.summarize: + parser.print_help() + print("\nError: You must specify at least one of --correct or --summarize.") + return + + try: + configure_api(provider=args.provider, api_key=args.api_key) + except ValueError as e: + print(f"Configuration Error: {e}") + return + + try: + with open(args.input, 'r', encoding='utf-8') as f: + original_text = f.read() + print(f"Loaded transcript from {args.input}") + except Exception as e: + print(f"Failed to read input file: {e}") + return + + base_name = os.path.splitext(os.path.basename(args.input))[0] + input_dir = os.path.dirname(args.input) or "." + + corrected_text = None + + if args.correct: + corrected_text = correct_transcript(original_text, provider=args.provider, model_name=args.model) + if corrected_text: + out_path = args.output or os.path.join(input_dir, f"{base_name}_corrected.txt") + try: + os.makedirs(os.path.dirname(out_path), exist_ok=True) + with open(out_path, 'w', encoding='utf-8') as f: + f.write(corrected_text) + print(f"Corrected transcript saved to: {out_path}") + except Exception as e: + print(f"Failed to write corrected transcript: {e}") + + if args.summarize: + text_to_summarize = corrected_text if corrected_text else original_text + summary_text = summarize_transcript(text_to_summarize, provider=args.provider, model_name=args.model) + if summary_text: + sum_path = args.summary or os.path.join(input_dir, f"{base_name}_summary.txt") + try: + os.makedirs(os.path.dirname(sum_path), exist_ok=True) + with open(sum_path, 'w', encoding='utf-8') as f: + f.write(summary_text) + print(f"Summary saved to: {sum_path}") + except Exception as e: + print(f"Failed to write summary: {e}") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/routes/__init__.py b/routes/__init__.py index e69de29..bcedb01 100644 --- a/routes/__init__.py +++ b/routes/__init__.py @@ -0,0 +1 @@ +from .summary import summary_bp diff --git a/routes/summary.py b/routes/summary.py new file mode 100644 index 0000000..dade838 --- /dev/null +++ b/routes/summary.py @@ -0,0 +1,38 @@ +from flask import Blueprint, request, jsonify +from app.services.summary import correct_transcript, summarize_transcript, configure_api + +summary_bp = Blueprint('summary', __name__) + +@summary_bp.route('/correct', methods=['POST']) +def correct(): + """API endpoint to correct transcript text""" + data = request.json + if not data or 'text' not in data: + return jsonify({"error": "Missing text in request"}), 400 + + provider = data.get('provider', 'gemini') + model = data.get('model', None) + + try: + configure_api(provider=provider) + corrected_text = correct_transcript(data['text'], provider=provider, model_name=model) + return jsonify({"corrected_text": corrected_text}) + except Exception as e: + return jsonify({"error": str(e)}), 500 + +@summary_bp.route('/summarize', methods=['POST']) +def summarize(): + """API endpoint to summarize transcript text""" + data = request.json + if not data or 'text' not in data: + return jsonify({"error": "Missing text in request"}), 400 + + provider = data.get('provider', 'gemini') + model = data.get('model', None) + + try: + configure_api(provider=provider) + summary_text = summarize_transcript(data['text'], provider=provider, model_name=model) + return jsonify({"summary": summary_text}) + except Exception as e: + return jsonify({"error": str(e)}), 500 \ No newline at end of file diff --git a/transcriber.py b/transcriber.py index 4d6c745..c5baad2 100644 --- a/transcriber.py +++ b/transcriber.py @@ -6,6 +6,7 @@ from app import __app_name__, __version__, commands, utils from app.api_client import APIClient +from app.commands.summary import correct_command, summarize_command from app.commands.cli_utils import ( get_transcription_url, auto_start_server @@ -119,6 +120,13 @@ def print_help(ctx, param, value): show_default=True, help="Summarize the transcript [only available with deepgram]", ) +correct_transcript = click.option( + "--correct", + is_flag=True, + default=settings.config.getboolean("correct_transcript", False), + show_default=True, + help="Correct the transcript using AI models [works with any transcription method]", +) cutoff_date = click.option( "--cutoff-date", type=str, @@ -283,6 +291,7 @@ def transcribe( username: str, github: bool, deepgram: bool, + correct:bool, summarize: bool, diarize: bool, upload: bool, @@ -320,6 +329,7 @@ def transcribe( "category": list(category), "username": username, "github": github, + "correct": correct, "deepgram": deepgram, "summarize": summarize, "diarize": diarize, @@ -449,6 +459,7 @@ def postprocess( username: str, github: bool, upload: bool, + correct:bool, markdown: bool, text: bool, json: bool, @@ -524,6 +535,8 @@ def postprocess( cli.add_command(commands.media) cli.add_command(commands.curator) cli.add_command(commands.server) +cli.add_command(correct_command) +cli.add_command(summarize_command) if __name__ == "__main__": cli() From 2d4bd2cfa0aee94d800b33e5caa498ae696f5f1b Mon Sep 17 00:00:00 2001 From: staru09 Date: Fri, 27 Jun 2025 00:56:47 +0530 Subject: [PATCH 2/2] installation errors fixed --- .gitignore | 3 +- app/commands/__init__.py | 3 +- app/commands/summary.py | 82 +++++++++++++++++++++++++++------------- transcriber.py | 3 +- 4 files changed, 61 insertions(+), 30 deletions(-) diff --git a/.gitignore b/.gitignore index 01d1ba3..aa777b3 100644 --- a/.gitignore +++ b/.gitignore @@ -134,4 +134,5 @@ dmypy.json .pyre/ .username .idea -*.json \ No newline at end of file +*.json +output/ \ No newline at end of file diff --git a/app/commands/__init__.py b/app/commands/__init__.py index fae6194..c21841e 100644 --- a/app/commands/__init__.py +++ b/app/commands/__init__.py @@ -1,4 +1,5 @@ from .curator import commands as curator from .server import server from .media import commands as media -from .summary import summary \ No newline at end of file +from .summary import summary +__all__ = ["media", "curator", "server", "summary"] \ No newline at end of file diff --git a/app/commands/summary.py b/app/commands/summary.py index 64f9bb4..7807521 100644 --- a/app/commands/summary.py +++ b/app/commands/summary.py @@ -1,22 +1,12 @@ import os import click from ..services.summary import correct_transcript, summarize_transcript, configure_api -from .cli_utils import get_config +from ..config import settings -# Add these at the end of app/commands/summary.py - -@click.command(name="correct") -@click.option("--input", "-i", required=True, help="Path to input transcript file.") -@click.option("--output", "-o", help="Path to save corrected transcript.") -@click.option("--provider", default="gemini", type=click.Choice(["gemini", "openai", "claude"]), help="AI provider to use.") -@click.option("--model", help="Model name to use with the selected provider.") -@click.option("--api-key", help="Custom API key for the selected provider.") -def correct_command(input, output, provider, model, api_key): - """Correct transcription errors in the transcript.""" - config = get_config() - +def do_correct(input, output, provider, model, api_key): + """Implementation logic for transcript correction""" if not api_key: - api_key = config.get(f"{provider}_api_key", None) + api_key = settings.config.get(f"{provider}_api_key", None) configure_api(provider=provider, api_key=api_key) @@ -36,19 +26,10 @@ def correct_command(input, output, provider, model, api_key): f.write(corrected_text) click.echo(f"Corrected transcript saved to: {output}") -@click.command(name="summarize") -@click.option("--input", "-i", required=True, help="Path to input transcript file.") -@click.option("--output", "-o", help="Path to save summary.") -@click.option("--provider", default="gemini", type=click.Choice(["gemini", "openai", "claude"]), help="AI provider to use.") -@click.option("--model", help="Model name to use with the selected provider.") -@click.option("--api-key", help="Custom API key for the selected provider.") -@click.option("--correct-first/--no-correct", default=False, help="Correct transcript before summarizing.") -def summarize_command(input, output, provider, model, api_key, correct_first): - """Generate a summary of the transcript.""" - config = get_config() - +def do_summarize(input, output, provider, model, api_key, correct_first): + """Implementation logic for transcript summarization""" if not api_key: - api_key = config.get(f"{provider}_api_key", None) + api_key = settings.config.get(f"{provider}_api_key", None) configure_api(provider=provider, api_key=api_key) @@ -72,4 +53,51 @@ def summarize_command(input, output, provider, model, api_key, correct_first): os.makedirs(os.path.dirname(output), exist_ok=True) with open(output, 'w', encoding='utf-8') as f: f.write(summary_text) - click.echo(f"Summary saved to: {output}") \ No newline at end of file + click.echo(f"Summary saved to: {output}") + +@click.group() +def summary(): + """Transcript correction and summarization commands.""" + pass + +@summary.command() +@click.option("--input", "-i", required=True, help="Path to input transcript file.") +@click.option("--output", "-o", help="Path to save corrected transcript.") +@click.option("--provider", default="gemini", type=click.Choice(["gemini", "openai", "claude"]), help="AI provider to use.") +@click.option("--model", help="Model name to use with the selected provider.") +@click.option("--api-key", help="Custom API key for the selected provider.") +def correct(input, output, provider, model, api_key): + """Correct transcription errors in the transcript.""" + do_correct(input, output, provider, model, api_key) + +@summary.command() +@click.option("--input", "-i", required=True, help="Path to input transcript file.") +@click.option("--output", "-o", help="Path to save summary.") +@click.option("--provider", default="gemini", type=click.Choice(["gemini", "openai", "claude"]), help="AI provider to use.") +@click.option("--model", help="Model name to use with the selected provider.") +@click.option("--api-key", help="Custom API key for the selected provider.") +@click.option("--correct-first/--no-correct", default=False, help="Correct transcript before summarizing.") +def summarize(input, output, provider, model, api_key, correct_first): + """Generate a summary of the transcript.""" + do_summarize(input, output, provider, model, api_key, correct_first) + +@click.command(name="correct") +@click.option("--input", "-i", required=True, help="Path to input transcript file.") +@click.option("--output", "-o", help="Path to save corrected transcript.") +@click.option("--provider", default="gemini", type=click.Choice(["gemini", "openai", "claude"]), help="AI provider to use.") +@click.option("--model", help="Model name to use with the selected provider.") +@click.option("--api-key", help="Custom API key for the selected provider.") +def correct_command(input, output, provider, model, api_key): + """Correct transcription errors in the transcript.""" + do_correct(input, output, provider, model, api_key) + +@click.command(name="summarize") +@click.option("--input", "-i", required=True, help="Path to input transcript file.") +@click.option("--output", "-o", help="Path to save summary.") +@click.option("--provider", default="gemini", type=click.Choice(["gemini", "openai", "claude"]), help="AI provider to use.") +@click.option("--model", help="Model name to use with the selected provider.") +@click.option("--api-key", help="Custom API key for the selected provider.") +@click.option("--correct-first/--no-correct", default=False, help="Correct transcript before summarizing.") +def summarize_command(input, output, provider, model, api_key, correct_first): + """Generate a summary of the transcript.""" + do_summarize(input, output, provider, model, api_key, correct_first) \ No newline at end of file diff --git a/transcriber.py b/transcriber.py index c5baad2..b13e561 100644 --- a/transcriber.py +++ b/transcriber.py @@ -6,7 +6,7 @@ from app import __app_name__, __version__, commands, utils from app.api_client import APIClient -from app.commands.summary import correct_command, summarize_command +from app.commands.summary import summary, correct_command, summarize_command from app.commands.cli_utils import ( get_transcription_url, auto_start_server @@ -535,6 +535,7 @@ def postprocess( cli.add_command(commands.media) cli.add_command(commands.curator) cli.add_command(commands.server) +cli.add_command(summary) cli.add_command(correct_command) cli.add_command(summarize_command)