|
6 | 6 | from data_scraper.common import constants |
7 | 7 | from data_scraper.core.scraper import JiraScraper, OSPDocScraper |
8 | 8 | from data_scraper.core.errata_scraper import ErrataScraper |
| 9 | +from data_scraper.core.tempest_scraper import TempestScraper |
9 | 10 |
|
10 | 11 | logging.basicConfig( |
11 | 12 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" |
@@ -178,3 +179,57 @@ def errata_scraper() -> None: |
178 | 179 |
|
179 | 180 | scraper = ErrataScraper(config_args) |
180 | 181 | scraper.run() |
| 182 | + |
| 183 | + |
| 184 | + |
| 185 | + |
def _parse_bool_flag(value: str) -> bool:
    """Convert a command-line string such as "true" or "0" into a bool.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is
    ``True`` because every non-empty string is truthy.

    Raises:
        ArgumentTypeError: if *value* is not a recognised boolean spelling.
    """
    # Imported locally so the helper does not rely on the module's imports.
    from argparse import ArgumentTypeError

    normalized = value.strip().lower()
    if normalized in {"true", "t", "yes", "y", "on", "1"}:
        return True
    # The empty string stays falsy, as it was under ``bool("")``.
    if normalized in {"false", "f", "no", "n", "off", "0", ""}:
        return False
    raise ArgumentTypeError(f"expected a boolean value, got {value!r}")


def tempest_scraper() -> None:
    """Entry point for command line execution of the Tempest scraper.

    Parses the command-line arguments, collects them into a configuration
    dictionary, builds a ``TempestScraper`` from it and calls its ``run()``.
    """
    parser = ArgumentParser("tempest_scraper")

    # Required arguments
    parser.add_argument("--database_client_url", type=str, required=True)
    parser.add_argument("--llm_server_url", type=str, required=True)
    parser.add_argument("--llm_api_key", type=str, required=True)
    parser.add_argument("--database_api_key", type=str, required=True)
    parser.add_argument("--zuul-url", type=str, required=True)

    # Optional arguments
    parser.add_argument("--chunk_size", type=int,
                        default=constants.DEFAULT_CHUNK_SIZE)
    parser.add_argument("--embedding_model", type=str,
                        default=constants.DEFAULT_EMBEDDING_MODEL)
    parser.add_argument("--db_collection_name", type=str,
                        default=constants.TEMPEST_COLLECTION_NAME)
    parser.add_argument("--scraper-processes", type=int,
                        default=constants.DEFAULT_NUM_SCRAPER_PROCESSES)
    parser.add_argument("--date_cutoff", type=datetime.fromisoformat,
                        default=datetime.fromisoformat(constants.DEFAULT_DATE_CUTOFF),
                        help=(
                            "No issues from before this date will be used. "
                            "Date must follow ISO format 'YYYY-MM-DD'"
                        )
                        )
    # A real string-to-bool converter is used so that
    # "--recreate_collection False" actually disables recreation.
    parser.add_argument("--recreate_collection", type=_parse_bool_flag,
                        default=True,
                        help="Recreate database collection from scratch.")
    args = parser.parse_args()

    config_args = {
        "database_client_url": args.database_client_url,
        "llm_server_url": args.llm_server_url,
        "llm_api_key": args.llm_api_key,
        "database_api_key": args.database_api_key,
        "chunk_size": args.chunk_size,
        "embedding_model": args.embedding_model,
        "db_collection_name": args.db_collection_name,
        # argparse stores "--zuul-url" as "zuul_url" (hyphens become
        # underscores); this parser has no "errata_url" attribute.
        "zuul_url": args.zuul_url,
        "scraper_processes": args.scraper_processes,
        "date_cutoff": args.date_cutoff,
        "recreate_collection": args.recreate_collection,
    }

    scraper = TempestScraper(config_args)
    scraper.run()
| 235 | + |
0 commit comments