Skip to content
This repository was archived by the owner on Sep 29, 2025. It is now read-only.

Commit 3edcf4a

Browse files
yakubova92 and nlarew authored
(EAI-645) deprecate coachGTM (#657)
* script to process url list for coachgtm, should be removed
* rm all references to coachGTM
* rm data source
* clean up imports
* rm files related to url check
* set up script to be re-usable, read from and write to local file system
* moved urlCheck to scripts directory
* changes to copy

Co-authored-by: Nick Larew <nick.larew@mongodb.com>

---------

Co-authored-by: Nick Larew <nick.larew@mongodb.com>
1 parent 5c17ac2 commit 3edcf4a

File tree

15 files changed

+202
-493
lines changed

15 files changed

+202
-493
lines changed

.drone.yml

Lines changed: 0 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -496,98 +496,6 @@ steps:
496496
api_server: https://api.staging.corp.mongodb.com
497497
kubernetes_token:
498498
from_secret: staging_kubernetes_token
499-
# ingest for coach gtm
500-
---
501-
depends_on: ["test-all"]
502-
kind: pipeline
503-
type: kubernetes
504-
name: staging-build-ingest-coach-gtm
505-
506-
trigger:
507-
branch:
508-
- main
509-
event:
510-
- push
511-
paths:
512-
include:
513-
- packages/mongodb-rag-ingest/**/*
514-
- packages/ingest-mongodb-public/**/*
515-
- packages/mongodb-rag-core/**/*
516-
- package-lock.json
517-
- package.json
518-
519-
steps:
520-
# Builds and publishes Docker image for staging
521-
- name: publish-staging-ingest-coach-gtm
522-
image: plugins/kaniko-ecr
523-
environment:
524-
LG_ARTIFACTORY_TOKEN:
525-
from_secret: lg_artifactory_token
526-
LG_ARTIFACTORY_EMAIL:
527-
from_secret: lg_artifactory_email
528-
settings:
529-
dockerfile: ingest-service.dockerfile
530-
create_repository: true
531-
registry: 795250896452.dkr.ecr.us-east-1.amazonaws.com
532-
repo: docs/${DRONE_REPO_NAME}-ingest-coach-gtm
533-
build_args:
534-
- LG_ARTIFACTORY_TOKEN
535-
- LG_ARTIFACTORY_EMAIL
536-
tags:
537-
- git-${DRONE_COMMIT_SHA:0:7}-coach-gtm-staging
538-
- latest-coach-gtm-staging
539-
access_key:
540-
from_secret: ecr_access_key
541-
secret_key:
542-
from_secret: ecr_secret_key
543-
544-
# Promotes current drone build to staging environment
545-
- name: promote-staging-ingest-coach-gtm
546-
image: drone/cli:1.4.0-alpine
547-
commands:
548-
- drone build promote mongodb/chatbot ${DRONE_BUILD_NUMBER} staging
549-
environment:
550-
DRONE_SERVER: ${DRONE_SYSTEM_PROTO}://${DRONE_SYSTEM_HOST}
551-
DRONE_TOKEN:
552-
from_secret: drone_token
553-
554-
---
555-
kind: pipeline
556-
type: kubernetes
557-
name: staging-deploy-ingest-coach-gtm
558-
559-
trigger:
560-
event:
561-
- promote
562-
target:
563-
- staging
564-
paths:
565-
include:
566-
- packages/mongodb-rag-ingest/**/*
567-
- packages/ingest-mongodb-public/**/*
568-
- packages/mongodb-rag-core/**/*
569-
- package-lock.json
570-
- package.json
571-
572-
branch:
573-
- main
574-
575-
steps:
576-
# Deploys docker image associated with staging build that triggered promotion
577-
- name: deploy-staging-ingest-coach-gtm
578-
image: quay.io/mongodb/drone-helm:v3
579-
settings:
580-
chart: mongodb/cronjobs
581-
chart_version: 1.10.0
582-
add_repos: [mongodb=https://10gen.github.io/helm-charts]
583-
namespace: docs
584-
release: ingest-coach-gtm-staging
585-
values: image.tag=git-${DRONE_COMMIT_SHA:0:7}-coach-gtm-staging,image.repository=795250896452.dkr.ecr.us-east-1.amazonaws.com/docs/${DRONE_REPO_NAME}-ingest-coach-gtm
586-
values_files:
587-
- packages/ingest-mongodb-public/environments/coachGtm.staging.yml
588-
api_server: https://api.staging.corp.mongodb.com
589-
kubernetes_token:
590-
from_secret: staging_kubernetes_token
591499

592500
---
593501
depends_on: ["test-all"]

packages/chatbot-server-mongodb-public/src/eval/bin/generateEvalCasesYamlFromCSV.ts

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -93,20 +93,6 @@ function normalizeUrl(url: string): string {
9393
return url.replace(/^https?:\/\/(www\.)?/i, "");
9494
}
9595

96-
const findMissingResources = async (
97-
expectedUrls: string[]
98-
): Promise<string[]> => {
99-
const results = await Promise.all(
100-
expectedUrls.map(async (url) => {
101-
const page = await pageStore.loadPage({
102-
query: { url: { $regex: new RegExp(normalizeUrl(url)) } },
103-
});
104-
return !page ? url : null;
105-
})
106-
);
107-
return results.filter((url) => url !== null) as string[];
108-
};
109-
11096
/**
11197
Main function to read CSV file, transform evaluation cases, and write to YAML file.
11298
@param csvFilePath - Path to the input CSV file
@@ -130,7 +116,10 @@ async function main({
130116
const expectedUrls = Array.from(
131117
new Set(evalCases.flatMap((caseItem) => caseItem.expectedLinks ?? []))
132118
);
133-
const urlsNotIngested = await findMissingResources(expectedUrls);
119+
const urlsNotIngested = await pageStore.getMissingPagesByUrl({
120+
expectedUrls,
121+
urlTransformer: normalizeUrl,
122+
});
134123
if (urlsNotIngested.length > 0) {
135124
console.warn(
136125
`Warning: ${urlsNotIngested.length}/${

packages/ingest-mongodb-public/.env.example

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,5 @@ DEVCENTER_CONNECTION_URI="mongodb+srv://<username>:<password>@devhub-cluster.sew
1414
## MongoDB Marketing CMS (mongodb.com)
1515
MONGODB_DOT_COM_CONNECTION_URI="mongodb+srv://..."
1616
MONGODB_DOT_COM_DB_NAME=<name of DB in dotcom cluster>
17-
MONGODB_COACH_GTM_DATABASE_NAME=<some DB name>
1817
## Docs style guide, meta reference (mongodb.com/docs/meta)
1918
MONGODB_META_DATABASE_NAME="docs-meta"

packages/ingest-mongodb-public/environments/coachGtm.staging.yml

Lines changed: 0 additions & 42 deletions
This file was deleted.

packages/ingest-mongodb-public/package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
"ingest:pages": "../../node_modules/mongodb-rag-ingest/build/main.js pages init --config ./build/config.js && ingest pages update --config ./build/config.js",
2323
"ingest:embed": "../../node_modules/mongodb-rag-ingest/build/main.js embed init --config ./build/config.js && ingest embed update --config ./build/config.js",
2424
"ingest:pages:meta": "../../node_modules/mongodb-rag-ingest/build/main.js pages update --config ./build/meta.config.js",
25-
"ingest:k8s:coachGtmPages": "../../node_modules/mongodb-rag-ingest/build/main.js pages update --config ./build/coachGtm.config.js",
2625
"test": "node --experimental-vm-modules ../../node_modules/jest/bin/jest.js --forceExit"
2726
},
2827
"devDependencies": {

packages/ingest-mongodb-public/src/PublicIngestEnvVars.ts

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,6 @@ export const PUBLIC_INGEST_MONGODB_DOT_COM_ENV_VARS = {
99
MONGODB_DOT_COM_DB_NAME: "",
1010
};
1111

12-
export const PUBLIC_INGEST_COACHGTM_ENV_VARS = {
13-
MONGODB_COACH_GTM_DATABASE_NAME: "",
14-
};
15-
1612
export const PUBLIC_INGEST_MONGODB_DOCS_META_ENV_VARS = {
1713
MONGODB_META_DATABASE_NAME: "",
1814
};

packages/ingest-mongodb-public/src/coachGtm.config.ts

Lines changed: 0 additions & 88 deletions
This file was deleted.

packages/ingest-mongodb-public/src/sources/MongoDbDotComDataSource.test.ts

Lines changed: 0 additions & 21 deletions
This file was deleted.

0 commit comments

Comments
 (0)