diff --git a/mlops-roadshow/1-data-prep-feature-store.ipynb b/mlops-roadshow/1-data-prep-feature-store.ipynb index 95ed14c..b973df6 100644 --- a/mlops-roadshow/1-data-prep-feature-store.ipynb +++ b/mlops-roadshow/1-data-prep-feature-store.ipynb @@ -1270,7 +1270,9 @@ "fs_df = pd.DataFrame()\n", "while len(fs_df) == 0:\n", " if len(fs_df.columns) > 0:\n", - " time.sleep(120)\n", + " print('The features have not yet been ingested to our Feature Store. '\n", + " 'Let\\'s try again in a minute.')\n", + " time.sleep(60)\n", " query_string = f'SELECT * FROM \"sagemaker_featurestore\".\"{train_table}\" ORDER BY record_id'\n", " query_results= 'sagemaker-featurestore'\n", " output_location = f's3://{bucket}/{query_results}/query_results/'\n", @@ -1325,41 +1327,6 @@ "%store raw_s3" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, diff --git a/mlops-roadshow/2-training-registry.ipynb b/mlops-roadshow/2-training-registry.ipynb index 2216a02..546f44f 100644 --- a/mlops-roadshow/2-training-registry.ipynb +++ b/mlops-roadshow/2-training-registry.ipynb @@ -805,7 +805,7 @@ "\n", "model_package = best_estimator.register(content_types=['text/csv'],\n", " response_types=['application/json'],\n", - " inference_instances=['ml.t2.medium', 'ml.m5.xlarge'],\n", + " inference_instances=['ml.m5.xlarge'],\n", " transform_instances=['ml.m5.xlarge'],\n", " image_uri=best_estimator.image_uri,\n", " model_package_group_name=model_package_group_name,\n", diff 
--git a/mlops-roadshow/3-deployment.ipynb b/mlops-roadshow/3-deployment.ipynb index 958d323..03b20db 100644 --- a/mlops-roadshow/3-deployment.ipynb +++ b/mlops-roadshow/3-deployment.ipynb @@ -138,9 +138,11 @@ "metadata": {}, "outputs": [], "source": [ - "random_forest_regressor_model = ModelPackage(role_arn,\n", - " model_package_arn=model_package_arn,\n", - " name=model_name)" + "xgboost_regressor_model = ModelPackage(\n", + " role_arn,\n", + " model_package_arn=model_package_arn,\n", + " name=model_name\n", + ")" ] }, { @@ -158,8 +160,10 @@ "metadata": {}, "outputs": [], "source": [ - "sagemaker_client.update_model_package(ModelPackageArn=random_forest_regressor_model.model_package_arn,\n", - " ModelApprovalStatus='Approved')" + "sagemaker_client.update_model_package(\n", + " ModelPackageArn=xgboost_regressor_model.model_package_arn,\n", + " ModelApprovalStatus='Approved'\n", + ")" ] }, { @@ -177,9 +181,11 @@ "metadata": {}, "outputs": [], "source": [ - "random_forest_regressor_model.deploy(initial_instance_count=1,\n", - " instance_type='ml.t2.medium',\n", - " endpoint_name=f'{model_name}-endpoint')" + "xgboost_regressor_model.deploy(\n", + " initial_instance_count=1,\n", + " instance_type='ml.m5.xlarge',\n", + " endpoint_name=f'{model_name}-endpoint'\n", + ")" ] }, { diff --git a/mlops-roadshow/4-sagemaker-pipeline.ipynb b/mlops-roadshow/4-sagemaker-pipeline.ipynb index b0484df..b2bf767 100644 --- a/mlops-roadshow/4-sagemaker-pipeline.ipynb +++ b/mlops-roadshow/4-sagemaker-pipeline.ipynb @@ -47,7 +47,11 @@ "\n", "Here, we will put on the hat of a `DevOps/MLOps Engineer` and perform the task of orchestration which includes building pipeline steps that include all the previous notebooks components into one singular entity. 
This pipeline entity accomplishes a repeatable and reliable orchestration of each step in the ML workflow.\n", "\n", - "For this task we will be using Amazon SageMaker Pipeline capabilities.\n", + "For this task we will be using Amazon SageMaker Pipeline capabilities. We will be creating two SageMaker Pipelines, one for model training and one for model deployment.\n", + "\n", + "
\n", + "\n", + "
\n", "\n", "Let's get started!" ] } @@ -253,6 +257,14 @@ "![](./pipeline_scripts/images/sagemaker-pipelines-dag.png)" ] }, + { + "cell_type": "markdown", + "id": "85c57073-02f7-4fa1-aea8-f9bfb0231a1f", + "metadata": {}, + "source": [ + "You can also include other steps to your pipeline, for example for performing Hyperparameter Optimization (HPO) on your training pipeline. [Pipeline Steps Types](https://docs.aws.amazon.com/sagemaker/latest/dg/build-and-manage-steps.html#build-and-manage-steps-types) has a list of all possible pipeline step types that you can use to build your pipeline and [this workshop](https://aws.amazon.com/getting-started/hands-on/machine-learning-tutorial-mlops-automate-ml-workflows/#) explains how to build a SageMaker pipeline with steps for data bias check and model explainability." ] }, { "cell_type": "markdown", "id": "29cefc9b", "metadata": {}, "source": [ @@ -544,7 +556,7 @@ "model_registry_args = model.register(\n", "    content_types=['text/csv'],\n", "    response_types=['application/json'],\n", - "    inference_instances=['ml.t2.medium', 'ml.m5.xlarge'],\n", + "    inference_instances=['ml.m5.xlarge'],\n", "    transform_instances=['ml.m5.xlarge'],\n", "    model_package_group_name=model_package_group_name,\n", "    approval_status='PendingManualApproval',\n", diff --git a/mlops-roadshow/README.md b/mlops-roadshow/README.md index 38ccab3..dfc8a2e 100644 --- a/mlops-roadshow/README.md +++ b/mlops-roadshow/README.md @@ -6,33 +6,23 @@ This Lab covers core Amazon SageMaker components and how they help our customers to transition their model's journey experimentation to production, through the development of MLOps foundations. -Building an MLOps foundation that can cover the operations, people, and technology needs of -enterprise customers is challenging. Therefore, we define the following maturity model -that defines the necessary capabilities of MLOps in four key phases. 
+We typically see this journey/adoption broken up into four stages: -![MLOPs stages of adoption](pipeline_scripts/images/mlops-stages-of-adoption1.png) +![MLOPs stages of adoption](pipeline_scripts/images/mlops-stages-of-adoption.png) -Each stage represents practices that align to the increased adoption of MLOps -practices to move from a project or repeatable stage to adopting ML workloads -at scale. Check this [Blog Post](https://aws.amazon.com/blogs/machine-learning/mlops-foundation-roadmap-for-enterprises-with-amazon-sagemaker/) -for further details on the MLOps Foundations Roadmap for Enterprises on Amazon SageMaker. +Each stage represents practices that align to the increased adoption of MLOps practices to move from an initial or repeatable stage to adopting ML workloads at scale. -On this workshop we will cover the initial stages of the process: -**Initial** and **Repeatable**. +This lab will cover the Initial and Repeatable stages. Often customers will span multiple stages as they begin to adopt practices that are common within each stage. -In the Initial stage, you begin to incorporate ML projects into your overall strategy to -drive business outcomes. At this stage, the Machine Learning Development Cycle (MLDC) -typically involves a lot of manual hand-offs and processes. For example, the data engineer -manually hands off the data to the data scientists. The data scientists manually hand off -their model to the deployment team which can consist of DevOps or MLOps engineers. +In the Initial stage, you begin to incorporate ML projects into your overall strategy to drive business outcomes. At this stage, the Machine Learning Development Cycle (MLDC) typically involves a lot of manual hand-offs and processes. For example, the data engineer manually hands off the data to the data scientists. The data scientists manually hand off their model to the deployment team which can consist of DevOps or MLOps engineers. 
-In this Initial stage, you might have or will have multiple models running in production with -manual handoffs and processes and that starts to become very cumbersome. -So there becomes a need to increase the level of automation to ensure the deployments are -repeatable. +In this Initial stage, you might have or will have multiple models running in production with manual handoffs and processes and that starts to become very cumbersome. So there becomes a need to increase the level of automation to ensure the deployments are repeatable. This is where we move to the Repeatable stage where we automate most handoffs by orchestrating each step of the MLDC together. -This is where we move to the Repeatable stage where we automate most handoffs by orchestrating -each step of the MLDC together. +In the Repeatable stage, customers focus on automation, relying on lessons learned from DevOps. Data scientists, Data Engineers, and DevOps Engineers work to automate model training and deployment as well as reduce manual hand-offs between steps. That automation reduces manual errors and increases MLDC velocity. + +In the Reliable stage, customers shift focus to improving quality. The quality checks that data scientists often perform manually, like checking for data drift and evaluating model metrics, are built into the MLDC pipeline. That requires putting ML monitoring tools into place, and lets the pipelines use best-of-breed patterns like blue/green upgrades. At this stage, CI/CD practices such as source and version control along with automated quality gates are implemented. Defect rates begin to drop. + +Finally, as ML becomes a widespread and valuable practice in the organization, customers reach the Scalable stage. At the Scalable phase, customers see ML become widely impactful across the organization. The MLDC cycle time drops as cross-functional ML teams become more productive. 
Data scientists enjoy the benefits of codified best practices and shared model and feature repositories, which take time and effort to collect but often don’t directly benefit a specific team. At this stage, mechanisms are established for sharing practices and assets across the organization. This stage is as much about people and process as about technology, and customers often see data scientists assigned directly to project teams or rotating from the Center of Excellence (CoE) into other business units. ## Setting up the Lab environment diff --git a/mlops-roadshow/pipeline_scripts/images/training_and_deployment_pipelines.png b/mlops-roadshow/pipeline_scripts/images/training_and_deployment_pipelines.png new file mode 100644 index 0000000..000658d Binary files /dev/null and b/mlops-roadshow/pipeline_scripts/images/training_and_deployment_pipelines.png differ