@@ -20,6 +20,7 @@ BUNDLE_VERSION ?= $(VERSION:v%=%)
2020 # KUEUE_VERSION defines the default version of Kueue (used for testing)
2121 KUEUE_VERSION ?= v0.6.2
2222
# USE_RHOAI selects which operator the delete-/install-ai-platform-operator targets act on:
# "true" dispatches to the RHOAI operator targets, any other value to the OpenDataHub operator targets
23+ USE_RHOAI ?= true
2324# KUBERAY_VERSION defines the default version of the KubeRay operator (used for testing)
2425KUBERAY_VERSION ?= v1.1.0
2526
@@ -419,3 +420,119 @@ image-mnist-job-test-push: image-mnist-job-test-build ## Push container image wi
# kueue-setup: run the scripts/setup-kueue-resources.sh helper script with bash.
.PHONY: kueue-setup
kueue-setup:
	bash scripts/setup-kueue-resources.sh
423+ # RHOAI/ODH related resources installation
424+
##@ all-in-one
.PHONY: all-in-one
all-in-one: ## Reinstall the AI platform, NFD and nvidia operators on the OpenShift cluster
	@echo -e " \n ==> Installing Everything needed for distributed AI platform on OpenShift cluster \n"
# Every step is best-effort ("-" prefix): a failing sub-make does not stop the sequence.
# $(MAKE) is used instead of a literal "make" so flags such as -n and the jobserver
# are propagated to the sub-makes.
# First remove any previous installation ...
	-$(MAKE) delete-nfd-operator
	-$(MAKE) delete-ai-platform-operator
	-$(MAKE) delete-nvidia-operator
# ... then install the operators again.
	-$(MAKE) install-ai-platform-operator
	-$(MAKE) install-nfd-operator
	-$(MAKE) install-nvidia-operator
435+
.PHONY: delete-all-in-one
delete-all-in-one: ## Remove the AI platform, NFD and nvidia operators from the OpenShift cluster
	@echo -e " \n ==> Removing Everything needed for distributed AI platform on OpenShift cluster \n"
# Every step is best-effort ("-" prefix): a failing sub-make does not stop the sequence.
# $(MAKE) is used instead of a literal "make" so flags such as -n and the jobserver
# are propagated to the sub-makes.
# NOTE(review): no "delete-rhoai" target is defined in the part of the Makefile visible
# here -- confirm it exists elsewhere, otherwise this step always fails (silently).
	-$(MAKE) delete-rhoai
	-$(MAKE) delete-nfd-operator
	-$(MAKE) delete-nvidia-operator
	-$(MAKE) delete-ai-platform-operator
443+
##@ general
.PHONY: delete-ai-platform-operator
delete-ai-platform-operator: ## Delete the AI platform operator selected by USE_RHOAI
# The conditional is evaluated when the Makefile is parsed; $(strip ...) keeps the
# comparison from failing on stray whitespace around the value of USE_RHOAI.
ifeq ($(strip $(USE_RHOAI)),true)
# USE_RHOAI is "true": delete the RHOAI operator (best-effort).
	-$(MAKE) delete-rhoai-operator
else
# Any other value: delete the Open Data Hub operator (best-effort).
	-$(MAKE) delete-opendatahub-operator
endif
452+
.PHONY: install-ai-platform-operator
install-ai-platform-operator: ## Install the AI platform operator selected by USE_RHOAI
# The conditional is evaluated when the Makefile is parsed; $(strip ...) keeps the
# comparison from failing on stray whitespace around the value of USE_RHOAI.
ifeq ($(strip $(USE_RHOAI)),true)
# USE_RHOAI is "true": install the RHOAI operator (best-effort).
	-$(MAKE) install-rhoai-operator
else
# Any other value: install the Open Data Hub operator (best-effort).
	-$(MAKE) install-opendatahub-operator
endif
460+
.PHONY: delete-rhoai-operator
delete-rhoai-operator: ## Delete RHOAI Operator
	@echo -e " \n==> Deleting OpenShift AI Operator \n"
# Remove the subscription first (best-effort).
	-oc delete subscription rhods-operator -n redhat-ods-operator
# Look up the ClusterServiceVersion name(s) carrying the operator label and delete them.
# Both commands share one shell (line continuation) so the variable is visible to the
# delete; it is expanded unquoted on purpose so several names split into separate
# arguments. Nothing is deleted when the lookup returns no name.
	-CLUSTER_SERVICE_VERSION="$$(oc get clusterserviceversion -n redhat-ods-operator -l operators.coreos.com/rhods-operator.redhat-ods-operator -o custom-columns=:metadata.name)"; \
	if [ -n "$$CLUSTER_SERVICE_VERSION" ]; then \
		oc delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n redhat-ods-operator; \
	fi
467+
.PHONY: install-rhoai-operator
install-rhoai-operator: ## Install RHOAI Operator
	@echo -e " \n==> Installing OpenShift AI Operator \n"
# Namespace creation is best-effort: the error raised when it already exists is ignored.
	-oc create ns redhat-ods-operator
	oc create -f contrib/configuration/rhoai-operator-subscription.yaml
	@echo Waiting for rhoai-operator Subscription to be ready
# The jsonpath expression and the expected value must form a single shell word:
# --for=jsonpath='{expr}'=value
	oc wait -n redhat-ods-operator subscription/rhods-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s
	@echo -e " \n==> Creating default Data Science Cluster \n"
	oc apply -f contrib/configuration/default-dsc.yaml
477+
.PHONY: delete-opendatahub-operator
delete-opendatahub-operator: ## Delete OpenDataHub operator
	@echo -e " \n==> Deleting OpenDataHub Operator \n"
# Remove the subscription first (best-effort).
	-oc delete subscription opendatahub-operator -n openshift-operators
# Look up the ClusterServiceVersion name(s) carrying the operator label and delete them.
# Both commands share one shell (line continuation) so the variable is visible to the
# delete; it is expanded unquoted on purpose so several names split into separate
# arguments. Nothing is deleted when the lookup returns no name.
	-CLUSTER_SERVICE_VERSION="$$(oc get clusterserviceversion -n openshift-operators -l operators.coreos.com/opendatahub-operator.openshift-operators -o custom-columns=:metadata.name)"; \
	if [ -n "$$CLUSTER_SERVICE_VERSION" ]; then \
		oc delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n openshift-operators; \
	fi
484+
.PHONY: install-opendatahub-operator
install-opendatahub-operator: ## Install OpenDataHub operator
	@echo -e " \n==> Installing OpenDataHub Operator \n"
# Namespace creation is best-effort: the error raised when it already exists is ignored.
	-oc create ns opendatahub
	oc create -f contrib/configuration/opendatahub-operator-subscription.yaml
	@echo Waiting for opendatahub-operator Subscription to be ready
# The subscription is waited for in the openshift-operators namespace.
# The jsonpath expression and the expected value must form a single shell word:
# --for=jsonpath='{expr}'=value
	oc wait -n openshift-operators subscription/opendatahub-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s
492+
##@ GPU Support
.PHONY: install-nfd-operator
install-nfd-operator: ## Install NFD operator (Node Feature Discovery)
	@echo -e " \n==> Installing NFD Operator \n"
# Namespace creation is best-effort: the error raised when it already exists is ignored.
	-oc create ns openshift-nfd
	oc create -f contrib/configuration/nfd-operator-subscription.yaml
	@echo -e " \n==> Creating default NodeFeatureDiscovery CR \n"
# Poll every 10 seconds (there is no timeout) until "oc get" prints something for the
# NodeFeatureDiscovery CRD, then until it prints something for the operator's CSV.
# POSIX "[ ... ]" with a quoted substitution is used so the loops do not depend on bash.
	@while [ -z "$$(oc get customresourcedefinition nodefeaturediscoveries.nfd.openshift.io)" ]; do echo " ."; sleep 10; done
	@while [ -z "$$(oc get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd)" ]; do echo " ."; sleep 10; done
# Read the alm-examples annotation of the first matching CSV, keep only the
# NodeFeatureDiscovery entry and apply it. The jsonpath is quoted so the shell cannot
# interpret its braces/brackets.
	oc get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd -ojsonpath='{.items[0].metadata.annotations.alm-examples}' | jq ' .[] | select(.kind=="NodeFeatureDiscovery")' | oc apply -f -
503+
.PHONY: delete-nfd-operator
delete-nfd-operator: ## Delete NFD operator
	@echo -e " \n==> Deleting NodeFeatureDiscovery CR \n"
# Not best-effort: a failure of this delete aborts the target.
	oc delete NodeFeatureDiscovery --all -n openshift-nfd
# Poll every 10 seconds (there is no timeout) while "oc get" still prints
# NodeFeatureDiscovery resources. POSIX "[ ... ]" with a quoted substitution is used so
# the loop does not depend on bash.
	@while [ -n "$$(oc get NodeFeatureDiscovery -n openshift-nfd)" ]; do echo " ."; sleep 10; done
	@echo -e " \n==> Deleting NFD Operator \n"
	-oc delete subscription nfd -n openshift-nfd
# Look up the ClusterServiceVersion name(s) carrying the operator label and delete them.
# Both commands share one shell (line continuation) so the variable is visible to the
# delete; it is expanded unquoted on purpose so several names split into separate
# arguments. Nothing is deleted when the lookup returns no name.
	-CLUSTER_SERVICE_VERSION="$$(oc get clusterserviceversion -n openshift-nfd -l operators.coreos.com/nfd.openshift-nfd -o custom-columns=:metadata.name)"; \
	if [ -n "$$CLUSTER_SERVICE_VERSION" ]; then \
		oc delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n openshift-nfd; \
	fi
	-oc delete ns openshift-nfd
514+
.PHONY: install-nvidia-operator
install-nvidia-operator: ## Install nvidia operator
	@echo -e " \n==> Installing nvidia Operator \n"
# Namespace creation is best-effort: the error raised when it already exists is ignored.
	-oc create ns nvidia-gpu-operator
	oc create -f contrib/configuration/nvidia-operator-subscription.yaml
	@echo -e " \n==> Creating default ClusterPolicy CR \n"
# Poll every 10 seconds (there is no timeout) until "oc get" prints something for the
# ClusterPolicy CRD, then until it prints something for the operator's CSV.
# POSIX "[ ... ]" with a quoted substitution is used so the loops do not depend on bash.
	@while [ -z "$$(oc get customresourcedefinition clusterpolicies.nvidia.com)" ]; do echo " ."; sleep 10; done
	@while [ -z "$$(oc get csv -n nvidia-gpu-operator --selector operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator)" ]; do echo " ."; sleep 10; done
# Read the alm-examples annotation of the first matching CSV and apply every entry in it.
# The jsonpath and the jq filter are quoted so the shell cannot glob-expand them.
	oc get csv -n nvidia-gpu-operator --selector operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator -ojsonpath='{.items[0].metadata.annotations.alm-examples}' | jq '.[]' | oc apply -f -
524+ # ifeq ($(USE_RHOAI), true) ## Delete RHOAI Operator
525+ # oc delete configmap migration-gpu-status -n redhat-ods-applications
526+ # -export REPLICASET_NAME=`oc get replicaset -n redhat-ods-applications -l app=rhods-dashboard -o custom-columns=:metadata.name`
527+ # oc delete replicaset $$REPLICASET_NAME -n redhat-ods-applications
528+ # endif
.PHONY: delete-nvidia-operator
delete-nvidia-operator: ## Delete nvidia operator
	@echo -e " \n==> Deleting ClusterPolicy CR \n"
# Not best-effort: a failure of this delete aborts the target.
	oc delete ClusterPolicy --all -n nvidia-gpu-operator
# Poll every 10 seconds (there is no timeout) while "oc get" still prints ClusterPolicy
# resources. POSIX "[ ... ]" with a quoted substitution is used so the loop does not
# depend on bash.
	@while [ -n "$$(oc get ClusterPolicy -n nvidia-gpu-operator)" ]; do echo " ."; sleep 10; done
	@echo -e " \n==> Deleting nvidia Operator \n"
	-oc delete subscription gpu-operator-certified -n nvidia-gpu-operator
# Look up the ClusterServiceVersion name(s) carrying the operator label and delete them.
# Both commands share one shell (line continuation) so the variable is visible to the
# delete; it is expanded unquoted on purpose so several names split into separate
# arguments. Nothing is deleted when the lookup returns no name.
	-CLUSTER_SERVICE_VERSION="$$(oc get clusterserviceversion -n nvidia-gpu-operator -l operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator -o custom-columns=:metadata.name)"; \
	if [ -n "$$CLUSTER_SERVICE_VERSION" ]; then \
		oc delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n nvidia-gpu-operator; \
	fi
	-oc delete ns nvidia-gpu-operator
0 commit comments