|
7 | 7 | "source": [ |
8 | 8 | "<font color=gray>Oracle Cloud Infrastructure Data Science Sample Notebook\n", |
9 | 9 | "\n", |
10 | | - "Copyright (c) 2021 Oracle, Inc. All rights reserved. <br>\n", |
| 10 | + "Copyright (c) 2021, 2023 Oracle, Inc. All rights reserved. <br>\n", |
11 | 11 | "Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.\n", |
12 | 12 | "</font>" |
13 | 13 | ] |
|
35 | 35 | "id": "9325ef67", |
36 | 36 | "metadata": {}, |
37 | 37 | "source": [ |
38 | | - "* We recommend that you run this notebook in a notebook session using a conda environment that has ADS version 2.3.1 installed\n", |
39 | | - "* You need access to the public internet\n", |
40 | | - "\n", |
41 | | - "***\n", |
42 | | - " \n", |
43 | | - "<font color=gray>Datasets are provided as a convenience. Datasets are considered Third Party Content and are not considered Materials under your agreement with Oracle applicable to the Services.\n", |
44 | | - " \n", |
45 | | - "The dataset `oracle_classification_dataset1` is distributed under the [UPL license](oracle_data/UPL.txt). \n", |
46 | | - "</font>\n", |
47 | | - "***" |
| 38 | + "* We recommend that you run this notebook in a notebook session using a conda environment that has ADS version 2.8.10 installed" |
48 | 39 | ] |
49 | 40 | }, |
50 | 41 | { |
|
54 | 45 | "metadata": {}, |
55 | 46 | "outputs": [], |
56 | 47 | "source": [ |
57 | | - "import ads\n", |
58 | | - "import logging\n", |
59 | | - "import os\n", |
60 | | - "import tempfile\n", |
61 | | - "import warnings\n", |
62 | | - "\n", |
63 | | - "from ads.catalog.model import ModelCatalog\n", |
64 | | - "from ads.common.model import ADSModel\n", |
65 | | - "from ads.common.model_export_util import prepare_generic_model\n", |
66 | | - "from ads.common.model_metadata import (MetadataCustomCategory,\n", |
67 | | - " UseCaseType,\n", |
68 | | - " Framework)\n", |
69 | | - "from ads.dataset.factory import DatasetFactory\n", |
70 | | - "from ads.feature_engineering.schema import Expression, Schema\n", |
71 | | - "from os import path\n", |
72 | 48 | "from sklearn.ensemble import RandomForestClassifier\n", |
73 | | - "\n", |
74 | | - "logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)\n", |
75 | | - "warnings.filterwarnings('ignore')\n", |
76 | | - "ads.set_documentation_mode(False)" |
| 49 | + "from sklearn.datasets import make_classification\n", |
| 50 | + "from sklearn.model_selection import train_test_split\n", |
| 51 | + "import ads\n", |
| 52 | + "import os" |
77 | 53 | ] |
78 | 54 | }, |
79 | 55 | { |
80 | 56 | "cell_type": "code", |
81 | | - "execution_count": null, |
| 57 | + "execution_count": null, |
82 | 58 | "id": "e49fd60d", |
83 | 59 | "metadata": {}, |
84 | 60 | "outputs": [], |
|
95 | 71 | "metadata": {}, |
96 | 72 | "outputs": [], |
97 | 73 | "source": [ |
98 | | - "# Load the dataset\n", |
99 | | - "ds_path = path.join(\"/\", \"opt\", \"notebooks\", \"ads-examples\", \"oracle_data\", \"oracle_classification_dataset1_150K.csv\")\n", |
100 | | - "\n", |
101 | | - "ds = DatasetFactory.open(ds_path, target=\"class\")\n", |
102 | | - "\n", |
103 | | - "ds\n", |
104 | | - "# Data preprocessing\n", |
105 | | - "transformed_ds = ds.auto_transform(fix_imbalance=False)\n", |
106 | | - "train, test = transformed_ds.train_test_split(test_size=0.15)\n", |
107 | | - "\n", |
108 | | - "# Build the model and convert it to an ADSModel object\n", |
109 | | - "rf_clf = RandomForestClassifier(n_estimators=10).fit(train.X.values, train.y.values)\n", |
110 | | - "rf_model = ADSModel.from_estimator(rf_clf)" |
| 74 | + "seed = 42\n", |
| 75 | + "# make some classification data (seeded so that Restart & Run All reproduces the same data)\n", |
| 76 | + "X, y = make_classification(n_samples=10000, n_features=15, n_classes=2, flip_y=0.05, random_state=seed)\n", |
| 77 | + "trainx, testx, trainy, testy = train_test_split(X, y, test_size=30, random_state=seed)  # integer test_size: 30 rows are held out\n", |
| 78 | + "model = RandomForestClassifier(\n", |
| 79 | + " n_estimators=100, random_state=seed\n", |
| 80 | + " )\n", |
| 81 | + "# train a random forest classifier\n", |
| 82 | + "model.fit(\n", |
| 83 | + " trainx,\n", |
| 84 | + " trainy,\n", |
| 85 | + " )" |
111 | 86 | ] |
112 | 87 | }, |
113 | 88 | { |
|
118 | 93 | "outputs": [], |
119 | 94 | "source": [ |
120 | 95 | "# Prepare the model artifacts\n", |
121 | | - "path_to_ADS_model_artifact = tempfile.mkdtemp()\n", |
| 96 | + "from ads.model.framework.sklearn_model import SklearnModel\n", |
| 97 | + "from ads.common.model_metadata import UseCaseType\n", |
122 | 98 | "\n", |
123 | | - "rf_model_artifact = rf_model.prepare(path_to_ADS_model_artifact, use_case_type=UseCaseType.BINARY_CLASSIFICATION,\n", |
124 | | - " force_overwrite=True, data_sample=test, data_science_env=True,\n", |
125 | | - " fn_artifact_files_included=False)" |
| 99 | + "sklearn_model = SklearnModel(estimator=model, artifact_dir=\"~/sklearn_artifact_dir\")\n", |
| 100 | + "sklearn_model.prepare(\n", |
| 101 | + " inference_conda_env=\"generalml_p38_cpu_v1\",\n", |
| 102 | + " training_conda_env=\"generalml_p38_cpu_v1\",\n", |
| 103 | + " X_sample=trainx, y_sample=trainy,\n", |
| 104 | + " use_case_type=UseCaseType.BINARY_CLASSIFICATION,\n", |
| 105 | + " force_overwrite=True,  # artifact_dir is a fixed path; overwrite earlier artifacts so this cell can be re-run\n", |
| 106 | + ")" |
126 | 107 | ] |
127 | 108 | }, |
128 | 109 | { |
|
133 | 114 | "outputs": [], |
134 | 115 | "source": [ |
135 | 116 | "# Saving the model artifact to the model catalog:\n", |
136 | | - "mc_model = rf_model_artifact.save(project_id=os.environ['PROJECT_OCID'],\n", |
| 117 | + "mc_model = sklearn_model.save(project_id=os.environ['PROJECT_OCID'],\n", |
137 | 118 | " compartment_id=os.environ['NB_SESSION_COMPARTMENT_OCID'],\n", |
138 | 119 | " training_id=os.environ['NB_SESSION_OCID'],\n", |
139 | 120 | " display_name=\"<replace-with-your-display-name>\",\n", |
140 | 121 | " description=\"<replace-with-description>\",\n", |
141 | 122 | " ignore_pending_changes=True,\n", |
142 | | - " timeout=1800,\n", |
143 | 123 | " ignore_introspection=True,\n", |
| 124 | + " bucket_uri=\"oci://<replace-with-your-bucket-name>@<replace-with-your-namespace>/<replace-with-prefix>/\",\n", |
144 | 125 | " )\n", |
145 | 126 | "mc_model" |
146 | 127 | ] |
|
156 | 137 | ], |
157 | 138 | "metadata": { |
158 | 139 | "kernelspec": { |
159 | | - "display_name": "Python [conda env:dataexpl_p37_cpu_v2]", |
| 140 | + "display_name": "ads_testing", |
160 | 141 | "language": "python", |
161 | | - "name": "conda-env-dataexpl_p37_cpu_v2-py" |
| 142 | + "name": "python3" |
162 | 143 | }, |
163 | 144 | "language_info": { |
164 | 145 | "codemirror_mode": { |
|
170 | 151 | "name": "python", |
171 | 152 | "nbconvert_exporter": "python", |
172 | 153 | "pygments_lexer": "ipython3", |
173 | | - "version": "3.7.10" |
| 154 | + "version": "3.8.18" |
174 | 155 | } |
175 | 156 | }, |
176 | 157 | "nbformat": 4, |
|
0 commit comments