diff --git a/examples/data-science-analystics/cpu-bokeh-geoviz/Icon_bokeh_geo.png b/examples/data-science-analystics/cpu-bokeh-geoviz/Icon_bokeh_geo.png new file mode 100644 index 00000000..d379555a Binary files /dev/null and b/examples/data-science-analystics/cpu-bokeh-geoviz/Icon_bokeh_geo.png differ diff --git a/examples/data-science-analystics/cpu-bokeh-geoviz/bokeh_geo_visualization.ipynb b/examples/data-science-analystics/cpu-bokeh-geoviz/bokeh_geo_visualization.ipynb new file mode 100644 index 00000000..e443cf34 --- /dev/null +++ b/examples/data-science-analystics/cpu-bokeh-geoviz/bokeh_geo_visualization.ipynb @@ -0,0 +1,380 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🌍 Bokeh Notebook — Geo Visualization\n", + "\n", + "
\n", + " \n", + "
\n", + "\n", + "### **Template Overview**\n", + "This template provides a production-ready setup for geospatial data analysis and interactive mapping on **Saturn Cloud**. Optimized for **CPU resources**, it demonstrates how to process geographic coordinates and render high-performance interactive maps. The primary goal is to showcase \"Map Incidents\" with interactive hover filters, allowing users to inspect localized data points dynamically.\n", + "\n", + "### **Dataset Overview**\n", + "The template utilizes a **Geospatial Incident** toy dataset. This dataset contains simulated event coordinates (latitude and longitude), incident types, and severity levels. It serves as a benchmark for testing spatial joins, coordinate reference system (CRS) transformations, and interactive glyph rendering in a mapping environment.\n", + "\n", + "### **Tech Stack**\n", + "* **Python**: The core language for spatial logic and data processing.\n", + "* **GeoPandas**: Extends Pandas to allow spatial operations on geometric types, handling the transformation of raw coordinates into map-ready shapes.\n", + "* **Bokeh**: A powerful visualization library used here to create interactive, web-ready maps with custom hover tools and real-time filtering capabilities.\n", + "\n", + "---\n", + "\n", + "## 🚀 Quick Start\n", + "The Saturn Cloud environment is pre-configured for Jupyter. Run the following cells to install the specialized geospatial libraries and launch the interactive map.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 1: Install Required Libraries**\n", + "In this step, we install the specific libraries needed for geospatial visualization. This includes **Bokeh** for the interactive mapping engine and **GeoPandas** for handling spatial data structures." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting bokeh\n", + " Downloading bokeh-3.8.1-py3-none-any.whl.metadata (10 kB)\n", + "Collecting geopandas\n", + " Downloading geopandas-1.1.2-py3-none-any.whl.metadata (2.3 kB)\n", + "Collecting shapely\n", + " Downloading shapely-2.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (6.8 kB)\n", + "Collecting Jinja2>=2.9 (from bokeh)\n", + " Using cached jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)\n", + "Collecting contourpy>=1.2 (from bokeh)\n", + " Using cached contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.5 kB)\n", + "Requirement already satisfied: narwhals>=1.13 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from bokeh) (2.14.0)\n", + "Requirement already satisfied: numpy>=1.16 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from bokeh) (2.4.0)\n", + "Requirement already satisfied: packaging>=16.8 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from bokeh) (25.0)\n", + "Requirement already satisfied: pandas>=1.2 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from bokeh) (2.3.3)\n", + "Collecting pillow>=7.1.0 (from bokeh)\n", + " Downloading pillow-12.1.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.8 kB)\n", + "Collecting PyYAML>=3.10 (from bokeh)\n", + " Using cached pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (2.4 kB)\n", + "Requirement already satisfied: tornado>=6.2 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from bokeh) (6.5.4)\n", + "Collecting xyzservices>=2021.09.1 (from bokeh)\n", + " Downloading xyzservices-2025.11.0-py3-none-any.whl.metadata (4.3 kB)\n", + "Collecting 
pyogrio>=0.7.2 (from geopandas)\n", + " Downloading pyogrio-0.12.1-cp313-cp313-manylinux_2_28_x86_64.whl.metadata (5.9 kB)\n", + "Collecting pyproj>=3.5.0 (from geopandas)\n", + " Downloading pyproj-3.7.2-cp313-cp313-manylinux_2_28_x86_64.whl.metadata (31 kB)\n", + "Collecting MarkupSafe>=2.0 (from Jinja2>=2.9->bokeh)\n", + " Using cached markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (2.7 kB)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from pandas>=1.2->bokeh) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from pandas>=1.2->bokeh) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from pandas>=1.2->bokeh) (2025.3)\n", + "Collecting certifi (from pyogrio>=0.7.2->geopandas)\n", + " Downloading certifi-2026.1.4-py3-none-any.whl.metadata (2.5 kB)\n", + "Requirement already satisfied: six>=1.5 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from python-dateutil>=2.8.2->pandas>=1.2->bokeh) (1.17.0)\n", + "Downloading bokeh-3.8.1-py3-none-any.whl (7.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m \u001b[33m0:00:01\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading geopandas-1.1.2-py3-none-any.whl (341 kB)\n", + "Downloading shapely-2.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m32.0 MB/s\u001b[0m \u001b[33m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (362 kB)\n", + "Using 
cached jinja2-3.1.6-py3-none-any.whl (134 kB)\n", + "Using cached markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (22 kB)\n", + "Downloading pillow-12.1.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (7.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.0/7.0 MB\u001b[0m \u001b[31m34.2 MB/s\u001b[0m \u001b[33m0:00:00\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading pyogrio-0.12.1-cp313-cp313-manylinux_2_28_x86_64.whl (32.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m32.5/32.5 MB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m \u001b[33m0:00:08\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading pyproj-3.7.2-cp313-cp313-manylinux_2_28_x86_64.whl (9.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.5/9.5 MB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m \u001b[33m0:00:01\u001b[0mm eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hUsing cached pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (801 kB)\n", + "Downloading xyzservices-2025.11.0-py3-none-any.whl (93 kB)\n", + "Downloading certifi-2026.1.4-py3-none-any.whl (152 kB)\n", + "Installing collected packages: xyzservices, shapely, PyYAML, pillow, MarkupSafe, contourpy, certifi, pyproj, pyogrio, Jinja2, geopandas, bokeh\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12/12\u001b[0m [bokeh]m11/12\u001b[0m [bokeh]das]\n", + "\u001b[1A\u001b[2KSuccessfully installed Jinja2-3.1.6 MarkupSafe-3.0.3 PyYAML-6.0.3 bokeh-3.8.1 certifi-2026.1.4 contourpy-1.3.3 geopandas-1.1.2 pillow-12.1.0 pyogrio-0.12.1 pyproj-3.7.2 shapely-2.1.2 xyzservices-2025.11.0\n" + ] + } + ], + "source": [ + "# Install geospatial and interactive visualization libraries\n", + "!pip install bokeh geopandas shapely" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "### **Step 2: Load and Prepare Geospatial Data**\n", + "We initialize a GeoDataFrame containing incident coordinates. We ensure the data is projected into the Web Mercator format (EPSG:3857), which is the standard coordinate system used by Bokeh and most web-based map tiles." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Incident_IDTypeSeveritylatlonxy
01MaintenanceLow37.7749-122.4194-1.362767e+074.547675e+06
12EmergencyHigh37.7849-122.4094-1.362655e+074.549084e+06
23InquiryMedium37.7649-122.4294-1.362878e+074.546267e+06
34MaintenanceLow37.7549-122.4394-1.362989e+074.544859e+06
\n", + "
" + ], + "text/plain": [ + " Incident_ID Type Severity lat lon x \\\n", + "0 1 Maintenance Low 37.7749 -122.4194 -1.362767e+07 \n", + "1 2 Emergency High 37.7849 -122.4094 -1.362655e+07 \n", + "2 3 Inquiry Medium 37.7649 -122.4294 -1.362878e+07 \n", + "3 4 Maintenance Low 37.7549 -122.4394 -1.362989e+07 \n", + "\n", + " y \n", + "0 4.547675e+06 \n", + "1 4.549084e+06 \n", + "2 4.546267e+06 \n", + "3 4.544859e+06 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import geopandas as gpd\n", + "import pandas as pd\n", + "\n", + "# Create toy incident data\n", + "data = {\n", + " 'Incident_ID': [1, 2, 3, 4],\n", + " 'Type': ['Maintenance', 'Emergency', 'Inquiry', 'Maintenance'],\n", + " 'Severity': ['Low', 'High', 'Medium', 'Low'],\n", + " 'lat': [37.7749, 37.7849, 37.7649, 37.7549],\n", + " 'lon': [-122.4194, -122.4094, -122.4294, -122.4394]\n", + "}\n", + "\n", + "df = pd.DataFrame(data)\n", + "gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs=\"EPSG:4326\")\n", + "\n", + "# Project to Web Mercator (Required for Bokeh OSM tiles)\n", + "gdf = gdf.to_crs(\"EPSG:3857\")\n", + "gdf['x'] = gdf.geometry.x\n", + "gdf['y'] = gdf.geometry.y\n", + "\n", + "# IMPORTANT: Drop the 'geometry' column to avoid SerializationError\n", + "bokeh_data = gdf.drop(columns='geometry')\n", + "bokeh_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 3: Build the Interactive Map with Hover Filters**\n", + "Using Bokeh, we render a map background (tile provider) and overlay the incident points. We configure a **HoverTool** to display incident details when the user moves their cursor over a point." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + "
\n", + " \n", + " Loading BokehJS ...\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": "'use strict';\n(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\nconst JS_MIME_TYPE = 'application/javascript';\n const HTML_MIME_TYPE = 'text/html';\n const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n const CLASS_NAME = 'output_bokeh rendered_html';\n\n /**\n * Render data to the DOM node\n */\n function render(props, node) {\n const script = document.createElement(\"script\");\n node.appendChild(script);\n }\n\n /**\n * Handle when an output is cleared or removed\n */\n function handleClearOutput(event, handle) {\n function drop(id) {\n const view = Bokeh.index.get_by_id(id)\n if (view != null) {\n view.model.document.clear()\n Bokeh.index.delete(view)\n }\n }\n\n const cell = handle.cell;\n\n const id = cell.output_area._bokeh_element_id;\n const server_id = cell.output_area._bokeh_server_id;\n\n // Clean up Bokeh references\n if (id != null) {\n drop(id)\n }\n\n if (server_id !== undefined) {\n // Clean up Bokeh references\n const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n cell.notebook.kernel.execute(cmd_clean, {\n iopub: {\n output: function(msg) {\n const id = msg.content.text.trim()\n drop(id)\n }\n }\n });\n // Destroy server and session\n const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n cell.notebook.kernel.execute(cmd_destroy);\n }\n }\n\n /**\n * Handle when a new output is added\n */\n function handleAddOutput(event, handle) {\n const output_area = handle.output_area;\n const output = handle.output;\n\n // limit handleAddOutput to display_data with EXEC_MIME_TYPE content 
only\n if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n return\n }\n\n const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n\n if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n // store reference to embed id on output_area\n output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n }\n if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n const bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n const script_attrs = bk_div.children[0].attributes;\n for (let i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n }\n\n function register_renderer(events, OutputArea) {\n\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n const toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[toinsert.length - 1]);\n element.append(toinsert);\n return toinsert\n }\n\n /* Handle when an output is cleared or removed */\n events.on('clear_output.CodeCell', handleClearOutput);\n events.on('delete.Cell', handleClearOutput);\n\n /* Handle when a new output is added */\n events.on('output_added.OutputArea', handleAddOutput);\n\n /**\n * Register the mime type and append_mime function with output_area\n */\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n /* Is 
output safe? */\n safe: true,\n /* Index of renderer in `output_area.display_order` */\n index: 0\n });\n }\n\n // register the mime type if in Jupyter Notebook environment and previously unregistered\n if (root.Jupyter !== undefined) {\n const events = require('base/js/events');\n const OutputArea = require('notebook/js/outputarea').OutputArea;\n\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n }\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded(error = null) {\n const el = document.getElementById(\"aa4b90b4-4ab9-48de-9028-3b59eea0acea\");\n if (el != null) {\n const html = (() => {\n if (typeof root.Bokeh === \"undefined\") {\n if (error == null) {\n return \"BokehJS is loading ...\";\n } else {\n return \"BokehJS failed to load.\";\n }\n } else {\n const prefix = `BokehJS ${root.Bokeh.version}`;\n if (error == null) {\n return `${prefix} successfully loaded.`;\n } else {\n return `${prefix} encountered errors while loading and may not function as expected.`;\n }\n }\n })();\n el.innerHTML = html;\n\n if (error != null) {\n const wrapper = document.createElement(\"div\");\n wrapper.style.overflow = \"auto\";\n wrapper.style.height = \"5em\";\n wrapper.style.resize = \"vertical\";\n const content = document.createElement(\"div\");\n content.style.fontFamily = \"monospace\";\n content.style.whiteSpace = \"pre-wrap\";\n content.style.backgroundColor = \"rgb(255, 221, 221)\";\n content.textContent = error.stack ?? 
error.toString();\n wrapper.append(content);\n el.append(wrapper);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(() => display_loaded(error), 100);\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n 
document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.8.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.8.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.8.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.8.1.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-3.8.1.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n try {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n\n } catch (error) {display_loaded(error);throw error;\n }if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(\"aa4b90b4-4ab9-48de-9028-3b59eea0acea\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));", + "application/vnd.bokehjs_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "BokehDeprecationWarning: 'circle() method with size value' was deprecated in Bokeh 3.4.0 and 
will be removed, use 'scatter(size=...) instead' instead.\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": "(function(root) {\n function embed_document(root) {\n const docs_json = {\"8e3d71e5-8281-4bfd-a36b-ac439d7d8b11\":{\"version\":\"3.8.1\",\"title\":\"Bokeh Application\",\"config\":{\"type\":\"object\",\"name\":\"DocumentConfig\",\"id\":\"p1116\",\"attributes\":{\"notifications\":{\"type\":\"object\",\"name\":\"Notifications\",\"id\":\"p1117\"}}},\"roots\":[{\"type\":\"object\",\"name\":\"Figure\",\"id\":\"p1068\",\"attributes\":{\"x_range\":{\"type\":\"object\",\"name\":\"DataRange1d\",\"id\":\"p1069\"},\"y_range\":{\"type\":\"object\",\"name\":\"DataRange1d\",\"id\":\"p1070\"},\"x_scale\":{\"type\":\"object\",\"name\":\"LinearScale\",\"id\":\"p1078\"},\"y_scale\":{\"type\":\"object\",\"name\":\"LinearScale\",\"id\":\"p1079\"},\"title\":{\"type\":\"object\",\"name\":\"Title\",\"id\":\"p1071\",\"attributes\":{\"text\":\"Incident Map: Localized Hover Filters\"}},\"renderers\":[{\"type\":\"object\",\"name\":\"TileRenderer\",\"id\":\"p1104\",\"attributes\":{\"tile_source\":{\"type\":\"object\",\"name\":\"WMTSTileSource\",\"id\":\"p1103\",\"attributes\":{\"url\":\"https://tile.openstreetmap.org/{z}/{x}/{y}.png\",\"max_zoom\":19,\"attribution\":\"© OpenStreetMap 
contributors\"}}}},{\"type\":\"object\",\"name\":\"GlyphRenderer\",\"id\":\"p1112\",\"attributes\":{\"data_source\":{\"type\":\"object\",\"name\":\"ColumnDataSource\",\"id\":\"p1065\",\"attributes\":{\"selected\":{\"type\":\"object\",\"name\":\"Selection\",\"id\":\"p1066\",\"attributes\":{\"indices\":[],\"line_indices\":[]}},\"selection_policy\":{\"type\":\"object\",\"name\":\"UnionRenderers\",\"id\":\"p1067\"},\"data\":{\"type\":\"map\",\"entries\":[[\"index\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"H4sIAAEAAAAC/2NgYGBgBGImIGYGYgCpr44nEAAAAA==\"},\"shape\":[4],\"dtype\":\"int32\",\"order\":\"little\"}],[\"Incident_ID\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"H4sIAAEAAAAC/2NkYGBgAmJmIGYBYgDv1AWvEAAAAA==\"},\"shape\":[4],\"dtype\":\"int32\",\"order\":\"little\"}],[\"Type\",{\"type\":\"ndarray\",\"array\":[\"Maintenance\",\"Emergency\",\"Inquiry\",\"Maintenance\"],\"shape\":[4],\"dtype\":\"object\",\"order\":\"little\"}],[\"Severity\",{\"type\":\"ndarray\",\"array\":[\"Low\",\"High\",\"Medium\",\"Low\"],\"shape\":[4],\"dtype\":\"object\",\"order\":\"little\"}],[\"lat\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"H4sIAAEAAAAC/7twNeyN/mMnh40B2bPKnzg5vItysnvx0MmB94HuhAUPnBwA9FebOiAAAAA=\"},\"shape\":[4],\"dtype\":\"float64\",\"order\":\"little\"}],[\"lon\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"H4sIAAEAAAAC/wv4I1F8fVbcgQf7+OYYA+mDlspe1bPjDhiV6yrKz4k7AADU+4dfIAAAAA==\"},\"shape\":[4],\"dtype\":\"float64\",\"order\":\"little\"}],[\"x\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"H4sIAAEAAAAC/2u7uFZD6V/mwR+KhUzT/2YeFG985bcWyF9uoF5q8T/zIABQkA6JIAAAAA==\"},\"shape\":[4],\"dtype\":\"float64\",\"order\":\"little\"}],[\"y\",{\"type\":\"ndarray\",\"array\":{\"type\":\"bytes\",\"data\":\"H4sIAAEAAAAC/2NtWXtNLDLQ8VhS+OeyqEBHv7Umx7aFBzquDnp9MCws0BEAp5Od8iAAAAA=\"},\"shape\":[4],\"dtype\":\"float64\",\"order\":\"little\"}]]}}},\"view\":{\"type\":\"object\",\"name\":\"CDSView\",
\"id\":\"p1113\",\"attributes\":{\"filter\":{\"type\":\"object\",\"name\":\"AllIndices\",\"id\":\"p1114\"}}},\"glyph\":{\"type\":\"object\",\"name\":\"Scatter\",\"id\":\"p1109\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"x\"},\"y\":{\"type\":\"field\",\"field\":\"y\"},\"size\":{\"type\":\"value\",\"value\":12},\"line_color\":{\"type\":\"value\",\"value\":\"firebrick\"},\"line_alpha\":{\"type\":\"value\",\"value\":0.8},\"fill_color\":{\"type\":\"value\",\"value\":\"firebrick\"},\"fill_alpha\":{\"type\":\"value\",\"value\":0.8},\"hatch_color\":{\"type\":\"value\",\"value\":\"firebrick\"},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.8}}},\"nonselection_glyph\":{\"type\":\"object\",\"name\":\"Scatter\",\"id\":\"p1110\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"x\"},\"y\":{\"type\":\"field\",\"field\":\"y\"},\"size\":{\"type\":\"value\",\"value\":12},\"line_color\":{\"type\":\"value\",\"value\":\"firebrick\"},\"line_alpha\":{\"type\":\"value\",\"value\":0.1},\"fill_color\":{\"type\":\"value\",\"value\":\"firebrick\"},\"fill_alpha\":{\"type\":\"value\",\"value\":0.1},\"hatch_color\":{\"type\":\"value\",\"value\":\"firebrick\"},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.1}}},\"muted_glyph\":{\"type\":\"object\",\"name\":\"Scatter\",\"id\":\"p1111\",\"attributes\":{\"x\":{\"type\":\"field\",\"field\":\"x\"},\"y\":{\"type\":\"field\",\"field\":\"y\"},\"size\":{\"type\":\"value\",\"value\":12},\"line_color\":{\"type\":\"value\",\"value\":\"firebrick\"},\"line_alpha\":{\"type\":\"value\",\"value\":0.2},\"fill_color\":{\"type\":\"value\",\"value\":\"firebrick\"},\"fill_alpha\":{\"type\":\"value\",\"value\":0.2},\"hatch_color\":{\"type\":\"value\",\"value\":\"firebrick\"},\"hatch_alpha\":{\"type\":\"value\",\"value\":0.2}}}}}],\"toolbar\":{\"type\":\"object\",\"name\":\"Toolbar\",\"id\":\"p1077\",\"attributes\":{\"tools\":[{\"type\":\"object\",\"name\":\"PanTool\",\"id\":\"p1090\"},{\"type\":\"object\",\"name\":\"WheelZoomTool\",\"id\":\"p1091\",\"
attributes\":{\"renderers\":\"auto\"}},{\"type\":\"object\",\"name\":\"BoxZoomTool\",\"id\":\"p1092\",\"attributes\":{\"dimensions\":\"both\",\"overlay\":{\"type\":\"object\",\"name\":\"BoxAnnotation\",\"id\":\"p1093\",\"attributes\":{\"syncable\":false,\"line_color\":\"black\",\"line_alpha\":1.0,\"line_width\":2,\"line_dash\":[4,4],\"fill_color\":\"lightgrey\",\"fill_alpha\":0.5,\"level\":\"overlay\",\"visible\":false,\"left\":{\"type\":\"number\",\"value\":\"nan\"},\"right\":{\"type\":\"number\",\"value\":\"nan\"},\"top\":{\"type\":\"number\",\"value\":\"nan\"},\"bottom\":{\"type\":\"number\",\"value\":\"nan\"},\"left_units\":\"canvas\",\"right_units\":\"canvas\",\"top_units\":\"canvas\",\"bottom_units\":\"canvas\",\"handles\":{\"type\":\"object\",\"name\":\"BoxInteractionHandles\",\"id\":\"p1099\",\"attributes\":{\"all\":{\"type\":\"object\",\"name\":\"AreaVisuals\",\"id\":\"p1098\",\"attributes\":{\"fill_color\":\"white\",\"hover_fill_color\":\"lightgray\"}}}}}}}},{\"type\":\"object\",\"name\":\"SaveTool\",\"id\":\"p1100\"},{\"type\":\"object\",\"name\":\"ResetTool\",\"id\":\"p1101\"},{\"type\":\"object\",\"name\":\"HelpTool\",\"id\":\"p1102\"},{\"type\":\"object\",\"name\":\"HoverTool\",\"id\":\"p1115\",\"attributes\":{\"renderers\":\"auto\",\"tooltips\":[[\"ID\",\"@Incident_ID\"],[\"Type\",\"@Type\"],[\"Severity\",\"@Severity\"]],\"sort_by\":null}}],\"active_scroll\":{\"id\":\"p1091\"}}},\"left\":[{\"type\":\"object\",\"name\":\"MercatorAxis\",\"id\":\"p1085\",\"attributes\":{\"ticker\":{\"type\":\"object\",\"name\":\"MercatorTicker\",\"id\":\"p1086\",\"attributes\":{\"mantissas\":[1,2,5],\"dimension\":\"lat\"}},\"formatter\":{\"type\":\"object\",\"name\":\"MercatorTickFormatter\",\"id\":\"p1087\",\"attributes\":{\"dimension\":\"lat\"}},\"major_label_policy\":{\"type\":\"object\",\"name\":\"AllLabels\",\"id\":\"p1088\"}}}],\"below\":[{\"type\":\"object\",\"name\":\"MercatorAxis\",\"id\":\"p1080\",\"attributes\":{\"ticker\":{\"type\":\"object\",\"name\":\"Merca
torTicker\",\"id\":\"p1081\",\"attributes\":{\"mantissas\":[1,2,5],\"dimension\":\"lon\"}},\"formatter\":{\"type\":\"object\",\"name\":\"MercatorTickFormatter\",\"id\":\"p1082\",\"attributes\":{\"dimension\":\"lon\"}},\"major_label_policy\":{\"type\":\"object\",\"name\":\"AllLabels\",\"id\":\"p1083\"}}}],\"center\":[{\"type\":\"object\",\"name\":\"Grid\",\"id\":\"p1084\",\"attributes\":{\"axis\":{\"id\":\"p1080\"}}},{\"type\":\"object\",\"name\":\"Grid\",\"id\":\"p1089\",\"attributes\":{\"dimension\":1,\"axis\":{\"id\":\"p1085\"}}}]}}]}};\n const render_items = [{\"docid\":\"8e3d71e5-8281-4bfd-a36b-ac439d7d8b11\",\"roots\":{\"p1068\":\"cb8e91d7-4403-4554-a068-d4cf6bfd015a\"},\"root_ids\":[\"p1068\"]}];\n void root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n }\n if (root.Bokeh !== undefined) {\n embed_document(root);\n } else {\n let attempts = 0;\n const timer = setInterval(function(root) {\n if (root.Bokeh !== undefined) {\n clearInterval(timer);\n embed_document(root);\n } else {\n attempts++;\n if (attempts > 100) {\n clearInterval(timer);\n console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n }\n }\n }, 10, root)\n }\n})(window);", + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "p1068" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "from bokeh.plotting import figure, show\n", + "from bokeh.models import HoverTool, ColumnDataSource\n", + "from bokeh.io import output_notebook\n", + "\n", + "output_notebook()\n", + "\n", + "# Initialize the Bokeh ColumnDataSource with the clean DataFrame\n", + "source = ColumnDataSource(bokeh_data)\n", + "\n", + "# Initialize map figure\n", + "p = figure(x_axis_type=\"mercator\", y_axis_type=\"mercator\", \n", + " title=\"Incident Map: Localized Hover Filters\",\n", + " active_scroll=\"wheel_zoom\")\n", + "\n", + "# Add OpenStreetMap background tiles\n", + "p.add_tile(\"OSM\")\n", + 
"\n", + "# Plot incidents using the 'source' argument\n", + "p.circle(x='x', y='y', size=12, color=\"firebrick\", alpha=0.8, source=source)\n", + "\n", + "# Configure hover tooltips\n", + "hover = HoverTool()\n", + "hover.tooltips = [\n", + " (\"ID\", \"@Incident_ID\"),\n", + " (\"Type\", \"@Type\"),\n", + " (\"Severity\", \"@Severity\")\n", + "]\n", + "p.add_tools(hover)\n", + "\n", + "show(p)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🔗 Resources and Support\n", + "For further information on the platform or the libraries used in this template, please refer to the following official links:\n", + "\n", + "* **Platform**: [Saturn Cloud Dashboard](https://saturncloud.io/)\n", + "* **Support**: [Saturn Cloud Documentation](https://saturncloud.io/docs/)\n", + "* **Library**: [Bokeh Documentation](https://docs.bokeh.org/)\n", + "* **Library**: [GeoPandas Documentation](https://geopandas.org/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu-plotly-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/data-science-analystics/cpu-bokeh-geoviz/make_notebook.py b/examples/data-science-analystics/cpu-bokeh-geoviz/make_notebook.py new file mode 100644 index 00000000..b3ac581b --- /dev/null +++ b/examples/data-science-analystics/cpu-bokeh-geoviz/make_notebook.py @@ -0,0 +1,173 @@ +import json + +# Define the notebook structure with your new standard format +notebook_content = { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + " \n", + "
\n", + "\n", + "# 🌍 Bokeh Notebook — Geo Visualization\n", + "\n", + "### **Template Review**\n", + "This template provides a production-ready setup for geospatial data analysis and interactive mapping on **Saturn Cloud**. Optimized for **CPU resources**, it demonstrates how to process geographic coordinates and render high-performance interactive maps. The primary goal is to showcase \"Map Incidents\" with interactive hover filters, allowing users to inspect localized data points dynamically.\n", + "\n", + "### **Dataset Overview**\n", + "The template utilizes a **Geospatial Incident** toy dataset. This dataset contains simulated event coordinates (latitude and longitude), incident types, and severity levels. It serves as a benchmark for testing spatial joins, coordinate reference system (CRS) transformations, and interactive glyph rendering in a mapping environment.\n", + "\n", + "### **Tech Stack**\n", + "* **Python**: The core language for spatial logic and data processing.\n", + "* **GeoPandas**: Extends Pandas to allow spatial operations on geometric types, handling the transformation of raw coordinates into map-ready shapes.\n", + "* **Bokeh**: A powerful visualization library used here to create interactive, web-ready maps with custom hover tools and real-time filtering capabilities.\n", + "\n", + "---\n", + "\n", + "## 🚀 Quick Start\n", + "The Saturn Cloud environment is pre-configured for Jupyter. Run the following cells to install the specialized geospatial libraries and launch the interactive map.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 1: Install Required Libraries**\n", + "In this step, we install the specific libraries needed for geospatial visualization. This includes **Bokeh** for the interactive mapping engine and **GeoPandas** for handling spatial data structures." 
+ ] + }, + { + "cell_type": "code", + "execution_count": None, + "metadata": {}, + "outputs": [], + "source": [ + "# Install geospatial and interactive visualization libraries\n", + "!pip install bokeh geopandas shapely" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 2: Load and Prepare Geospatial Data**\n", + "We initialize a GeoDataFrame containing incident coordinates. We ensure the data is projected into the Web Mercator format (EPSG:3857), which is the standard coordinate system used by Bokeh and most web-based map tiles." + ] + }, + { + "cell_type": "code", + "execution_count": None, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "from shapely.geometry import Point\n", + "import pandas as pd\n", + "\n", + "# Create toy incident data\n", + "data = {\n", + " 'Incident_ID': [1, 2, 3, 4],\n", + " 'Type': ['Maintenance', 'Emergency', 'Inquiry', 'Maintenance'],\n", + " 'Severity': ['Low', 'High', 'Medium', 'Low'],\n", + " 'lat': [37.7749, 37.7849, 37.7649, 37.7549],\n", + " 'lon': [-122.4194, -122.4094, -122.4294, -122.4394]\n", + "}\n", + "\n", + "df = pd.DataFrame(data)\n", + "# Convert to GeoDataFrame\n", + "gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs=\"EPSG:4326\")\n", + "\n", + "# Project to Web Mercator for Bokeh compatibility\n", + "gdf = gdf.to_crs(\"EPSG:3857\")\n", + "gdf['x'] = gdf.geometry.x\n", + "gdf['y'] = gdf.geometry.y\n", + "gdf.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 3: Build the Interactive Map with Hover Filters**\n", + "Using Bokeh, we render a map background (tile provider) and overlay the incident points. We configure a **HoverTool** to display incident details when the user moves their cursor over a point." 
+ ] + }, + { + "cell_type": "code", + "execution_count": None, + "metadata": {}, + "outputs": [], + "source": [ + "from bokeh.plotting import figure, show\n", + "from bokeh.models import HoverTool\n", + "from bokeh.io import output_notebook\n", + "\n", + "output_notebook()\n", + "\n", + "# Initialize the map figure\n", + "p = figure(x_axis_type=\"mercator\", y_axis_type=\"mercator\", \n", + " title=\"Incident Map: Localized Hover Filters\",\n", + " active_scroll=\"wheel_zoom\")\n", + "\n", + "# Add OpenStreetMap background tiles\n", + "p.add_tile(\"OSM\")\n", + "\n", + "# Plot incidents as circles (drop the shapely geometry column, which Bokeh cannot serialize)\n", + "p.circle(x='x', y='y', size=10, color=\"red\", alpha=0.7, source=gdf.drop(columns='geometry'))\n", + "\n", + "# Add Hover Tool with filters for ID, Type, and Severity\n", + "hover = HoverTool()\n", + "hover.tooltips = [\n", + " (\"ID\", \"@Incident_ID\"),\n", + " (\"Type\", \"@Type\"),\n", + " (\"Severity\", \"@Severity\")\n", + "]\n", + "p.add_tools(hover)\n", + "\n", + "show(p)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🔗 Resources and Support\n", + "For further information on the platform or the libraries used in this template, please refer to the following official links:\n", + "\n", + "* **Platform**: [Saturn Cloud Dashboard](https://saturncloud.io/)\n", + "* **Support**: [Saturn Cloud Documentation](https://saturncloud.io/docs/)\n", + "* **Library**: [Bokeh Documentation](https://docs.bokeh.org/)\n", + "* **Library**: [GeoPandas Documentation](https://geopandas.org/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} + +# Save as .ipynb +with 
open("bokeh_geo_visualization.ipynb", "w", encoding="utf-8") as f: + json.dump(notebook_content, f, indent=1) + +print("SUCCESS: bokeh_geo_visualization.ipynb has been created!") \ No newline at end of file diff --git a/examples/data-science-analystics/cpu-data-versioning/asset/DAG-icon.png b/examples/data-science-analystics/cpu-data-versioning/asset/DAG-icon.png new file mode 100644 index 00000000..d70d587e Binary files /dev/null and b/examples/data-science-analystics/cpu-data-versioning/asset/DAG-icon.png differ diff --git a/examples/data-science-analystics/cpu-data-versioning/data_versioning_dvc.ipynb b/examples/data-science-analystics/cpu-data-versioning/data_versioning_dvc.ipynb new file mode 100644 index 00000000..2deddfe8 --- /dev/null +++ b/examples/data-science-analystics/cpu-data-versioning/data_versioning_dvc.ipynb @@ -0,0 +1,176 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f7a343b1", + "metadata": {}, + "source": [ + "# 📂 Data Versioning with DVC & S3\n", + "\n", + "
\n", + " \n", + "
\n", + "\n", + "### **Template Review**\n", + "This template demonstrates a professional data management workflow using **DVC**. Optimized for **Saturn Cloud Jupyter Notebooks**, it allows you to track large datasets and maintain 100% reproducibility without bloat in your Git repository. \n", + "\n", + "**Core Workflow:** We will use a **Local Directory** to simulate an S3 bucket for free, while providing the boilerplate code required to switch to a **Live AWS S3** bucket.\n", + "\n", + "### **Tech Stack**\n", + "* **DVC**: The core data versioning engine.\n", + "* **S3 (Boto3)**: Backend support for cloud object storage.\n", + "* **Infrastructure**: [Saturn Cloud](https://saturncloud.io/) CPU Jupyter Instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a1efbac", + "metadata": {}, + "outputs": [], + "source": [ + "# Wrap package in quotes to prevent 'zsh: no matches found' errors\n", + "!pip install \"dvc[s3]\" boto3 pandas -q" + ] + }, + { + "cell_type": "markdown", + "id": "c4bc7738", + "metadata": {}, + "source": [ + "### **Step 1: Initialize DVC**\n", + "Set up the local environment to track data metadata." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9992ae3", + "metadata": {}, + "outputs": [], + "source": [ + "!dvc init --no-scm -f\n", + "print(\"DVC initialized successfully.\")" + ] + }, + { + "cell_type": "markdown", + "id": "d81638aa", + "metadata": {}, + "source": [ + "### **Step 2: Configure Remotes (Local vs. Remote S3)**\n", + "We use a local folder to simulate S3 for testing, but provide the logic for a real S3 bucket." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30f75985", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# --- OPTION A: LOCAL SIMULATION (Free) ---\n", + "local_storage = \"/tmp/dvc_simulated_s3\"\n", + "os.makedirs(local_storage, exist_ok=True)\n", + "!dvc remote add -d local_remote {local_storage} -f\n", + "\n", + "# --- OPTION B: REMOTE S3 BUCKET (Production Setup) ---\n", + "# To use a real bucket, uncomment the lines below and provide your URI\n", + "# S3_URI = \"s3://your-real-bucket-name/data-folder\"\n", + "# !dvc remote add -d s3_remote {S3_URI} -f\n", + "\n", + "print(f\"✅ Active Remote set to: {local_storage}\")" + ] + }, + { + "cell_type": "markdown", + "id": "8006eadb", + "metadata": {}, + "source": [ + "### **Step 3: Track, Version, and Push Data**\n", + "We create a dummy dataset and 'push' it to our remote simulation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21d00790", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# 1. Create dataset\n", + "df = pd.DataFrame({'feature_x': [10, 20, 30], 'label': [1, 0, 1]})\n", + "df.to_csv('my_dataset.csv', index=False)\n", + "\n", + "# 2. Track with DVC\n", + "!dvc add my_dataset.csv\n", + "\n", + "# 3. Push to storage (Local Sim or S3)\n", + "!dvc push\n", + "\n", + "print(\"\\nData pushed to remote storage. Tracking file 'my_dataset.csv.dvc' created.\")" + ] + }, + { + "cell_type": "markdown", + "id": "59b5e4d7", + "metadata": {}, + "source": [ + "### **Step 4: The Recovery Test**\n", + "Prove reproducibility by deleting the local data and pulling it back from the remote." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7fd78c57", + "metadata": {}, + "outputs": [], + "source": [ + "os.remove('my_dataset.csv')\n", + "print(\"Local dataset deleted.\")\n", + "\n", + "!dvc pull\n", + "print(\"\\n✅ Data successfully recovered from remote storage!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 🏁 Conclusion & Next Steps\n", + "You have successfully versioned a dataset using **DVC**. To transition this to a production environment, simply update your DVC remote to a real S3 URI and ensure your **AWS Access Keys** are set in your environment variables.\n", + "\n", + "### **Resources & Backlinks**\n", + "* **Cloud Infrastructure**: [Deploy on Saturn Cloud](https://saturncloud.io/)\n", + "* **DVC Guide**: [S3 Remote Configuration](https://dvc.org/doc/user-guide/data-management/remote-storage/amazon-s3)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu-plotly-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/data-science-analystics/cpu-duckdb-sql/assets/barChart.png b/examples/data-science-analystics/cpu-duckdb-sql/assets/barChart.png new file mode 100644 index 00000000..1b44ce89 Binary files /dev/null and b/examples/data-science-analystics/cpu-duckdb-sql/assets/barChart.png differ diff --git a/examples/data-science-analystics/cpu-duckdb-sql/assets/sql-1.png b/examples/data-science-analystics/cpu-duckdb-sql/assets/sql-1.png new file mode 100644 index 00000000..ab2ed810 Binary files /dev/null and b/examples/data-science-analystics/cpu-duckdb-sql/assets/sql-1.png differ diff --git 
a/examples/data-science-analystics/cpu-duckdb-sql/sql_bi_analytics.ipynb b/examples/data-science-analystics/cpu-duckdb-sql/sql_bi_analytics.ipynb new file mode 100644 index 00000000..39a8a1c9 --- /dev/null +++ b/examples/data-science-analystics/cpu-duckdb-sql/sql_bi_analytics.ipynb @@ -0,0 +1,159 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🦆 SQL/BI Notebook (DuckDB/Polars)\n", + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "### **Template Review**\n", + "This template demonstrates a modern high-performance BI pipeline within a **Jupyter Notebook** environment. Optimized for **CPU execution**, it leverages **Polars** for fast data handling and **DuckDB** for analytical SQL queries on local files.\n", + "\n", + "**Core Workflow:** Raw CSV → Optimized Parquet → SQL Aggregation → Interactive Plotly Visualization.\n", + "\n", + "### **Tech Stack**\n", + "* **DuckDB**: In-process analytical SQL engine.\n", + "* **Polars**: Lightning-fast DataFrame library.\n", + "* **Plotly**: Interactive data visualization.\n", + "* **PyArrow**: Backend for Parquet storage." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install the BI tech stack\n", + "!pip install duckdb polars plotly pandas pyarrow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 1: Data Ingestion & Parquet Conversion**\n", + "We use **Polars** to read the existing CSV data and immediately convert it to **Parquet**. Parquet is a columnar format that is significantly faster for SQL engines like DuckDB to query." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "# Load existing dataset (Penguins dataset for BI demonstration)\n", + "df_raw = pl.read_csv(\"https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv\")\n", + "\n", + "# Write to optimized Parquet format\n", + "df_raw.write_parquet(\"analytics_data.parquet\")\n", + "\n", + "print(f\"Ingested {len(df_raw)} rows and saved to 'analytics_data.parquet'\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 2: Analytical SQL with DuckDB**\n", + "Instead of loading everything into memory, we use **DuckDB** to run SQL directly against the Parquet file. This is highly efficient for CPU resources." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import duckdb\n", + "\n", + "# Connect to in-memory DuckDB\n", + "con = duckdb.connect()\n", + "\n", + "# Run SQL aggregation\n", + "sql_query = \"\"\"\n", + "SELECT \n", + " species, \n", + " island, \n", + " AVG(body_mass_g) as avg_mass,\n", + " COUNT(*) as specimen_count\n", + "FROM 'analytics_data.parquet'\n", + "WHERE body_mass_g IS NOT NULL\n", + "GROUP BY species, island\n", + "ORDER BY avg_mass DESC\n", + "\"\"\"\n", + "\n", + "df_bi = con.execute(sql_query).df()\n", + "df_bi.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 3: Interactive BI Visualization**\n", + "Finally, we use **Plotly** to visualize the SQL results. This provides interactive charts that allow for zooming and filtering within the notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import plotly.express as px\n", + "\n", + "# Create interactive bar chart\n", + "fig = px.bar(\n", + " df_bi, \n", + " x=\"species\", \n", + " y=\"avg_mass\", \n", + " color=\"island\",\n", + " barmode=\"group\",\n", + " title=\"Average Body Mass by Species & Island (SQL Processed)\",\n", + " labels={\"avg_mass\": \"Average Mass (g)\", \"species\": \"Species\"}\n", + ")\n", + "\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🔗 Resources\n", + "* **Platform**: [Saturn Cloud](https://saturncloud.io/)\n", + "* **DuckDB Docs**: [Querying Parquet Files](https://duckdb.org/docs/data/parquet/overview)\n", + "* **Polars Docs**: [Fast DataFrames](https://docs.pola.rs/)\n", + "* **Plotly Docs**: [Interactive Python Charts](https://plotly.com/python/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu-plotly-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + 
"name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/data-science-analystics/cpu-forecast-anomaly/forecasting_anomaly_detection.ipynb b/examples/data-science-analystics/cpu-forecast-anomaly/forecasting_anomaly_detection.ipynb new file mode 100644 index 00000000..b72c0621 --- /dev/null +++ b/examples/data-science-analystics/cpu-forecast-anomaly/forecasting_anomaly_detection.ipynb @@ -0,0 +1,162 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "# 📈 Forecasting & Anomaly Detection\n", + "\n", + "
\n", + " \n", + "
\n", + "\n", + "### **Overview**\n", + "This template provides an end-to-end time-series analysis and outlier detection pipeline designed to run on **Saturn Cloud Jupyter Notebooks**. Optimized for **CPU execution**, it demonstrates a two-model workflow using **Prophet** for seasonality and anomaly bounds and **Orbit** for Bayesian structural modeling. By leveraging the scalable environment of **Saturn Cloud**, this workflow can handle Bayesian model fitting (MCMC sampling or MAP estimation) efficiently.\n", + "\n", + "### **Dataset Overview**\n", + "The template utilizes existing time-series data (Log-scale Wikipedia page views), which is a standard benchmark for evaluating forecasting accuracy.\n", + "\n", + "### **Tech Stack**\n", + "* **Python**: Core language for time-series logic.\n", + "* **Prophet (Meta)**: Decomposable model for handling seasonality and holidays.\n", + "* **Orbit (Uber)**: Bayesian Damped Local Trend (DLT) framework.\n", + "* **Infrastructure**: [Saturn Cloud](https://saturncloud.io/) CPU-based Jupyter Instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install core forecasting and Bayesian libraries\n", + "!pip install prophet orbit-ml pandas matplotlib -q" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 1: Load Existing Data**\n", + "We ingest historical time-series data and prepare it for the modeling engines." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "url = \"https://raw.githubusercontent.com/facebook/prophet/main/examples/example_wp_log_peyton_manning.csv\"\n", + "df = pd.read_csv(url)\n", + "df['ds'] = pd.to_datetime(df['ds'])\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 2: Bayesian Trend Modeling (Orbit)**\n", + "We use Uber's **Orbit** library to fit a Damped Local Trend (DLT) model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from orbit.models import DLT\n", + "model_orbit = DLT(response_col='y', date_col='ds', seasonality=52)\n", + "model_orbit.fit(df)\n", + "print(\"Orbit Bayesian model training complete.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 3: Anomaly Detection (Prophet)**\n", + "Identifying historical data points that deviate from the 95% confidence interval." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from prophet import Prophet\n", + "import matplotlib.pyplot as plt\n", + "m = Prophet(interval_width=0.95)\n", + "m.fit(df)\n", + "forecast = m.predict(df)\n", + "performance = pd.merge(df, forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']], on='ds')\n", + "performance['anomaly'] = (performance['y'] > performance['yhat_upper']) | (performance['y'] < performance['yhat_lower'])\n", + "anomalies = performance[performance['anomaly'] == True]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 4: Final Visualization**\n", + "Visualizing the anomaly alerts in a unified chart." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(14, 7))\n", + "plt.plot(performance['ds'], performance['y'], color='black', label='Actual', alpha=0.5)\n", + "plt.fill_between(performance['ds'], performance['yhat_lower'], performance['yhat_upper'], color='blue', alpha=0.2, label='Confidence Interval')\n", + "plt.scatter(anomalies['ds'], anomalies['y'], color='red', label='Anomaly Alert', s=10)\n", + "plt.title(\"Forecasting Result\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "## 🏁 Conclusion & Next Steps\n", + "This template shows how a Bayesian structural model (Orbit DLT) can be fitted alongside Prophet, whose 95% uncertainty interval is used to flag anomalies. To move toward production, you can turn this notebook into a scheduled job or an API endpoint on the **Saturn Cloud** platform.\n", + "\n", + "### **Resources & Backlinks**\n", + "* **Cloud Infrastructure**: [Deploy this on Saturn Cloud](https://saturncloud.io/)\n", + "* **Orbit Library**: [Bayesian DLT Documentation](https://orbit-ml.readthedocs.io/)\n", + "* **Prophet Guide**: [Meta's Forecasting Docs](https://facebook.github.io/prophet/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu-plotly-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/data-science-analystics/cpu-forecast-anomaly/line-dot-graph.png b/examples/data-science-analystics/cpu-forecast-anomaly/line-dot-graph.png new file mode 100644 index 00000000..9cdccc89 Binary files /dev/null and 
b/examples/data-science-analystics/cpu-forecast-anomaly/line-dot-graph.png differ diff --git a/examples/data-science-analystics/cpu-plotly-timeseries/cpu-plotly-timeseries.png b/examples/data-science-analystics/cpu-plotly-timeseries/cpu-plotly-timeseries.png new file mode 100644 index 00000000..2607ce12 Binary files /dev/null and b/examples/data-science-analystics/cpu-plotly-timeseries/cpu-plotly-timeseries.png differ diff --git a/examples/data-science-analystics/cpu-plotly-timeseries/cpu_plotly_timeseries.ipynb b/examples/data-science-analystics/cpu-plotly-timeseries/cpu_plotly_timeseries.ipynb new file mode 100644 index 00000000..d3ad4d46 --- /dev/null +++ b/examples/data-science-analystics/cpu-plotly-timeseries/cpu_plotly_timeseries.ipynb @@ -0,0 +1,1425 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 📊 Plotly Notebook — Interactive Time Series\n", + "\n", + "
\n", + " \n", + "
\n", + "\n", + "\n", + "### **Template Review**\n", + "This template provides a streamlined environment for time-series analysis and interactive data exploration on **Saturn Cloud**. Optimized for **CPU resources**, it demonstrates how to visualize historical patterns and generate trend forecasts without the need for complex hardware configurations. The core focus is providing an interactive interface for deep-diving into data via browser-based zooming and sliders.\n", + "\n", + "### **Dataset Overview**\n", + "The template utilizes the classic **Air Passengers** toy dataset. This dataset tracks monthly international airline passenger totals from **1949 to 1960**. It is a standard benchmark for demonstrating long-term growth trends and seasonal fluctuations in time-series data.\n", + "\n", + "### **Tech Stack**\n", + "* **Python**: The base language for data logic and execution.\n", + "* **Pandas**: Utilized for efficient data manipulation and calculating rolling averages.\n", + "* **Plotly**: An advanced library that renders charts as interactive objects, enabling zoom, range sliders, and unified hover tooltips.\n", + "\n", + "---\n", + "\n", + "## 🚀 Quick Start\n", + "Since the Saturn Cloud environment is pre-configured for Jupyter, you can simply run the cells below to install the necessary libraries and begin your analysis.\n" + ] + }, + { + "cell_type": "markdown", + "id": "743b36b5", + "metadata": {}, + "source": [ + "### **Step 1: Install Required Libraries**\n", + "In this step, we install the specific libraries needed for interactive visualization. This includes **Plotly** for graphing, **Pandas** for data handling, and **IPywidgets** to ensure interactivity within the Jupyter environment." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "a0a431b7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: plotly in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (6.5.0)\n", + "Requirement already satisfied: pandas in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (2.3.3)\n", + "Requirement already satisfied: nbformat in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (5.10.4)\n", + "Requirement already satisfied: anywidget in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (0.9.21)\n", + "Requirement already satisfied: ipywidgets in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (8.1.8)\n", + "Requirement already satisfied: narwhals>=1.15.1 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from plotly) (2.14.0)\n", + "Requirement already satisfied: packaging in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from plotly) (25.0)\n", + "Requirement already satisfied: numpy>=1.26.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from pandas) (2.4.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from pandas) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from pandas) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from pandas) (2025.3)\n", + "Requirement already satisfied: fastjsonschema>=2.15 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from nbformat) (2.21.2)\n", + "Requirement already satisfied: jsonschema>=2.6 in 
/home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from nbformat) (4.25.1)\n", + "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from nbformat) (5.9.1)\n", + "Requirement already satisfied: traitlets>=5.1 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from nbformat) (5.14.3)\n", + "Requirement already satisfied: psygnal>=0.8.1 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from anywidget) (0.15.0)\n", + "Requirement already satisfied: typing-extensions>=4.2.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from anywidget) (4.15.0)\n", + "Requirement already satisfied: comm>=0.1.3 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from ipywidgets) (0.2.3)\n", + "Requirement already satisfied: ipython>=6.1.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from ipywidgets) (9.8.0)\n", + "Requirement already satisfied: widgetsnbextension~=4.0.14 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from ipywidgets) (4.0.15)\n", + "Requirement already satisfied: jupyterlab_widgets~=3.0.15 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from ipywidgets) (3.0.16)\n", + "Requirement already satisfied: decorator>=4.3.2 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (5.2.1)\n", + "Requirement already satisfied: ipython-pygments-lexers>=1.0.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (1.1.1)\n", + "Requirement already satisfied: jedi>=0.18.1 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)\n", + "Requirement already satisfied: matplotlib-inline>=0.1.5 in 
/home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (0.2.1)\n", + "Requirement already satisfied: pexpect>4.3 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (4.9.0)\n", + "Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (3.0.52)\n", + "Requirement already satisfied: pygments>=2.11.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (2.19.2)\n", + "Requirement already satisfied: stack_data>=0.6.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n", + "Requirement already satisfied: wcwidth in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.14)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.4 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from jedi>=0.18.1->ipython>=6.1.0->ipywidgets) (0.8.5)\n", + "Requirement already satisfied: attrs>=22.2.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from jsonschema>=2.6->nbformat) (25.4.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from jsonschema>=2.6->nbformat) (2025.9.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from jsonschema>=2.6->nbformat) (0.37.0)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from jsonschema>=2.6->nbformat) (0.30.0)\n", + "Requirement already satisfied: platformdirs>=2.5 in 
/home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from jupyter-core!=5.0.*,>=4.12->nbformat) (4.5.1)\n", + "Requirement already satisfied: ptyprocess>=0.5 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets) (0.7.0)\n", + "Requirement already satisfied: six>=1.5 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", + "Requirement already satisfied: executing>=1.2.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (2.2.1)\n", + "Requirement already satisfied: asttokens>=2.1.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (3.0.1)\n", + "Requirement already satisfied: pure-eval in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from stack_data>=0.6.0->ipython>=6.1.0->ipywidgets) (0.2.3)\n" + ] + } + ], + "source": [ + "# Install specific libraries for this template\n", + "!pip install plotly pandas nbformat anywidget ipywidgets" + ] + }, + { + "cell_type": "markdown", + "id": "8e730f39", + "metadata": {}, + "source": [ + "### **Step 2: Load and Prepare the Dataset**\n", + "We load the **Air Passengers** CSV file directly from a verified online repository. We use Pandas to parse the 'Month' column as a date index, which is essential for accurate time-series plotting." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "0125a37d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Passengers
Month
1949-01-01112
1949-02-01118
1949-03-01132
1949-04-01129
1949-05-01121
\n", + "
" + ], + "text/plain": [ + " Passengers\n", + "Month \n", + "1949-01-01 112\n", + "1949-02-01 118\n", + "1949-03-01 132\n", + "1949-04-01 129\n", + "1949-05-01 121" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import plotly.graph_objects as go\n", + "import numpy as np\n", + "\n", + "# Load data and set the Month as the index\n", + "url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv'\n", + "df = pd.read_csv(url, header=0, index_col=0, parse_dates=[0])\n", + "df.columns = ['Passengers']\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "7d3e039f", + "metadata": {}, + "source": [ + "### **Step 3: Generate a Simple Forecast Trend**\n", + "To visualize the underlying growth of the data, we calculate a **12-month Rolling Mean**. This \"toy forecast\" smooths out monthly seasonal noise to show the general direction of passenger growth over the years." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "e4de6620", + "metadata": {}, + "outputs": [], + "source": [ + "# Calculate rolling average to reveal the trend\n", + "df['Trend_Forecast'] = df['Passengers'].rolling(window=12).mean()" + ] + }, + { + "cell_type": "markdown", + "id": "0ad18a0c", + "metadata": {}, + "source": [ + "### **Step 4: Build the Interactive Visualization**\n", + "Finally, we use Plotly to create an interactive chart. We include a **Range Slider** at the bottom for quick zooming and **Range Selectors** (1y, 5y) to instantly jump to specific time windows. Hovering over the lines will show exact values for both actual data and the trend forecast." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "a6d6025a", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "mode": "lines", + "name": "Actual Passengers", + "type": "scatter", + "x": [ + "1949-01-01T00:00:00.000000000", + "1949-02-01T00:00:00.000000000", + "1949-03-01T00:00:00.000000000", + "1949-04-01T00:00:00.000000000", + "1949-05-01T00:00:00.000000000", + "1949-06-01T00:00:00.000000000", + "1949-07-01T00:00:00.000000000", + "1949-08-01T00:00:00.000000000", + "1949-09-01T00:00:00.000000000", + "1949-10-01T00:00:00.000000000", + "1949-11-01T00:00:00.000000000", + "1949-12-01T00:00:00.000000000", + "1950-01-01T00:00:00.000000000", + "1950-02-01T00:00:00.000000000", + "1950-03-01T00:00:00.000000000", + "1950-04-01T00:00:00.000000000", + "1950-05-01T00:00:00.000000000", + "1950-06-01T00:00:00.000000000", + "1950-07-01T00:00:00.000000000", + "1950-08-01T00:00:00.000000000", + "1950-09-01T00:00:00.000000000", + "1950-10-01T00:00:00.000000000", + "1950-11-01T00:00:00.000000000", + "1950-12-01T00:00:00.000000000", + "1951-01-01T00:00:00.000000000", + "1951-02-01T00:00:00.000000000", + "1951-03-01T00:00:00.000000000", + "1951-04-01T00:00:00.000000000", + "1951-05-01T00:00:00.000000000", + "1951-06-01T00:00:00.000000000", + "1951-07-01T00:00:00.000000000", + "1951-08-01T00:00:00.000000000", + "1951-09-01T00:00:00.000000000", + "1951-10-01T00:00:00.000000000", + "1951-11-01T00:00:00.000000000", + "1951-12-01T00:00:00.000000000", + "1952-01-01T00:00:00.000000000", + "1952-02-01T00:00:00.000000000", + "1952-03-01T00:00:00.000000000", + "1952-04-01T00:00:00.000000000", + "1952-05-01T00:00:00.000000000", + "1952-06-01T00:00:00.000000000", + "1952-07-01T00:00:00.000000000", + "1952-08-01T00:00:00.000000000", + "1952-09-01T00:00:00.000000000", + "1952-10-01T00:00:00.000000000", + "1952-11-01T00:00:00.000000000", + 
"1952-12-01T00:00:00.000000000", + "1953-01-01T00:00:00.000000000", + "1953-02-01T00:00:00.000000000", + "1953-03-01T00:00:00.000000000", + "1953-04-01T00:00:00.000000000", + "1953-05-01T00:00:00.000000000", + "1953-06-01T00:00:00.000000000", + "1953-07-01T00:00:00.000000000", + "1953-08-01T00:00:00.000000000", + "1953-09-01T00:00:00.000000000", + "1953-10-01T00:00:00.000000000", + "1953-11-01T00:00:00.000000000", + "1953-12-01T00:00:00.000000000", + "1954-01-01T00:00:00.000000000", + "1954-02-01T00:00:00.000000000", + "1954-03-01T00:00:00.000000000", + "1954-04-01T00:00:00.000000000", + "1954-05-01T00:00:00.000000000", + "1954-06-01T00:00:00.000000000", + "1954-07-01T00:00:00.000000000", + "1954-08-01T00:00:00.000000000", + "1954-09-01T00:00:00.000000000", + "1954-10-01T00:00:00.000000000", + "1954-11-01T00:00:00.000000000", + "1954-12-01T00:00:00.000000000", + "1955-01-01T00:00:00.000000000", + "1955-02-01T00:00:00.000000000", + "1955-03-01T00:00:00.000000000", + "1955-04-01T00:00:00.000000000", + "1955-05-01T00:00:00.000000000", + "1955-06-01T00:00:00.000000000", + "1955-07-01T00:00:00.000000000", + "1955-08-01T00:00:00.000000000", + "1955-09-01T00:00:00.000000000", + "1955-10-01T00:00:00.000000000", + "1955-11-01T00:00:00.000000000", + "1955-12-01T00:00:00.000000000", + "1956-01-01T00:00:00.000000000", + "1956-02-01T00:00:00.000000000", + "1956-03-01T00:00:00.000000000", + "1956-04-01T00:00:00.000000000", + "1956-05-01T00:00:00.000000000", + "1956-06-01T00:00:00.000000000", + "1956-07-01T00:00:00.000000000", + "1956-08-01T00:00:00.000000000", + "1956-09-01T00:00:00.000000000", + "1956-10-01T00:00:00.000000000", + "1956-11-01T00:00:00.000000000", + "1956-12-01T00:00:00.000000000", + "1957-01-01T00:00:00.000000000", + "1957-02-01T00:00:00.000000000", + "1957-03-01T00:00:00.000000000", + "1957-04-01T00:00:00.000000000", + "1957-05-01T00:00:00.000000000", + "1957-06-01T00:00:00.000000000", + "1957-07-01T00:00:00.000000000", + "1957-08-01T00:00:00.000000000", + 
"1957-09-01T00:00:00.000000000", + "1957-10-01T00:00:00.000000000", + "1957-11-01T00:00:00.000000000", + "1957-12-01T00:00:00.000000000", + "1958-01-01T00:00:00.000000000", + "1958-02-01T00:00:00.000000000", + "1958-03-01T00:00:00.000000000", + "1958-04-01T00:00:00.000000000", + "1958-05-01T00:00:00.000000000", + "1958-06-01T00:00:00.000000000", + "1958-07-01T00:00:00.000000000", + "1958-08-01T00:00:00.000000000", + "1958-09-01T00:00:00.000000000", + "1958-10-01T00:00:00.000000000", + "1958-11-01T00:00:00.000000000", + "1958-12-01T00:00:00.000000000", + "1959-01-01T00:00:00.000000000", + "1959-02-01T00:00:00.000000000", + "1959-03-01T00:00:00.000000000", + "1959-04-01T00:00:00.000000000", + "1959-05-01T00:00:00.000000000", + "1959-06-01T00:00:00.000000000", + "1959-07-01T00:00:00.000000000", + "1959-08-01T00:00:00.000000000", + "1959-09-01T00:00:00.000000000", + "1959-10-01T00:00:00.000000000", + "1959-11-01T00:00:00.000000000", + "1959-12-01T00:00:00.000000000", + "1960-01-01T00:00:00.000000000", + "1960-02-01T00:00:00.000000000", + "1960-03-01T00:00:00.000000000", + "1960-04-01T00:00:00.000000000", + "1960-05-01T00:00:00.000000000", + "1960-06-01T00:00:00.000000000", + "1960-07-01T00:00:00.000000000", + "1960-08-01T00:00:00.000000000", + "1960-09-01T00:00:00.000000000", + "1960-10-01T00:00:00.000000000", + "1960-11-01T00:00:00.000000000", + "1960-12-01T00:00:00.000000000" + ], + "y": { + "bdata": "cAB2AIQAgQB5AIcAlACUAIgAdwBoAHYAcwB+AI0AhwB9AJUAqgCqAJ4AhQByAIwAkQCWALIAowCsALIAxwDHALgAogCSAKYAqwC0AMEAtQC3ANoA5gDyANEAvwCsAMIAxADEAOwA6wDlAPMACAEQAe0A0wC0AMkAzAC8AOsA4wDqAAgBLgElAQMB5QDLAOUA8gDpAAsBDQEOATsBbAFbATgBEgHtABYBHAEVAT0BOQE+AXYBnQGVAWMBMgEPATIBOwEtAWQBXAFjAaYB0QHTAZQBWwExAVABVAE+AWoBXAFrAbMB6wH5AZQBZwE2AVEBaAFWAZYBjAGkAdgBJAIvAs8BlwFqAZUBoQGHAaMBzQHYARcCbgJeAvwBzQGGAbAB", + "dtype": "i2" + } + }, + { + "line": { + "dash": "dot" + }, + "name": "12m Forecast", + "type": "scatter", + "x": [ + "1949-01-01T00:00:00.000000000", + "1949-02-01T00:00:00.000000000", + 
"1949-03-01T00:00:00.000000000", + "1949-04-01T00:00:00.000000000", + "1949-05-01T00:00:00.000000000", + "1949-06-01T00:00:00.000000000", + "1949-07-01T00:00:00.000000000", + "1949-08-01T00:00:00.000000000", + "1949-09-01T00:00:00.000000000", + "1949-10-01T00:00:00.000000000", + "1949-11-01T00:00:00.000000000", + "1949-12-01T00:00:00.000000000", + "1950-01-01T00:00:00.000000000", + "1950-02-01T00:00:00.000000000", + "1950-03-01T00:00:00.000000000", + "1950-04-01T00:00:00.000000000", + "1950-05-01T00:00:00.000000000", + "1950-06-01T00:00:00.000000000", + "1950-07-01T00:00:00.000000000", + "1950-08-01T00:00:00.000000000", + "1950-09-01T00:00:00.000000000", + "1950-10-01T00:00:00.000000000", + "1950-11-01T00:00:00.000000000", + "1950-12-01T00:00:00.000000000", + "1951-01-01T00:00:00.000000000", + "1951-02-01T00:00:00.000000000", + "1951-03-01T00:00:00.000000000", + "1951-04-01T00:00:00.000000000", + "1951-05-01T00:00:00.000000000", + "1951-06-01T00:00:00.000000000", + "1951-07-01T00:00:00.000000000", + "1951-08-01T00:00:00.000000000", + "1951-09-01T00:00:00.000000000", + "1951-10-01T00:00:00.000000000", + "1951-11-01T00:00:00.000000000", + "1951-12-01T00:00:00.000000000", + "1952-01-01T00:00:00.000000000", + "1952-02-01T00:00:00.000000000", + "1952-03-01T00:00:00.000000000", + "1952-04-01T00:00:00.000000000", + "1952-05-01T00:00:00.000000000", + "1952-06-01T00:00:00.000000000", + "1952-07-01T00:00:00.000000000", + "1952-08-01T00:00:00.000000000", + "1952-09-01T00:00:00.000000000", + "1952-10-01T00:00:00.000000000", + "1952-11-01T00:00:00.000000000", + "1952-12-01T00:00:00.000000000", + "1953-01-01T00:00:00.000000000", + "1953-02-01T00:00:00.000000000", + "1953-03-01T00:00:00.000000000", + "1953-04-01T00:00:00.000000000", + "1953-05-01T00:00:00.000000000", + "1953-06-01T00:00:00.000000000", + "1953-07-01T00:00:00.000000000", + "1953-08-01T00:00:00.000000000", + "1953-09-01T00:00:00.000000000", + "1953-10-01T00:00:00.000000000", + "1953-11-01T00:00:00.000000000", + 
"1953-12-01T00:00:00.000000000", + "1954-01-01T00:00:00.000000000", + "1954-02-01T00:00:00.000000000", + "1954-03-01T00:00:00.000000000", + "1954-04-01T00:00:00.000000000", + "1954-05-01T00:00:00.000000000", + "1954-06-01T00:00:00.000000000", + "1954-07-01T00:00:00.000000000", + "1954-08-01T00:00:00.000000000", + "1954-09-01T00:00:00.000000000", + "1954-10-01T00:00:00.000000000", + "1954-11-01T00:00:00.000000000", + "1954-12-01T00:00:00.000000000", + "1955-01-01T00:00:00.000000000", + "1955-02-01T00:00:00.000000000", + "1955-03-01T00:00:00.000000000", + "1955-04-01T00:00:00.000000000", + "1955-05-01T00:00:00.000000000", + "1955-06-01T00:00:00.000000000", + "1955-07-01T00:00:00.000000000", + "1955-08-01T00:00:00.000000000", + "1955-09-01T00:00:00.000000000", + "1955-10-01T00:00:00.000000000", + "1955-11-01T00:00:00.000000000", + "1955-12-01T00:00:00.000000000", + "1956-01-01T00:00:00.000000000", + "1956-02-01T00:00:00.000000000", + "1956-03-01T00:00:00.000000000", + "1956-04-01T00:00:00.000000000", + "1956-05-01T00:00:00.000000000", + "1956-06-01T00:00:00.000000000", + "1956-07-01T00:00:00.000000000", + "1956-08-01T00:00:00.000000000", + "1956-09-01T00:00:00.000000000", + "1956-10-01T00:00:00.000000000", + "1956-11-01T00:00:00.000000000", + "1956-12-01T00:00:00.000000000", + "1957-01-01T00:00:00.000000000", + "1957-02-01T00:00:00.000000000", + "1957-03-01T00:00:00.000000000", + "1957-04-01T00:00:00.000000000", + "1957-05-01T00:00:00.000000000", + "1957-06-01T00:00:00.000000000", + "1957-07-01T00:00:00.000000000", + "1957-08-01T00:00:00.000000000", + "1957-09-01T00:00:00.000000000", + "1957-10-01T00:00:00.000000000", + "1957-11-01T00:00:00.000000000", + "1957-12-01T00:00:00.000000000", + "1958-01-01T00:00:00.000000000", + "1958-02-01T00:00:00.000000000", + "1958-03-01T00:00:00.000000000", + "1958-04-01T00:00:00.000000000", + "1958-05-01T00:00:00.000000000", + "1958-06-01T00:00:00.000000000", + "1958-07-01T00:00:00.000000000", + "1958-08-01T00:00:00.000000000", + 
"1958-09-01T00:00:00.000000000", + "1958-10-01T00:00:00.000000000", + "1958-11-01T00:00:00.000000000", + "1958-12-01T00:00:00.000000000", + "1959-01-01T00:00:00.000000000", + "1959-02-01T00:00:00.000000000", + "1959-03-01T00:00:00.000000000", + "1959-04-01T00:00:00.000000000", + "1959-05-01T00:00:00.000000000", + "1959-06-01T00:00:00.000000000", + "1959-07-01T00:00:00.000000000", + "1959-08-01T00:00:00.000000000", + "1959-09-01T00:00:00.000000000", + "1959-10-01T00:00:00.000000000", + "1959-11-01T00:00:00.000000000", + "1959-12-01T00:00:00.000000000", + "1960-01-01T00:00:00.000000000", + "1960-02-01T00:00:00.000000000", + "1960-03-01T00:00:00.000000000", + "1960-04-01T00:00:00.000000000", + "1960-05-01T00:00:00.000000000", + "1960-06-01T00:00:00.000000000", + "1960-07-01T00:00:00.000000000", + "1960-08-01T00:00:00.000000000", + "1960-09-01T00:00:00.000000000", + "1960-10-01T00:00:00.000000000", + "1960-11-01T00:00:00.000000000", + "1960-12-01T00:00:00.000000000" + ], + "y": { + "bdata": 
"AAAAAAAA+H8AAAAAAAD4fwAAAAAAAPh/AAAAAAAA+H8AAAAAAAD4fwAAAAAAAPh/AAAAAAAA+H8AAAAAAAD4fwAAAAAAAPh/AAAAAAAA+H8AAAAAAAD4f6uqqqqqql9Aq6qqqqq6X0BVVVVVVeVfQKuqqqqqCmBAq6qqqqoaYEBVVVVVVSVgQKuqqqqqSmBAVVVVVVWFYEAAAAAAAMBgQKuqqqqq+mBAAAAAAAAgYUCrqqqqqjphQFVVVVVVdWFAVVVVVVXFYUBVVVVVVQViQAAAAAAAaGJAq6qqqqqyYkAAAAAAADBjQFVVVVVVfWNAq6qqqqrKY0AAAAAAABhkQFVVVVVVXWRAq6qqqqqqZEAAAAAAAABlQFVVVVVVRWVAq6qqqqqKZUCrqqqqqtplQKuqqqqqAmZAq6qqqqoyZkAAAAAAAFBmQKuqqqqqumZAVVVVVVUNZ0AAAAAAAIBnQKuqqqqqwmdAAAAAAAAQaEBVVVVVVVVoQAAAAAAAoGhAq6qqqqriaEBVVVVVVQ1pQAAAAAAAgGlAAAAAAAAQakCrqqqqqopqQFVVVVVVzWpAAAAAAAAoa0AAAAAAAHhrQKuqqqqqwmtAAAAAAAD4a0BVVVVVVQ1sQAAAAAAAIGxAVVVVVVU1bEAAAAAAACBsQFVVVVVVHWxAAAAAAAAIbEBVVVVVVRVsQFVVVVVVTWxAq6qqqqqybECrqqqqqupsQFVVVVVVJW1AVVVVVVVVbUCrqqqqqpJtQFVVVVVV3W1Aq6qqqqpCbkCrqqqqqrpuQAAAAAAAEG9AAAAAAACAb0AAAAAAAOBvQAAAAAAANHBAq6qqqqqGcECrqqqqqs5wQFVVVVVVFXFAVVVVVVVRcUCrqqqqqn5xQAAAAAAAwHFAAAAAAAD4cUCrqqqqqjJyQFVVVVVVdXJAAAAAAACwckAAAAAAAPByQKuqqqqqPnNAAAAAAACAc0BVVVVVVc1zQKuqqqqqBnRAVVVVVVUxdECrqqqqql50QAAAAAAAhHRAVVVVVVWtdEBVVVVVVc10QFVVVVVVAXVAAAAAAAAwdUBVVVVVVWF1QFVVVVVVoXVAq6qqqqrmdUBVVVVVVTl2QKuqqqqqenZAVVVVVVWxdkCrqqqqqt52QKuqqqqqBndAAAAAAAAod0Crqqqqqj53QKuqqqqqRndAq6qqqqpGd0BVVVVVVVF3QKuqqqqqYndAVVVVVVWFd0AAAAAAALh3QAAAAAAAuHdAAAAAAADId0Crqqqqqs53QAAAAAAA0HdAq6qqqqrqd0Crqqqqqgp4QFVVVVVVRXhAVVVVVVWFeEBVVVVVVdF4QKuqqqqqAnlAq6qqqqpOeUCrqqqqqpZ5QFVVVVVV5XlAVVVVVVUlekCrqqqqqmp6QFVVVVVVxXpAVVVVVVURe0CrqqqqqlJ7QAAAAAAAZHtAq6qqqqq6e0AAAAAAAAB8QAAAAAAAVHxAq6qqqqq2fEBVVVVVVfV8QFVVVVVVMX1AVVVVVVV5fUCrqqqqqp59QKuqqqqqwn1A", + "dtype": "f8" + } + } + ], + "layout": { + "hovermode": "x unified", + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + 
"pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + 
"#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + 
"marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + 
[ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "white", + "showlakes": true, + "showland": true, + "subunitcolor": "#C8D4E3" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "white", + "polar": { + "angularaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + }, + "bgcolor": "white", + "radialaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "yaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "zaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + 
"linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "baxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "bgcolor": "white", + "caxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Interactive Air Passengers: Zoom + Forecast" + }, + "xaxis": { + "rangeselector": { + "buttons": [ + { + "count": 1, + "label": "1y", + "step": "year", + "stepmode": "backward" + }, + { + "count": 5, + "label": "5y", + "step": "year", + "stepmode": "backward" + }, + { + "step": "all" + } + ] + }, + "rangeslider": { + "visible": true + }, + "type": "date" + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Initialize figure with actual and forecast traces\n", + "fig = go.Figure()\n", + "\n", + "fig.add_trace(go.Scatter(x=df.index, y=df['Passengers'], name='Actual Passengers', mode='lines'))\n", + "fig.add_trace(go.Scatter(x=df.index, y=df['Trend_Forecast'], name='12m Forecast', line=dict(dash='dot')))\n", + "\n", + "# Configure Layout for Interactivity\n", + "fig.update_layout(\n", + " title='Interactive Air Passengers: Zoom + Forecast',\n", + " xaxis=dict(\n", + " rangeselector=dict(\n", + " buttons=list([\n", + " dict(count=1, label=\"1y\", step=\"year\", stepmode=\"backward\"),\n", + " dict(count=5, label=\"5y\", step=\"year\", 
stepmode=\"backward\"),\n", + " dict(step=\"all\")\n", + " ])\n", + " ),\n", + " rangeslider=dict(visible=True), \n", + " type=\"date\"\n", + " ),\n", + " hovermode='x unified',\n", + " template='plotly_white'\n", + ")\n", + "\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "72df77be", + "metadata": {}, + "source": [ + "## 🔗 Resources and Support\n", + "For further information on the platform or the libraries used in this template, please refer to the following official links:\n", + "\n", + "* **Platform**: [Saturn Cloud Dashboard](https://saturncloud.io/)\n", + "* **Support**: [Saturn Cloud Documentation](https://saturncloud.io/docs/)\n", + "* **Library**: [Plotly Python Documentation](https://plotly.com/python/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/data-science-analystics/cpu-rshiny-kpi/Icon_r_shiny_kpi.png b/examples/data-science-analystics/cpu-rshiny-kpi/Icon_r_shiny_kpi.png new file mode 100644 index 00000000..c0ac0a03 Binary files /dev/null and b/examples/data-science-analystics/cpu-rshiny-kpi/Icon_r_shiny_kpi.png differ diff --git a/examples/data-science-analystics/cpu-rshiny-kpi/README.md b/examples/data-science-analystics/cpu-rshiny-kpi/README.md new file mode 100644 index 00000000..8de1dd7c --- /dev/null +++ b/examples/data-science-analystics/cpu-rshiny-kpi/README.md @@ -0,0 +1,66 @@ +### **README.md** + +# 📊 R Shiny KPI Dashboard + +
+ +
+ +### **Overview** + +This template provides a professional-grade executive dashboard built for **R Server** environments. Optimized for **CPU resources**, it demonstrates high-efficiency data aggregation and reactive visualization. The "3-tile" architecture is designed to provide immediate clarity on Key Performance Indicators (KPIs), transforming complex datasets into actionable business insights through a clean, modern interface. + +### **Dataset Overview** + +The template utilizes the **Diamonds** dataset (shipped with `ggplot2`, part of the Tidyverse), which contains the physical attributes and prices of nearly 54,000 diamonds. This dataset is used to calculate critical retail KPIs, including **Total Revenue**, **Average Price**, and **Inventory Volume**, filtered by the quality of the diamond cut. + +### **Tech Stack** + +* **R**: The core statistical engine used for data processing. +* **Shiny**: The reactive web framework that powers the interactive dashboard elements. +* **Tidyverse**: Utilized for efficient data manipulation (`dplyr`) and high-quality plotting (`ggplot2`). +* **bslib/bsicons**: Provide the Bootstrap 5 styling for the KPI "value boxes" and professional iconography. + +--- + +## 🛠️ Local Setup Instructions (Linux/Kali) + +### 1. Install System Dependencies + +Before installing the R packages, ensure your Linux system has the required development headers: + +```bash +sudo apt update && sudo apt install -y libcurl4-openssl-dev libssl-dev libxml2-dev libfontconfig1-dev libfreetype6-dev libpng-dev libtiff-dev libjpeg-dev libharfbuzz-dev libfribidi-dev + +``` +### 2. Install R Libraries + +Open your RStudio console and run: + +```r +install.packages(c("shiny", "tidyverse", "bslib", "bsicons")) + +``` + +### 3. Launch the Dashboard + +1. Open `app.R` in RStudio. +2. Click the **"Run App"** button at the top of the editor. + +--- + +## 📈 Dashboard Features + +* **Key Performance Tiles**: Instant summary of Revenue, Avg Price, and Count.
+* **Dynamic Sidebar**: Multi-select filters for diamond quality (Cut) that update the entire UI in real-time. +* **Trend Analysis**: Scatter plot with a smoothed trend line to visualize the relationship between carat weight and price. + +--- + +## 🔗 Resources and Support + +* **Platform**: [Saturn Cloud R Documentation](https://saturncloud.io/docs/) +* **Library**: [Shiny Official Gallery](https://shiny.posit.co/r/gallery/) +* **Library**: [Tidyverse Documentation](https://www.tidyverse.org/) + +--- \ No newline at end of file diff --git a/examples/data-science-analystics/cpu-rshiny-kpi/app.R b/examples/data-science-analystics/cpu-rshiny-kpi/app.R new file mode 100644 index 00000000..68be423a --- /dev/null +++ b/examples/data-science-analystics/cpu-rshiny-kpi/app.R @@ -0,0 +1,85 @@ +library(shiny) +library(tidyverse) +library(bslib) +library(bsicons) + +# --- 1. REAL DATA PREPARATION --- +# Using the real 'diamonds' dataset from ggplot2 +data <- diamonds %>% + mutate(SaleID = row_number()) %>% + rename(Sales = price) + +# --- 2. UI DEFINITION --- +ui <- page_sidebar( + title = "Real-World KPI Dashboard (Diamonds)", + + sidebar = sidebar( + title = "Data Filters", + # Filter by diamond cut (Quality) + selectInput("cut", "Select Diamond Cut", + choices = unique(data$cut), + selected = "Ideal", + multiple = TRUE), + helpText("The KPIs will update based on the quality of diamonds selected.") + ), + + # 3-Tile KPI Layout + layout_column_wrap( + width = 1/3, + value_box( + title = "Total Revenue", + value = uiOutput("total_sales"), + showcase = bs_icon("gem"), + theme = "primary" + ), + value_box( + title = "Avg Diamond Price", + value = uiOutput("avg_sale"), + showcase = bs_icon("cash-stack"), + theme = "success" + ), + value_box( + title = "Inventory Count", + value = uiOutput("total_cust"), + showcase = bs_icon("box-seam"), + theme = "info" + ) + ), + + card( + card_header("Price Distribution (by Carat)"), + plotOutput("sales_plot") + ) +) + +# --- 3. 
SERVER LOGIC --- +server <- function(input, output) { + + # Reactive filtering based on user input + filtered_data <- reactive({ + data %>% + filter(cut %in% input$cut) + }) + + output$total_sales <- renderText({ + paste0("$", format(round(sum(filtered_data()$Sales), 0), big.mark=",")) + }) + + output$avg_sale <- renderText({ + paste0("$", round(mean(filtered_data()$Sales), 2)) + }) + + output$total_cust <- renderText({ + format(nrow(filtered_data()), big.mark=",") + }) + + output$sales_plot <- renderPlot({ + ggplot(filtered_data(), aes(x = carat, y = Sales)) + + geom_point(alpha = 0.2, color = "#3498db") + + geom_smooth(color = "red") + + theme_minimal() + + labs(x = "Carat Weight", y = "Price ($)") + }) +} + +shinyApp(ui = ui, server = server) \ No newline at end of file diff --git a/examples/data-science-analystics/cpu-sklearn-classifier/Icon_sklearn_classifier.png b/examples/data-science-analystics/cpu-sklearn-classifier/Icon_sklearn_classifier.png new file mode 100644 index 00000000..cc9e5e71 Binary files /dev/null and b/examples/data-science-analystics/cpu-sklearn-classifier/Icon_sklearn_classifier.png differ diff --git a/examples/data-science-analystics/cpu-sklearn-classifier/tabular_classifier.ipynb b/examples/data-science-analystics/cpu-sklearn-classifier/tabular_classifier.ipynb new file mode 100644 index 00000000..da4ff58c --- /dev/null +++ b/examples/data-science-analystics/cpu-sklearn-classifier/tabular_classifier.ipynb @@ -0,0 +1,216 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🤖 scikit-learn Tabular Classifier\n", + "\n", + "
\n", + " \n", + "
\n", + "\n", + "### **Template Review**\n", + "This template provides a robust workflow for supervised learning on tabular data using **Jupyter Notebook**. Optimized for **CPU resources**, it covers the end-to-end pipeline from data preprocessing to model evaluation. The primary goal is to train a **Gradient Boosting Machine (GBM)** and extract **feature importance** to understand which variables drive the model's decisions.\n", + "\n", + "### **Dataset Overview**\n", + "The template uses scikit-learn's built-in **Wine** recognition dataset (`load_wine`), which contains chemical properties of 178 wines and a target label identifying one of three cultivars. It is a classic tabular dataset ideal for demonstrating classification performance and feature ranking.\n", + "\n", + "### **Tech Stack**\n", + "* **Python**: The base language for the machine learning pipeline.\n", + "* **scikit-learn (sklearn)**: Provides the GBM implementation and evaluation metrics (Confusion Matrix, Accuracy).\n", + "* **Pandas**: Used for data manipulation, cleaning, and feature engineering.\n", + "\n", + "---\n", + "\n", + "## 🚀 Getting Started\n", + "Let's ensure the environment has the required libraries installed before proceeding." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pandas in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (2.3.3)\n", + "Requirement already satisfied: scikit-learn in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (1.8.0)\n", + "Requirement already satisfied: matplotlib in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (3.10.8)\n", + "Requirement already satisfied: seaborn in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (0.13.2)\n", + "Requirement already satisfied: numpy>=1.26.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from pandas) (2.4.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from pandas) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from pandas) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from pandas) (2025.3)\n", + "Requirement already satisfied: scipy>=1.10.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from scikit-learn) (1.16.3)\n", + "Requirement already satisfied: joblib>=1.3.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from scikit-learn) (1.5.3)\n", + "Requirement already satisfied: threadpoolctl>=3.2.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from scikit-learn) (3.6.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from matplotlib) (1.3.3)\n", + "Requirement already satisfied: 
cycler>=0.10 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from matplotlib) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from matplotlib) (4.61.1)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from matplotlib) (1.4.9)\n", + "Requirement already satisfied: packaging>=20.0 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from matplotlib) (25.0)\n", + "Requirement already satisfied: pillow>=8 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from matplotlib) (12.1.0)\n", + "Requirement already satisfied: pyparsing>=3 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from matplotlib) (3.3.1)\n", + "Requirement already satisfied: six>=1.5 in /home/kali/Documents/templates/cpu-plotly-env/lib/python3.13/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n" + ] + } + ], + "source": [ + "# Install core machine learning libraries\n", + "!pip install pandas scikit-learn matplotlib seaborn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 1: Data Loading & Preprocessing**\n", + "We load the tabular data using **Pandas** and split it into training and testing sets to evaluate model generalization." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Training samples: 142 | Testing samples: 36\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.datasets import load_wine\n", + "\n", + "# Load dataset\n", + "data = load_wine()\n", + "df = pd.DataFrame(data.data, columns=data.feature_names)\n", + "df['target'] = data.target\n", + "\n", + "# Train/Test Split\n", + "X = df.drop('target', axis=1)\n", + "y = df['target']\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "print(f\"Training samples: {len(X_train)} | Testing samples: {len(X_test)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 2: Train Gradient Boosting Classifier**\n", + "We utilize the `GradientBoostingClassifier` from **sklearn**. This model builds trees sequentially to minimize errors from previous iterations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.93 1.00 0.97 14\n", + " 1 0.93 0.93 0.93 14\n", + " 2 1.00 0.88 0.93 8\n", + "\n", + " accuracy 0.94 36\n", + " macro avg 0.95 0.93 0.94 36\n", + "weighted avg 0.95 0.94 0.94 36\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.ensemble import GradientBoostingClassifier\n", + "from sklearn.metrics import classification_report\n", + "\n", + "# Initialize and train the GBM\n", + "model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)\n", + "model.fit(X_train, y_train)\n", + "\n", + "# Predictions\n", + "y_pred = model.predict(X_test)\n", + "print(classification_report(y_test, y_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Step 3: Feature Importance Analysis**\n", + "Understanding which features contribute most to the prediction is key for model interpretability." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "# Extract and plot feature importance\n", + "feat_importances = pd.Series(model.feature_importances_, index=X.columns)\n", + "plt.figure(figsize=(10, 6))\n", + "feat_importances.nlargest(10).plot(kind='barh', color='#6c5ce7')\n", + "plt.title('Top 10 Feature Importances')\n", + "plt.xlabel('Importance Score')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🔗 Resources and Support\n", + "* **Scikit-learn Docs**: [Gradient Boosting Classifier](https://scikit-learn.org/stable/modules/ensemble.html#gradient-boosting)\n", + "* **Pandas Docs**: [Dataframe Manipulation](https://pandas.pydata.org/docs/)\n", + "* **Platform**: [Saturn Cloud Jupyter Notebooks](https://saturncloud.io/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu-plotly-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/data-science-analystics/cpu-streamlit-eda/README.md b/examples/data-science-analystics/cpu-streamlit-eda/README.md new file mode 100644 index 00000000..99917ce2 --- /dev/null +++ b/examples/data-science-analystics/cpu-streamlit-eda/README.md @@ -0,0 +1,65 @@ + +# 🚀 Streamlit EDA Dashboard + +
+ +
+ +### **Overview** +This template provides a rapid deployment setup for a web-based Exploratory Data Analysis (EDA) tool. Designed for **CPU resources**, it allows you to transform a local Python environment into a functional analytics dashboard. The primary goal is to provide a "One-click profiler" that automates data inspection, statistical summaries, and distribution plotting through an intuitive browser interface. + +### **Dataset Overview** +The template utilizes the **Tips** toy dataset, which contains records of restaurant bills, tip amounts, and demographic data such as the day of the week and time of day. It is an excellent dataset for demonstrating the power of categorical filtering and numerical profiling in an automated dashboard environment. + +### **Tech Stack** +* **Python**: The core logic layer for data processing and app execution. +* **Pandas**: Manages the underlying data frames and performs statistical profiling calculations. +* **Streamlit**: The primary framework used to build the interactive UI and handle real-time visualization updates. + +--- + +## 🛠️ Local Setup Instructions + +### 1. Create and Activate Virtual Environment +Open your terminal on your host machine and execute the following: +```bash +# Create environment +python -m venv streamlit_env + +# Activate (Windows) +streamlit_env\Scripts\activate + +# Activate (macOS/Linux) +source streamlit_env/bin/activate + +``` + +### 2. Install Dependencies + +```bash +pip install streamlit pandas matplotlib seaborn + +``` +or + +```bash +pip install -r requirements.txt + +``` + +### 3. 
Run the Dashboard + +```bash +streamlit run app.py + +``` + +--- + +## 🔗 Resources and Support + +* **Dashboard Platform**: [Streamlit Cloud](https://streamlit.io/cloud) +* **Library**: [Streamlit Documentation](https://docs.streamlit.io/) +* **Library**: [Pandas API Reference](https://pandas.pydata.org/docs/) + +--- \ No newline at end of file diff --git a/examples/data-science-analystics/cpu-streamlit-eda/app.py b/examples/data-science-analystics/cpu-streamlit-eda/app.py new file mode 100644 index 00000000..2e56861e --- /dev/null +++ b/examples/data-science-analystics/cpu-streamlit-eda/app.py @@ -0,0 +1,47 @@ +import streamlit as st +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +# --- PAGE CONFIGURATION --- +st.set_page_config(page_title="EDA Profiler", layout="wide") + +# --- INTRODUCTION --- +st.title("📊 One-Click EDA Dashboard") +st.markdown("This dashboard provides instant statistical profiling and visualization for the Tips dataset.") + +# --- STEP 2: LOAD AND PREPARE DATASET --- +# We load the Tips dataset from seaborn's built-in repository. +@st.cache_data # Cache data to prevent reloading on every interaction +def load_data(): + return sns.load_dataset('tips') + +df = load_data() + +# --- STEP 3: SIDEBAR FILTERS --- +# We enable global filters to allow users to slice data by specific days of the week. +st.sidebar.header("Global Filters") +selected_day = st.sidebar.multiselect( + "Select Day", + options=df['day'].unique(), + default=df['day'].unique() +) +filtered_df = df[df['day'].isin(selected_day)] + +# --- STEP 4: STATISTICS PROFILER --- +# This block calculates and displays the numerical summary statistics for the filtered data. 
+col1, col2 = st.columns(2) +with col1: + st.subheader("🔢 Statistical Summary") + st.dataframe(filtered_df.describe(), use_container_width=True) + +with col2: + st.subheader("📋 Raw Data Sample") + st.write(filtered_df.head(10)) + +# --- STEP 5: VISUALIZATION --- +# Using Seaborn and Matplotlib to render the distribution of bill amounts interactively. +st.subheader("📈 Distribution of Total Bills") +fig, ax = plt.subplots(figsize=(10, 4)) +sns.histplot(filtered_df['total_bill'], kde=True, ax=ax, color="#FF4B4B") +st.pyplot(fig) \ No newline at end of file diff --git a/examples/data-science-analystics/cpu-streamlit-eda/icon-bar-histogram.png b/examples/data-science-analystics/cpu-streamlit-eda/icon-bar-histogram.png new file mode 100644 index 00000000..dd51412d Binary files /dev/null and b/examples/data-science-analystics/cpu-streamlit-eda/icon-bar-histogram.png differ diff --git a/examples/data-science-analystics/cpu-streamlit-eda/requirements.txt b/examples/data-science-analystics/cpu-streamlit-eda/requirements.txt new file mode 100644 index 00000000..b40c761d --- /dev/null +++ b/examples/data-science-analystics/cpu-streamlit-eda/requirements.txt @@ -0,0 +1,4 @@ +streamlit +pandas +matplotlib +seaborn \ No newline at end of file diff --git a/examples/data-science-analystics/cpu-xgboost-serve/Icon-xgboost.png b/examples/data-science-analystics/cpu-xgboost-serve/Icon-xgboost.png new file mode 100644 index 00000000..4958660d Binary files /dev/null and b/examples/data-science-analystics/cpu-xgboost-serve/Icon-xgboost.png differ diff --git a/examples/data-science-analystics/cpu-xgboost-serve/main.py b/examples/data-science-analystics/cpu-xgboost-serve/main.py new file mode 100644 index 00000000..5669e186 --- /dev/null +++ b/examples/data-science-analystics/cpu-xgboost-serve/main.py @@ -0,0 +1,24 @@ +from fastapi import FastAPI +import joblib +import numpy as np +from pydantic import BaseModel + +app = FastAPI(title="Titanic Survival Predictor") +model = 
joblib.load("model.joblib") + +class Passenger(BaseModel): + features: list[float] # [Pclass, Sex, Age, SibSp, Parch, Fare] + +@app.post("/predict") +def predict(data: Passenger): + features = np.array(data.features).reshape(1, -1) + prediction = model.predict(features)[0] + prob = model.predict_proba(features)[0] + + status = "SURVIVED" if prediction == 1 else "DID NOT SURVIVE" + + return { + "result": status, + "probability": f"{round(np.max(prob) * 100, 2)}%", + "data_source": "Existing Titanic Dataset" + } \ No newline at end of file diff --git a/examples/data-science-analystics/cpu-xgboost-serve/requirements.txt b/examples/data-science-analystics/cpu-xgboost-serve/requirements.txt new file mode 100644 index 00000000..81027390 --- /dev/null +++ b/examples/data-science-analystics/cpu-xgboost-serve/requirements.txt @@ -0,0 +1,8 @@ +xgboost +fastapi +uvicorn +scikit-learn +joblib +numpy +requests +pandas \ No newline at end of file diff --git a/examples/data-science-analystics/cpu-xgboost-serve/setup.sh b/examples/data-science-analystics/cpu-xgboost-serve/setup.sh new file mode 100755 index 00000000..4c44f03b --- /dev/null +++ b/examples/data-science-analystics/cpu-xgboost-serve/setup.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Define ANSI colors for output (SGR parameters are separated by ';': "0;32" = reset, then green) +GREEN='\033[0;32m' +NC='\033[0m' # No Color + +echo -e "${GREEN}🚀 Starting setup for XGBoost Train & Serve...${NC}" + +# 1. Create Virtual Environment +echo "Creating virtual environment 'venv_xgboost'..." +python3 -m venv venv_xgboost + +# 2. Activate Environment and Install Requirements +echo "Installing dependencies from requirements.txt..." +source venv_xgboost/bin/activate +pip install --upgrade pip +pip install -r requirements.txt + +# 3. Initial Model Training +echo "Performing initial model training on Titanic dataset..." 
+python train.py + +echo -e "${GREEN}✅ Setup Complete!${NC}" +echo "To start the server, run:" +echo "source venv_xgboost/bin/activate && uvicorn main:app --reload" \ No newline at end of file diff --git a/examples/data-science-analystics/cpu-xgboost-serve/test_api.py b/examples/data-science-analystics/cpu-xgboost-serve/test_api.py new file mode 100644 index 00000000..9dbb10a3 --- /dev/null +++ b/examples/data-science-analystics/cpu-xgboost-serve/test_api.py @@ -0,0 +1,14 @@ +import requests + +url = "http://127.0.0.1:8000/predict" + +# Profile: 1st Class, Female (1), Age 22, 0 siblings, 0 parents, Fare 71.0 +passenger_1 = [1, 1, 22, 0, 0, 71.0] + +print(f"🚀 Predicting Survival for Passenger...") +response = requests.post(url, json={"features": passenger_1}) +result = response.json() + +print("\n--- Prediction Reply ---") +print(f"Outcome: {result['result']}") +print(f"Confidence: {result['probability']}") \ No newline at end of file diff --git a/examples/data-science-analystics/cpu-xgboost-serve/train.py b/examples/data-science-analystics/cpu-xgboost-serve/train.py new file mode 100644 index 00000000..27005ab7 --- /dev/null +++ b/examples/data-science-analystics/cpu-xgboost-serve/train.py @@ -0,0 +1,26 @@ +import xgboost as xgb +import pandas as pd +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +import joblib + +# 1. Load existing Telco Churn dataset +df = pd.read_csv("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv") + +# Let's use the 'Titanic' dataset to predict 'Survival' (0 = No, 1 = Yes) +# It is the most famous existing dataset for tabular classification. +df = df[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']].dropna() +df['Sex'] = LabelEncoder().fit_transform(df['Sex']) # Convert to numbers + +X = df.drop('Survived', axis=1) +y = df['Survived'] + +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + +# 2. 
Train XGBoost +model = xgb.XGBClassifier(n_estimators=100) +model.fit(X_train, y_train) + +# 3. Save +joblib.dump(model, "model.joblib") +print("✅ Success: Titanic Survival model trained using existing data.") \ No newline at end of file