Skip to content

Commit 4bbc954

Browse files
committed
add command line download option
1 parent 13c2d92 commit 4bbc954

File tree

1 file changed

+80
-59
lines changed

1 file changed

+80
-59
lines changed

notebooks/getting_started/part2_searching_basics.ipynb

Lines changed: 80 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,19 @@
22
"cells": [
33
{
44
"cell_type": "markdown",
5-
"metadata": {},
5+
"metadata": {
6+
"id": "view-in-github",
7+
"colab_type": "text"
8+
},
9+
"source": [
10+
"<a href=\"https://colab.research.google.com/github/ImagingDataCommons/IDC-Tutorials/blob/master/notebooks/getting_started/part2_searching_basics.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
11+
]
12+
},
13+
{
14+
"cell_type": "markdown",
15+
"metadata": {
16+
"id": "2XpwtDBF3yH5"
17+
},
618
"source": [
719
"<a href=\"https://colab.research.google.com/github/ImagingDataCommons/IDC-Tutorials/blob/master/notebooks/getting_started/part2_searching_basics.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
820
]
@@ -33,7 +45,7 @@
3345
"---\n",
3446
"Initial version: Nov 2022\n",
3547
"\n",
36-
"Updated: May 2024\n"
48+
"Updated: June 2024\n"
3749
]
3850
},
3951
{
@@ -66,49 +78,12 @@
6678
},
6779
{
6880
"cell_type": "code",
69-
"execution_count": 1,
81+
"execution_count": null,
7082
"metadata": {
7183
"cellView": "form",
72-
"colab": {
73-
"base_uri": "https://localhost:8080/"
74-
},
75-
"id": "bDGChJBK9ooq",
76-
"outputId": "d89226e0-d1bc-4873-ee1d-159320c0602a"
84+
"id": "bDGChJBK9ooq"
7785
},
78-
"outputs": [
79-
{
80-
"name": "stdout",
81-
"output_type": "stream",
82-
"text": [
83-
"Collecting idc-index\n",
84-
" Downloading idc_index-0.5.7-py3-none-any.whl (18 kB)\n",
85-
"Requirement already satisfied: duckdb>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from idc-index) (0.10.2)\n",
86-
"Collecting idc-index-data==18.0.1 (from idc-index)\n",
87-
" Downloading idc_index_data-18.0.1-py3-none-any.whl (54.0 MB)\n",
88-
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.0/54.0 MB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
89-
"\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from idc-index) (24.0)\n",
90-
"Requirement already satisfied: pandas<2.2 in /usr/local/lib/python3.10/dist-packages (from idc-index) (2.0.3)\n",
91-
"Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from idc-index) (5.9.5)\n",
92-
"Requirement already satisfied: pyarrow in /usr/local/lib/python3.10/dist-packages (from idc-index) (14.0.2)\n",
93-
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from idc-index) (2.31.0)\n",
94-
"Collecting s5cmd (from idc-index)\n",
95-
" Downloading s5cmd-0.2.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.7 MB)\n",
96-
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.7/4.7 MB\u001b[0m \u001b[31m74.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
97-
"\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from idc-index) (4.66.4)\n",
98-
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2->idc-index) (2.8.2)\n",
99-
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2->idc-index) (2023.4)\n",
100-
"Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2->idc-index) (2024.1)\n",
101-
"Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2->idc-index) (1.25.2)\n",
102-
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->idc-index) (3.3.2)\n",
103-
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->idc-index) (3.7)\n",
104-
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->idc-index) (2.0.7)\n",
105-
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->idc-index) (2024.2.2)\n",
106-
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas<2.2->idc-index) (1.16.0)\n",
107-
"Installing collected packages: s5cmd, idc-index-data, idc-index\n",
108-
"Successfully installed idc-index-0.5.7 idc-index-data-18.0.1 s5cmd-0.2.0\n"
109-
]
110-
}
111-
],
86+
"outputs": [],
11287
"source": [
11388
"!pip install idc-index --upgrade"
11489
]
@@ -165,7 +140,7 @@
165140
},
166141
{
167142
"cell_type": "code",
168-
"execution_count": 5,
143+
"execution_count": null,
169144
"metadata": {
170145
"colab": {
171146
"base_uri": "https://localhost:8080/"
@@ -194,6 +169,51 @@
194169
"#idc_client.download_from_selection(collection_id=\"ct_phantom4radiomics\", downloadDir=\".\")\n"
195170
]
196171
},
172+
{
173+
"cell_type": "markdown",
174+
"source": [
175+
"You also have the choice of downloading from IDC using the command-line helper tool that is installed as part of the `idc-index` package. The cell below shows how to download that same study from the command line."
176+
],
177+
"metadata": {
178+
"id": "0s2Ok9iK5Ae-"
179+
}
180+
},
181+
{
182+
"cell_type": "code",
183+
"source": [
184+
"!idc download-from-selection --study-instance-uid 1.2.840.113654.2.55.68425808326883186792123057288612355322 --download-dir ."
185+
],
186+
"metadata": {
187+
"id": "jyEEknBR5P1V",
188+
"outputId": "1b4d9271-0417-46cc-bea5-30b8719f0f2e",
189+
"colab": {
190+
"base_uri": "https://localhost:8080/"
191+
}
192+
},
193+
"execution_count": 6,
194+
"outputs": [
195+
{
196+
"output_type": "stream",
197+
"name": "stdout",
198+
"text": [
199+
"2024-06-17 20:31:29,827 - Total size of files to download: 314.45 MB\n",
200+
"2024-06-17 20:31:29,827 - Total free space on disk: 212.901072896GB\n",
201+
"2024-06-17 20:31:29,987 - \n",
202+
"Temporary download manifest is generated and is passed to self._s5cmd_run\n",
203+
"\n",
204+
"2024-06-17 20:31:29,987 - Not using s5cmd sync dry run as the destination folder is empty or sync dry or progress bar is not requested\n",
205+
"2024-06-17 20:31:29,987 - Inputs received for tracking download:\n",
206+
"2024-06-17 20:31:29,988 - size_MB: 314.45\n",
207+
"2024-06-17 20:31:29,988 - downloadDir: /content\n",
208+
"2024-06-17 20:31:29,988 - show_progress_bar: True\n",
209+
"2024-06-17 20:31:29,988 - Initial size of the directory: 0 bytes\n",
210+
"2024-06-17 20:31:29,988 - Approx. Size of the files need to be downloaded: 314450000.0 bytes\n",
211+
"Downloading data: 100% 314M/314M [00:09<00:00, 34.3MB/s]\n",
212+
"2024-06-17 20:31:39,153 - Successfully downloaded files to /content\n"
213+
]
214+
}
215+
]
216+
},
197217
{
198218
"cell_type": "markdown",
199219
"metadata": {
@@ -239,7 +259,7 @@
239259
},
240260
{
241261
"cell_type": "code",
242-
"execution_count": 6,
262+
"execution_count": null,
243263
"metadata": {
244264
"colab": {
245265
"base_uri": "https://localhost:8080/"
@@ -324,7 +344,7 @@
324344
},
325345
{
326346
"cell_type": "code",
327-
"execution_count": 11,
347+
"execution_count": null,
328348
"metadata": {
329349
"id": "KwmtVQYiHupY"
330350
},
@@ -346,7 +366,7 @@
346366
},
347367
{
348368
"cell_type": "code",
349-
"execution_count": 13,
369+
"execution_count": null,
350370
"metadata": {
351371
"colab": {
352372
"base_uri": "https://localhost:8080/"
@@ -441,7 +461,7 @@
441461
},
442462
{
443463
"cell_type": "code",
444-
"execution_count": 15,
464+
"execution_count": null,
445465
"metadata": {
446466
"id": "kakilA1TIqJm"
447467
},
@@ -482,7 +502,7 @@
482502
},
483503
{
484504
"cell_type": "code",
485-
"execution_count": 16,
505+
"execution_count": null,
486506
"metadata": {
487507
"colab": {
488508
"base_uri": "https://localhost:8080/",
@@ -904,7 +924,7 @@
904924
},
905925
{
906926
"cell_type": "code",
907-
"execution_count": 17,
927+
"execution_count": null,
908928
"metadata": {
909929
"id": "xfCV61zeKMFp"
910930
},
@@ -1805,7 +1825,7 @@
18051825
},
18061826
{
18071827
"cell_type": "code",
1808-
"execution_count": 18,
1828+
"execution_count": null,
18091829
"metadata": {
18101830
"colab": {
18111831
"base_uri": "https://localhost:8080/"
@@ -1898,7 +1918,7 @@
18981918
},
18991919
{
19001920
"cell_type": "code",
1901-
"execution_count": 19,
1921+
"execution_count": 4,
19021922
"metadata": {
19031923
"id": "vaCquh8_a8z1"
19041924
},
@@ -1915,7 +1935,7 @@
19151935
},
19161936
{
19171937
"cell_type": "code",
1918-
"execution_count": 20,
1938+
"execution_count": null,
19191939
"metadata": {
19201940
"colab": {
19211941
"base_uri": "https://localhost:8080/",
@@ -2688,7 +2708,7 @@
26882708
},
26892709
{
26902710
"cell_type": "code",
2691-
"execution_count": 21,
2711+
"execution_count": null,
26922712
"metadata": {
26932713
"colab": {
26942714
"base_uri": "https://localhost:8080/"
@@ -2725,7 +2745,7 @@
27252745
},
27262746
{
27272747
"cell_type": "code",
2728-
"execution_count": 22,
2748+
"execution_count": null,
27292749
"metadata": {
27302750
"colab": {
27312751
"base_uri": "https://localhost:8080/",
@@ -2854,14 +2874,14 @@
28542874
"source": [
28552875
"If you use data from IDC, you should also acknowledge IDC as the source of the data, and cite individual datasets that you used.\n",
28562876
"\n",
2857-
"To help you comply with the attribution requirements, `idc-index` provides a convenience function `citations_from_selection` that will look up the DOIs and generate the list of citations.",
2877+
"To help you comply with the attribution requirements, `idc-index` provides a convenience function `citations_from_selection` that will look up the DOIs and generate the list of citations.\n",
28582878
"\n",
2859-
"\nWARNING: As of May 30, 2024, due to server issues at api.crossref.org, the following cell may not work. In the future, we will replace the API call to CrossRef with a cached list of publication to address this issue.\n"
2879+
"WARNING: As of May 30, 2024, due to server issues at api.crossref.org, the following cell may not work. In the future, we will replace the API call to CrossRef with a cached list of publications to address this issue.\n"
28602880
]
28612881
},
28622882
{
28632883
"cell_type": "code",
2864-
"execution_count": 23,
2884+
"execution_count": null,
28652885
"metadata": {
28662886
"colab": {
28672887
"base_uri": "https://localhost:8080/"
@@ -2956,7 +2976,8 @@
29562976
"metadata": {
29572977
"colab": {
29582978
"provenance": [],
2959-
"toc_visible": true
2979+
"toc_visible": true,
2980+
"include_colab_link": true
29602981
},
29612982
"gpuClass": "standard",
29622983
"kernelspec": {
@@ -2970,4 +2991,4 @@
29702991
},
29712992
"nbformat": 4,
29722993
"nbformat_minor": 0
2973-
}
2994+
}

0 commit comments

Comments
 (0)