|
29 | 29 | "\n", |
30 | 30 | "In this notebook you will be introduced to how IDC organizes the metadata accompanying images available in IDC, and how that metadata can be used to define subsets of data.\n", |
31 | 31 | "\n", |
32 | | - "This documentation page can be used as a complement if you would like to learn more about how IDC metadata is organized: https://learn.canceridc.dev/data/organization-of-data/files-and-metadata.\n", |
33 | | - "\n", |
34 | 32 | "---\n", |
35 | 33 | "Initial version: Nov 2022\n", |
36 | 34 | "\n", |
37 | | - "Updated: Oct 2023\n" |
| 35 | + "Updated:\n" |
38 | 36 | ] |
39 | 37 | }, |
40 | 38 | { |
|
233 | 231 | "\n", |
234 | 232 | "\n", |
235 | 233 | "\n", |
236 | | - "* \"Primary site location\" maps to the `tcia_tumorLocation` column\n", |
| 234 | + "* \"Primary site location\" maps to the `collection_tumorLocation` column\n", |
237 | 235 | "* \"Body Part Examined\" -> `BodyPartExamined`\n", |
238 | 236 | "* \"Modality\" -> `Modality`\n", |
239 | 237 | "\n" |
|
245 | 243 | "id": "3kkHUgqaP2tl" |
246 | 244 | }, |
247 | 245 | "source": [ |
248 | | - "### Exercise 1: Write the query to list all values for `tcia_tumorLocation`\n", |
| 246 | + "### Exercise 1: Write the query to list all values for `collection_tumorLocation`\n", |
249 | 247 | "\n", |
250 | 248 | "In the cell below, we pre-filled the code by copying the previous cell. All you need to do is write the query!\n", |
251 | 249 | "\n", |
|
308 | 306 | "SELECT\n", |
309 | 307 | " collection_id,\n", |
310 | 308 | " STRING_AGG(DISTINCT(Modality)) as modalities,\n", |
311 | | - " STRING_AGG(DISTINCT(tcia_tumorLocation)) as tumor_locations,\n", |
312 | | - " STRING_AGG(DISTINCT(tcia_species)) as species\n", |
| 309 | + " STRING_AGG(DISTINCT(collection_tumorLocation)) as tumor_locations,\n", |
| 310 | + " STRING_AGG(DISTINCT(collection_species)) as species\n", |
313 | 311 | "FROM\n", |
314 | 312 | " bigquery-public-data.idc_current.dicom_all\n", |
315 | 313 | "GROUP BY\n", |
|
371 | 369 | "WHERE\n", |
372 | 370 | " # write the selection criteria under this line!\n", |
373 | 371 | " # Use AND operator to combine the filter values for the\n", |
374 | | - " # Modality and tcia_tumorLocation to select collections that\n", |
| 372 | + " # Modality and collection_tumorLocation to select collections that\n", |
375 | 373 | " # include MR images for Lung cancer locations\n", |
376 | 374 | " # Note that SQL uses single = for comparison, and strings should\n", |
377 | 375 | " # be enclosed in \"\"\n", |
|
467 | 465 | "SELECT\n", |
468 | 466 | " collection_id,\n", |
469 | 467 | " STRING_AGG(DISTINCT(Modality)) as modalities,\n", |
470 | | - " STRING_AGG(DISTINCT(tcia_tumorLocation)) as tumor_locations,\n", |
471 | | - " STRING_AGG(DISTINCT(tcia_species)) as species\n", |
| 468 | + " STRING_AGG(DISTINCT(collection_tumorLocation)) as tumor_locations,\n", |
| 469 | + " STRING_AGG(DISTINCT(collection_species)) as species\n", |
472 | 470 | " # count number of distinct patients\n", |
473 | 471 | " # count number of distinct studies\n", |
474 | 472 | " # count number of distinct series\n", |
|
521 | 519 | "FROM\n", |
522 | 520 | " bigquery-public-data.idc_current.dicom_all\n", |
523 | 521 | "WHERE\n", |
524 | | - " Modality = \"MR\" AND tcia_tumorLocation = \"Liver\"\n", |
| 522 | + " Modality = \"MR\" AND collection_tumorLocation = \"Liver\"\n", |
525 | 523 | "\n", |
526 | 524 | "# note the use of this new operator that makes the query\n", |
527 | 525 | "# return just the first one of the matching rows\n", |
|
577 | 575 | " SELECT\n", |
578 | 576 | " StudyInstanceUID,\n", |
579 | 577 | " STRING_AGG(DISTINCT(collection_id)) AS collection_id,\n", |
580 | | - " STRING_AGG(DISTINCT(tcia_tumorLocation)) AS tcia_tumorLocation,\n", |
| 578 | + " STRING_AGG(DISTINCT(collection_tumorLocation)) AS collection_tumorLocation,\n", |
581 | 579 | " ARRAY_AGG(DISTINCT(Modality)) AS modalities\n", |
582 | 580 | " FROM\n", |
583 | 581 | " bigquery-public-data.idc_current.dicom_all\n", |
|
592 | 590 | "WHERE\n", |
593 | 591 | " \"CT\" IN UNNEST(modalities)\n", |
594 | 592 | " AND \"SEG\" IN UNNEST(modalities)\n", |
595 | | - " AND tcia_tumorLocation = \"Lung\"\n", |
| 593 | + " AND collection_tumorLocation = \"Lung\"\n", |
596 | 594 | "LIMIT\n", |
597 | 595 | " 1\n", |
598 | 596 | "\"\"\"\n", |
|
0 commit comments