|
7 | 7 | #include <boost/program_options.hpp> |
8 | 8 | #include <bitset> |
9 | 9 | #include <regex> |
| 10 | +#include <filesystem> |
| 11 | +#include <ostream> |
10 | 12 |
|
11 | 13 | #include "otc/error.h" |
12 | 14 | #include "otc/tree.h" |
|
17 | 19 | #include "otc/config_file.h" |
18 | 20 |
|
19 | 21 | using namespace otc; |
| 22 | +namespace fs = std::filesystem; |
20 | 23 |
|
21 | 24 | using std::string; |
22 | 25 | using std::vector; |
@@ -72,6 +75,7 @@ variables_map parse_cmd_line(int argc,char* argv[]) { |
72 | 75 | ("high-degree-nodes",value<int>(),"Show the top <arg> high-degree nodes") |
73 | 76 | ("write-tree,T","Write out the result as a tree") |
74 | 77 | ("write-taxonomy",value<string>(),"Write out the result as a taxonomy to directory 'arg'") |
| 78 | + ("write-dwca",value<string>(),"Write out the result as a taxonomy to directory 'arg' as a Darwin Core Archive") |
75 | 79 | ("name,N", value<OttId>(), "Return name of the given ID") |
76 | 80 | ("uniqname,U", value<OttId>(), "Return unique name for the given ID") |
77 | 81 | ("report-lost-taxa",value<string>(), "A tree to report missing taxa for") |
@@ -283,6 +287,93 @@ std::function<bool(tax_flags)> get_flags_match(variables_map& args) { |
283 | 287 | } |
284 | 288 | } |
285 | 289 |
|
// Writes the EML metadata document (root element <eml:eml>) of the Darwin Core
// Archive to the file `ofn`, truncating any existing file.
//
// The document is a fixed template: dataset title, metadata provider, and
// empty license / pubDate / abstract / citation placeholders.
//
// ofn: path of the file to create (e.g. "<archive dir>/eml.xml").
//
// Throws std::ios_base::failure if the file cannot be opened or written.
// NOTE(review): previously such failures were silently ignored; confirm that
// the caller reports std::exception (the sibling fs::create_directories call
// in write_taxonomy_as_dwca can already throw on this code path).
void write_eml_xml(const std::string& ofn) {
    // One attribute per literal so that a stray or missing escaped quote is
    // easy to spot: the previous single-line form carried three extra quotes
    // (in xmlns:d, xmlns:xsi and xsi:schemaLocation) that made the emitted
    // XML not well-formed.
    static const char* const content =
        "<?xml version=\"1.0\"?>\n"
        "<eml:eml"
        " xmlns:eml=\"eml://ecoinformatics.org/eml-2.1.1\""
        " xmlns:md=\"eml://ecoinformatics.org/methods-2.1.1\""
        " xmlns:proj=\"eml://ecoinformatics.org/project-2.1.1\""
        " xmlns:d=\"eml://ecoinformatics.org/dataset-2.1.1\""
        " xmlns:res=\"eml://ecoinformatics.org/resource-2.1.1\""
        " xmlns:dc=\"http://purl.org/dc/terms/\""
        " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
        " packageId=\"/2020-5-30::0:53:12\""
        " system=\"http://globalnames.org\""
        " xml:lang=\"en\""
        " xsi:schemaLocation=\"eml://ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.0.1/eml.xsd\""
        ">\n"
        " <dataset id=\"\">\n"
        " <title>Open Tree of Life Taxonomy</title>\n"
        " <license/>\n"
        " <metadataProvider>\n"
        " <individualName>\n"
        " <givenName>Mark</givenName>\n"
        " <surName>Holder</surName>\n"
        " </individualName>\n"
        " <electronicMailAddress>mtholder@gmail.com</electronicMailAddress>\n"
        " </metadataProvider>\n"
        " <pubDate></pubDate>\n"
        " <abstract>\n"
        " <para></para>\n"
        " </abstract>\n"
        " <contact>\n"
        " <references>1</references>\n"
        " </contact>\n"
        " </dataset>\n"
        " <additionalMetadata>\n"
        " <metadata>\n"
        " <citation/>\n"
        " </metadata>\n"
        " </additionalMetadata>\n"
        "</eml:eml>\n";

    std::ofstream tf(ofn);
    if (!tf) {
        throw std::ios_base::failure("write_eml_xml: cannot open '" + ofn + "' for writing");
    }
    tf << content;
    // close() flushes; checking afterwards also catches errors such as a full disk.
    tf.close();
    if (!tf) {
        throw std::ios_base::failure("write_eml_xml: error while writing '" + ofn + "'");
    }
}
| 321 | + |
// Writes the Darwin Core Archive descriptor (root element <archive>) to the
// file `ofn`, truncating any existing file.
//
// The descriptor declares a single core data file, taxa.txt, of row type
// Taxon: UTF-8, comma-separated, fields enclosed in double quotes, one header
// line to skip, with the id in column 0 followed by scientificName, kingdom,
// phylum, class, order, family, genus and nomenclaturalCode in columns 1-8.
//
// ofn: path of the file to create (e.g. "<archive dir>/meta.xml").
//
// Throws std::ios_base::failure if the file cannot be opened or written.
// NOTE(review): previously such failures were silently ignored; confirm that
// the caller reports std::exception.
void write_meta_xml(const std::string& ofn) {
    static const char* const content =
        "<?xml version=\"1.0\"?>\n"
        "<archive xmlns=\"http://rs.tdwg.org/dwc/text/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://rs.tdwg.org/dwc/terms/xsd/archive/ http://darwincore.googlecode.com/svn/trunk/text/tdwg_dwc_text.xsd\">\n"
        // The enclosing character is a double quote, so inside an XML
        // attribute it has to be spelled as the entity &quot; (a bare quote
        // here terminated both the attribute and the C++ literal).  The row
        // terminator is the two-character escape sequence backslash-n used by
        // Darwin Core meta.xml files, not a space.
        " <core encoding=\"UTF-8\" fieldsTerminatedBy=\",\" fieldsEnclosedBy=\"&quot;\" linesTerminatedBy=\"\\n\" rowType=\"http://rs.tdwg.org/dwc/terms/Taxon\" ignoreHeaderLines=\"1\">\n"
        " <files>\n"
        " <location>taxa.txt</location>\n"
        " </files>\n"
        " <id index=\"0\"/>\n"
        " <field term=\"http://rs.tdwg.org/dwc/terms/scientificName\" index=\"1\"/>\n"
        " <field term=\"http://rs.tdwg.org/dwc/terms/kingdom\" index=\"2\"/>\n"
        " <field term=\"http://rs.tdwg.org/dwc/terms/phylum\" index=\"3\"/>\n"
        " <field term=\"http://rs.tdwg.org/dwc/terms/class\" index=\"4\"/>\n"
        " <field term=\"http://rs.tdwg.org/dwc/terms/order\" index=\"5\"/>\n"
        " <field term=\"http://rs.tdwg.org/dwc/terms/family\" index=\"6\"/>\n"
        " <field term=\"http://rs.tdwg.org/dwc/terms/genus\" index=\"7\"/>\n"
        " <field term=\"http://rs.tdwg.org/dwc/terms/nomenclaturalCode\" index=\"8\"/>\n"
        " </core>\n"
        "</archive>\n";

    std::ofstream tf(ofn);
    if (!tf) {
        throw std::ios_base::failure("write_meta_xml: cannot open '" + ofn + "' for writing");
    }
    tf << content;
    // close() flushes; checking afterwards also catches errors such as a full disk.
    tf.close();
    if (!tf) {
        throw std::ios_base::failure("write_meta_xml: error while writing '" + ofn + "'");
    }
}
| 344 | + |
| 345 | +void write_taxa_txt(const std::string ofn, |
| 346 | + const Taxonomy & taxonomy, |
| 347 | + const Tree_t & the_tree ){ |
| 348 | + // <id index="0"/> |
| 349 | + // <field term="http://rs.tdwg.org/dwc/terms/scientificName" index="1"/> |
| 350 | + // <field term="http://rs.tdwg.org/dwc/terms/kingdom" index="2"/> |
| 351 | + // <field term="http://rs.tdwg.org/dwc/terms/phylum" index="3"/> |
| 352 | + // <field term="http://rs.tdwg.org/dwc/terms/class" index="4"/> |
| 353 | + // <field term="http://rs.tdwg.org/dwc/terms/order" index="5"/> |
| 354 | + // <field term="http://rs.tdwg.org/dwc/terms/family" index="6"/> |
| 355 | + // <field term="http://rs.tdwg.org/dwc/terms/genus" index="7"/> |
| 356 | + // <field term="http://rs.tdwg.org/dwc/terms/nomenclaturalCode" index="8"/> |
| 357 | + std::ofstream tf (ofn); |
| 358 | + for (auto nd : iter_pre_const(the_tree)) { |
| 359 | + if (nd->has_ott_id()) { |
| 360 | + cout << nd->get_ott_id() << "\n"; |
| 361 | + } |
| 362 | + } |
| 363 | + tf.close(); |
| 364 | +} |
| 365 | + |
| 366 | +void write_taxonomy_as_dwca(const std::string & dir, |
| 367 | + const Taxonomy & taxonomy, |
| 368 | + const Tree_t & the_tree ) { |
| 369 | + fs::path new_dir = dir; |
| 370 | + if (! fs::exists(new_dir)) { |
| 371 | + fs::create_directories(new_dir); |
| 372 | + } |
| 373 | + write_eml_xml((new_dir/"eml.xml").string()); |
| 374 | + write_meta_xml((new_dir/"meta.xml").string()); |
| 375 | + write_taxa_txt((new_dir/"taxa.txt").string(), taxonomy, the_tree); |
| 376 | +} |
286 | 377 |
|
287 | 378 | int main(int argc, char* argv[]) |
288 | 379 | { |
@@ -376,6 +467,13 @@ int main(int argc, char* argv[]) |
376 | 467 | if (args.count("write-taxonomy")) { |
377 | 468 | taxonomy.write(args["write-taxonomy"].as<string>(), false, !root_changed); |
378 | 469 | } |
| 470 | + if (args.count("write-dwca")) { |
| 471 | + //taxonID,scientificName,kingdom,phylum,class,order,family,genus,nomenclaturalCode |
| 472 | + auto nodeNamer = [](const auto& record){return string(record.name)+"_ott"+std::to_string(record.id);}; |
| 473 | + const auto the_tree_ptr = taxonomy.get_tree<Tree_t>(nodeNamer); |
| 474 | + const Tree_t & the_tree = *the_tree_ptr; |
| 475 | + write_taxonomy_as_dwca(args["write-dwca"].as<string>(), taxonomy, the_tree ); |
| 476 | + } |
379 | 477 | if (args.count("name")) { |
380 | 478 | OttId id = args["name"].as<OttId>(); |
381 | 479 | std::cout << taxonomy.record_from_id(id).name << std::endl; |
|
0 commit comments