@@ -335,7 +335,8 @@ void write_meta_xml(const std::string ofn) {
335335" <field term=\" http://rs.tdwg.org/dwc/terms/class\" index=\" 6\" />\n "
336336" <field term=\" http://rs.tdwg.org/dwc/terms/order\" index=\" 7\" />\n "
337337" <field term=\" http://rs.tdwg.org/dwc/terms/family\" index=\" 8\" />\n "
338- " <field term=\" http://rs.tdwg.org/dwc/terms/genus\" index=\" 8\" />\n "
338+ " <field term=\" http://rs.tdwg.org/dwc/terms/genus\" index=\" 9\" />\n "
339+ " <field term=\" http://rs.tdwg.org/dwc/terms/species\" index=\" 10\" />\n "
339340" </core>\n "
340341" </archive>\n " ;
341342 std::ofstream tf (ofn);
@@ -344,7 +345,18 @@ void write_meta_xml(const std::string ofn) {
344345}
345346
// Lookup table from a taxonomic rank to an integer; entries are assigned in
// ini_global_rank_num() and read by write_ranks() to pick an output slot.
346347std::unordered_map<TaxonomicRank, int > g_rank2num;
347-
// Separator written between fields of the delimited taxa output.
const char dwca_sep = ',';

/// Formats `ins` as one field of a `dwca_sep`-delimited row.
///
/// A value that contains none of the separator, a double quote, a line feed or
/// a carriage return is returned unchanged. Any other value is wrapped in
/// double quotes, and every double quote inside it is doubled, so that a
/// CSV-style reader recovers the original text.
///
/// @param ins  raw field value
/// @return     the value, quoted and escaped only when required
std::string escape_for_dwca(const std::string & ins) {
    const char quote = '"';
    // Characters that make a bare field ambiguous to a CSV-style reader.
    const char specials[] = {dwca_sep, quote, '\n', '\r'};
    if (ins.find_first_of(specials, 0, sizeof(specials)) == std::string::npos) {
        return ins;
    }
    std::string estr;
    // Lower bound only: doubled quotes may grow the string further.
    estr.reserve(2 + ins.length());
    estr.push_back(quote);
    for (const char c : ins) {
        if (c == quote) {
            // An embedded quote is escaped by writing it twice.
            estr.push_back(quote);
        }
        estr.push_back(c);
    }
    estr.push_back(quote);
    return estr;
}
348360void ini_global_rank_num () {
349361 g_rank2num[TaxonomicRank::RANK_SPECIES] = 5 ;
350362 g_rank2num[TaxonomicRank::RANK_GENUS] = 4 ;
@@ -357,45 +369,81 @@ void ini_global_rank_num() {
357369
358370void write_ranks (std::ostream & out,
359371 const Tree_t::node_type & node,
360- const TaxonomyRecord & rec
372+ const TaxonomyRecord & rec,
373+ const Taxonomy & taxonomy
361374 ) {
362- string kingdom_s; // 0
363- string phylum_s; // 1
364- string class_s; // 2
365- string order_s; // 3
366- string family_s; // 4
367- string genus_s; // 5
368- string species_s; // 6
369375 int ini_looking_for = 6 ;
370376 int looking_for = ini_looking_for;
371377 std::vector<std::string> n_rank_names;
372378 n_rank_names.resize (1 + looking_for);
373379 auto tr = string_to_rank (rec.rank , true );
374- if (tr == TaxonomicRank::RANK_NO_RANK
375- || tr == TaxonomicRank::RANK_NO_RANK_TERMINAL) {
376- // pass
377- } else {
378- auto to_num_it = g_rank2num.find (tr);
379- if (to_num_it != g_rank2num.end ()) {
380- looking_for = to_num_it->second ;
381- n_rank_names.at (1 + looking_for) = rec.name ;
380+ auto to_num_it = g_rank2num.find (tr);
381+ if (to_num_it != g_rank2num.end ()) {
382+ looking_for = to_num_it->second ;
383+ n_rank_names.at (1 + looking_for) = rec.name ;
384+ }
385+ auto * curr_nd = &node;
386+ while (looking_for >= 0 ) {
387+ curr_nd = curr_nd->get_parent ();
388+ if (curr_nd == nullptr ) {
389+ break ;
382390 }
391+ assert (curr_nd->has_ott_id ());
392+ auto ott_id = curr_nd->get_ott_id ();
393+ const auto & curr_rec = taxonomy.record_from_id (ott_id);
394+ auto ctr = string_to_rank (curr_rec.rank , true );
395+ auto cto_num_it = g_rank2num.find (ctr);
396+ if (cto_num_it != g_rank2num.end ()) {
397+ looking_for = cto_num_it->second ;
398+ n_rank_names.at (1 + looking_for) = curr_rec.name ;
399+ }
400+ }
401+ for (auto nm : n_rank_names) {
402+ out << dwca_sep << escape_for_dwca (nm);
383403 }
384404}
385405
386406void write_taxa_txt (const std::string ofn,
387407 const Taxonomy & taxonomy,
388408 const Tree_t & the_tree ) {
389409 std::ofstream tf (ofn);
390- std::ostream * osp = &cout;
410+ // std::ostream * osp = &cout;
411+ std::ostream * osp = &tf;
412+ *osp << " taxonId"
413+ << dwca_sep << " parentNameUsageID"
414+ << dwca_sep << " scientificName"
415+ << dwca_sep << " taxonRank"
416+ << dwca_sep << " kingdom"
417+ << dwca_sep << " phylum"
418+ << dwca_sep << " class"
419+ << dwca_sep << " order"
420+ << dwca_sep << " family"
421+ << dwca_sep << " genus"
422+ << dwca_sep << " species" << ' \n ' ;
391423 for (auto nd : iter_pre_const (the_tree)) {
392424 assert (nd->has_ott_id ());
393425 auto ott_id = nd->get_ott_id ();
394426 const auto & rec = taxonomy.record_from_id (ott_id);
395- *osp << ott_id
396- << ' \t ' << rec.name
397- << ' \t ' << rec.rank ;
398- write_ranks (*osp, *nd, rec);
427+ *osp << ott_id << dwca_sep;
428+ auto par = nd->get_parent ();
429+ if (par != nullptr ) {
430+ *osp << par->get_ott_id ();
431+ }
432+
433+ std::ostringstream rnss;
434+ rnss << rec.name ;
435+ string rn = rnss.str ();
436+ *osp << dwca_sep << escape_for_dwca (rn)
437+ << dwca_sep ;
438+ auto tr = string_to_rank (rec.rank );
439+ if (tr != TaxonomicRank::RANK_NO_RANK
440+ && tr != TaxonomicRank::RANK_NO_RANK_TERMINAL
441+ && tr != TaxonomicRank::RANK_INFRASPECIFICNAME) {
442+ *osp << rec.rank ;
443+ } else {
444+ // pass
445+ }
446+ write_ranks (*osp, *nd, rec, taxonomy);
399447 *osp << " \n " ;
400448 }
401449 tf.close ();
0 commit comments