Skip to content

Commit 33c76d2

Browse files
committed
appears to export taxonomy, but not synonyms
1 parent c582d22 commit 33c76d2

File tree

1 file changed

+71
-23
lines changed

1 file changed

+71
-23
lines changed

tools/taxonomy-parser.cpp

Lines changed: 71 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,8 @@ void write_meta_xml(const std::string ofn) {
335335
" <field term=\"http://rs.tdwg.org/dwc/terms/class\" index=\"6\"/>\n"
336336
" <field term=\"http://rs.tdwg.org/dwc/terms/order\" index=\"7\"/>\n"
337337
" <field term=\"http://rs.tdwg.org/dwc/terms/family\" index=\"8\"/>\n"
338-
" <field term=\"http://rs.tdwg.org/dwc/terms/genus\" index=\"8\"/>\n"
338+
" <field term=\"http://rs.tdwg.org/dwc/terms/genus\" index=\"9\"/>\n"
339+
" <field term=\"http://rs.tdwg.org/dwc/terms/species\" index=\"10\"/>\n"
339340
" </core>\n"
340341
"</archive>\n";
341342
std::ofstream tf (ofn);
@@ -344,7 +345,18 @@ void write_meta_xml(const std::string ofn) {
344345
}
345346

346347
// Lookup from a TaxonomicRank to a small integer rank code. It is filled in
// by ini_global_rank_num() (e.g. RANK_SPECIES -> 5, RANK_GENUS -> 4) and read
// by write_ranks(), which uses `1 + code` as the index of the output slot.
std::unordered_map<TaxonomicRank, int> g_rank2num;
347-
348+
// Field separator used between the columns of the exported taxa rows.
const char dwca_sep = ',';

/// Returns `ins` in a form that can be written as a single field of a
/// `dwca_sep`-delimited row.
///
/// A value that contains none of the separator, a double quote, or a line
/// break is returned unchanged. Any other value is wrapped in double quotes,
/// and every double quote inside it is doubled (`"` becomes `""`), which is
/// the usual CSV convention. Without the doubling, a value that contains both
/// a separator and a quote would produce a field that terminates early.
///
/// @param ins  raw field value
/// @return     the value, quoted and escaped only when required
std::string escape_for_dwca(const std::string & ins) {
    const char quote = '"';
    // Characters that force the field to be quoted (NUL-terminated list).
    const char needs_quoting[] = {dwca_sep, quote, '\n', '\r', '\0'};
    if (ins.find_first_of(needs_quoting) == std::string::npos) {
        return ins;
    }
    std::string estr;
    // Two enclosing quotes plus the payload; doubled quotes may grow it further.
    estr.reserve(2 + ins.length());
    estr.push_back(quote);
    for (const char c : ins) {
        if (c == quote) {
            estr.push_back(quote); // escape an embedded quote by doubling it
        }
        estr.push_back(c);
    }
    estr.push_back(quote);
    return estr;
}
348360
void ini_global_rank_num() {
349361
g_rank2num[TaxonomicRank::RANK_SPECIES] = 5;
350362
g_rank2num[TaxonomicRank::RANK_GENUS] = 4;
@@ -357,45 +369,81 @@ void ini_global_rank_num() {
357369

358370
void write_ranks(std::ostream & out,
359371
const Tree_t::node_type & node,
360-
const TaxonomyRecord & rec
372+
const TaxonomyRecord & rec,
373+
const Taxonomy & taxonomy
361374
) {
362-
string kingdom_s; // 0
363-
string phylum_s; // 1
364-
string class_s; // 2
365-
string order_s; // 3
366-
string family_s; // 4
367-
string genus_s; // 5
368-
string species_s; // 6
369375
int ini_looking_for = 6;
370376
int looking_for = ini_looking_for;
371377
std::vector<std::string> n_rank_names;
372378
n_rank_names.resize(1 + looking_for);
373379
auto tr = string_to_rank(rec.rank, true);
374-
if (tr == TaxonomicRank::RANK_NO_RANK
375-
|| tr == TaxonomicRank::RANK_NO_RANK_TERMINAL) {
376-
// pass
377-
} else {
378-
auto to_num_it = g_rank2num.find(tr);
379-
if (to_num_it != g_rank2num.end()) {
380-
looking_for = to_num_it->second;
381-
n_rank_names.at(1 + looking_for) = rec.name;
380+
auto to_num_it = g_rank2num.find(tr);
381+
if (to_num_it != g_rank2num.end()) {
382+
looking_for = to_num_it->second;
383+
n_rank_names.at(1 + looking_for) = rec.name;
384+
}
385+
auto * curr_nd = &node;
386+
while (looking_for >= 0) {
387+
curr_nd = curr_nd->get_parent();
388+
if (curr_nd == nullptr) {
389+
break;
382390
}
391+
assert(curr_nd->has_ott_id());
392+
auto ott_id = curr_nd->get_ott_id();
393+
const auto & curr_rec = taxonomy.record_from_id(ott_id);
394+
auto ctr = string_to_rank(curr_rec.rank, true);
395+
auto cto_num_it = g_rank2num.find(ctr);
396+
if (cto_num_it != g_rank2num.end()) {
397+
looking_for = cto_num_it->second;
398+
n_rank_names.at(1 + looking_for) = curr_rec.name;
399+
}
400+
}
401+
for (auto nm : n_rank_names) {
402+
out << dwca_sep << escape_for_dwca(nm);
383403
}
384404
}
385405

386406
void write_taxa_txt(const std::string ofn,
387407
const Taxonomy & taxonomy,
388408
const Tree_t & the_tree ) {
389409
std::ofstream tf (ofn);
390-
std::ostream * osp = &cout;
410+
// std::ostream * osp = &cout;
411+
std::ostream * osp = &tf;
412+
*osp << "taxonId"
413+
<< dwca_sep << "parentNameUsageID"
414+
<< dwca_sep << "scientificName"
415+
<< dwca_sep << "taxonRank"
416+
<< dwca_sep << "kingdom"
417+
<< dwca_sep << "phylum"
418+
<< dwca_sep << "class"
419+
<< dwca_sep << "order"
420+
<< dwca_sep << "family"
421+
<< dwca_sep << "genus"
422+
<< dwca_sep << "species" << '\n';
391423
for (auto nd : iter_pre_const(the_tree)) {
392424
assert(nd->has_ott_id());
393425
auto ott_id = nd->get_ott_id();
394426
const auto & rec = taxonomy.record_from_id(ott_id);
395-
*osp << ott_id
396-
<< '\t' << rec.name
397-
<< '\t' << rec.rank;
398-
write_ranks(*osp, *nd, rec);
427+
*osp << ott_id << dwca_sep;
428+
auto par = nd->get_parent();
429+
if (par != nullptr) {
430+
*osp << par->get_ott_id();
431+
}
432+
433+
std::ostringstream rnss;
434+
rnss << rec.name;
435+
string rn = rnss.str();
436+
*osp << dwca_sep << escape_for_dwca(rn)
437+
<< dwca_sep ;
438+
auto tr = string_to_rank(rec.rank);
439+
if (tr != TaxonomicRank::RANK_NO_RANK
440+
&& tr != TaxonomicRank::RANK_NO_RANK_TERMINAL
441+
&& tr != TaxonomicRank::RANK_INFRASPECIFICNAME) {
442+
*osp << rec.rank;
443+
} else {
444+
// pass
445+
}
446+
write_ranks(*osp, *nd, rec, taxonomy);
399447
*osp << "\n";
400448
}
401449
tf.close();

0 commit comments

Comments
 (0)