44#include " ../Utils/Graph.hpp"
55#include " ../PairwiseAlignment/NeedlemanWunshReusable.hpp"
66
7- std::vector<std::vector<unsigned char >> star_alignment::StarAligner::align (const std::vector<sequence_type>& sequences)
7+ std::vector<std::vector<unsigned char >> star_alignment::StarAligner::align (const std::vector<sequence_type> & sequences)
88{
99 return StarAligner (sequences)._align ();
1010}
1111
12- star_alignment::StarAligner::StarAligner (const std::vector<sequence_type>& sequences):
12+ auto star_alignment::StarAligner::get_gaps (const std::vector<sequence_type> &sequences) -> std::vector<std::vector<utils::Insertion>>
13+ {
14+ return StarAligner (sequences)._get_gaps ();
15+ }
16+
17+ star_alignment::StarAligner::StarAligner (const std::vector<sequence_type> &sequences):
1318 _sequences(sequences),
1419 _row(_sequences.size()),
1520 _lengths(_set_lengths()),
@@ -41,12 +46,18 @@ std::vector<std::vector<unsigned char>> star_alignment::StarAligner::_align() co
4146 return _insert_gaps (_merge_results (_pairwise_align ()));
4247}
4348
44- auto star_alignment::StarAligner::_pairwise_align () const -> std::vector<std::array<std::vector<indel>, 2>>
49+ auto star_alignment::StarAligner::_get_gaps () const
50+ -> std::vector<std::vector<utils::Insertion>>
51+ {
52+ return _merge_results (_pairwise_align ());
53+ }
54+
55+ auto star_alignment::StarAligner::_pairwise_align () const -> std::vector<std::array<std::vector<utils::Insertion>, 2>>
4556{
4657 static constexpr size_t threshold = 15 ;
4758
4859 suffix_tree::SuffixTree<nucleic_acid_pseudo::NUMBER> st (_centre.cbegin (), _centre.cend (), nucleic_acid_pseudo::GAP);
49- std::vector<std::array<std::vector<indel >, 2 >> all_pairwise_gaps;
60+ std::vector<std::array<std::vector<utils::Insertion >, 2 >> all_pairwise_gaps;
5061
5162 using iter = std::vector<unsigned char >::const_iterator;
5263 pairwise_alignment::NeedlemanWunshReusable<iter, iter, decltype (pairwise_alignment::default_scoring_matrix)>
@@ -86,7 +97,7 @@ auto star_alignment::StarAligner::_pairwise_align() const -> std::vector<std::ar
8697 }));
8798 }
8899
89- std::array<std::vector<indel >, 2 > pairwise_gaps;
100+ std::array<std::vector<utils::Insertion >, 2 > pairwise_gaps;
90101 for (size_t j = 0 ; j != intervals.size (); ++j)
91102 {
92103 const size_t centre_begin = intervals[j][0 ];
@@ -101,8 +112,8 @@ auto star_alignment::StarAligner::_pairwise_align() const -> std::vector<std::ar
101112 auto [lhs_gaps, rhs_gaps] = nw (_centre.cbegin () + centre_begin, _centre.cbegin () + centre_end,
102113 _sequences[i].cbegin () + sequence_begin, _sequences[i].cbegin () + sequence_end, flag);
103114
104- _converse (lhs_gaps, pairwise_gaps[0 ], centre_begin);
105- _converse (rhs_gaps, pairwise_gaps[1 ], sequence_begin);
115+ _append (lhs_gaps, pairwise_gaps[0 ], centre_begin);
116+ _append (rhs_gaps, pairwise_gaps[1 ], sequence_begin);
106117 }
107118
108119 size_t sequence_gap_num = 0 ;
@@ -114,7 +125,7 @@ auto star_alignment::StarAligner::_pairwise_align() const -> std::vector<std::ar
114125 return all_pairwise_gaps;
115126}
116127
117- auto star_alignment::StarAligner::_optimal_path (const std::vector<triple>& identical_substrings)
128+ auto star_alignment::StarAligner::_optimal_path (const std::vector<triple> & identical_substrings)
118129 -> std::vector<triple>
119130{
120131 std::vector<triple> optimal_identical_substrings;
@@ -164,25 +175,25 @@ auto star_alignment::StarAligner::_optimal_path(const std::vector<triple>& ident
164175 return optimal_identical_substrings;
165176}
166177
167- void star_alignment::StarAligner::_converse (const std::vector<size_t >& src_gaps, std::vector<indel>& des_gaps, size_t start)
178+ void star_alignment::StarAligner::_append (const std::vector<size_t > & src_gaps, std::vector<utils::Insertion> & des_gaps, size_t start)
168179{
169180 for (size_t i = 0 ; i != src_gaps.size (); ++i)
170181 if (src_gaps[i])
171182 {
172183 if (des_gaps.size () && des_gaps.back ().index == start + i)
173184 des_gaps.back ().number += src_gaps[i];
174185 else
175- des_gaps.push_back (indel ({ start + i, src_gaps[i] }));
186+ des_gaps.push_back (utils::Insertion ({ start + i, src_gaps[i] }));
176187 }
177188}
178189
179- auto star_alignment::StarAligner::_merge_results (const std::vector<std::array<std::vector<indel >, 2 >>& pairwise_gaps) const
180- -> std::vector<std::vector<indel >>
190+ auto star_alignment::StarAligner::_merge_results (const std::vector<std::array<std::vector<utils::Insertion >, 2 >> & pairwise_gaps) const
191+ -> std::vector<std::vector<utils::Insertion >>
181192{
182- std::vector<indel > final_centre_gaps;
193+ std::vector<utils::Insertion > final_centre_gaps;
183194 for (size_t i = 0 ; i != _row; ++i)
184195 {
185- const auto & curr_centre_gaps = pairwise_gaps[i][0 ];
196+ const auto & curr_centre_gaps = pairwise_gaps[i][0 ];
186197 for (size_t lhs_pointer = 0 , rhs_pointer = 0 ; rhs_pointer != curr_centre_gaps.size (); )
187198 {
188199 if (lhs_pointer == final_centre_gaps.size ())
@@ -211,16 +222,20 @@ auto star_alignment::StarAligner::_merge_results(const std::vector<std::array<st
211222 }
212223 }
213224
214- std::vector<std::vector<indel >> final_sequence_gaps;
225+ std::vector<std::vector<utils::Insertion >> final_sequence_gaps;
215226 final_sequence_gaps.reserve (_row);
216227 for (size_t i = 0 ; i != _row; ++i)
217228 {
218- const auto & curr_centre_gaps = pairwise_gaps[i][0 ];
219- const auto & curr_sequence_gaps = pairwise_gaps[i][1 ];
229+ const auto & curr_centre_gaps = pairwise_gaps[i][0 ];
230+ const auto & curr_sequence_gaps = pairwise_gaps[i][1 ];
220231
221- std::vector<indel> centre_addition = _minus (final_centre_gaps, curr_centre_gaps);
232+ std::vector<utils::Insertion> centre_addition;
233+ centre_addition.reserve (final_centre_gaps.size ());
234+ utils::Insertion::minus (final_centre_gaps.cbegin (), final_centre_gaps.cend (),
235+ curr_centre_gaps.cbegin (), curr_centre_gaps.cend (),
236+ std::back_inserter (centre_addition));
222237
223- std::vector<indel > sequence_addition;
238+ std::vector<utils::Insertion > sequence_addition;
224239 for (size_t centre_index = 0 , sequence_index = 0 , centre_gaps_index = 0 , sequence_gaps_index = 0 , centre_addition_index = 0 ;
225240 centre_addition_index != centre_addition.size (); ++centre_addition_index)
226241 {
@@ -247,66 +262,21 @@ auto star_alignment::StarAligner::_merge_results(const std::vector<std::array<st
247262 if (sequence_addition.size () && sequence_index == sequence_addition.back ().index )
248263 sequence_addition.back ().number += curr_addition.number ;
249264 else
250- sequence_addition.push_back (indel ({ sequence_index, curr_addition.number }));
265+ sequence_addition.push_back (utils::Insertion ({ sequence_index, curr_addition.number }));
251266 }
252267
253- final_sequence_gaps.push_back (_add (curr_sequence_gaps, sequence_addition));
268+ std::vector<utils::Insertion> indels_of_current_sequence;
269+ indels_of_current_sequence.reserve (curr_sequence_gaps.size () + sequence_addition.size ());
270+ utils::Insertion::plus (curr_sequence_gaps.cbegin (), curr_sequence_gaps.cend (),
271+ sequence_addition.cbegin (), sequence_addition.cend (),
272+ std::back_inserter (indels_of_current_sequence));
273+ final_sequence_gaps.push_back (indels_of_current_sequence);
254274 }
255275
256276 return final_sequence_gaps;
257277}
258278
259- auto star_alignment::StarAligner::_add (const std::vector<indel>& lhs, const std::vector<indel>& rhs)
260- -> std::vector<indel>
261- {
262- std::vector<indel> sum;
263-
264- size_t lhs_index = 0 , rhs_index = 0 ;
265- for (; lhs_index != lhs.size () && rhs_index != rhs.size (); )
266- if (lhs[lhs_index].index < rhs[rhs_index].index )
267- {
268- sum.push_back (lhs[lhs_index]);
269- ++lhs_index;
270- }
271- else if (lhs[lhs_index].index > rhs[rhs_index].index )
272- {
273- sum.push_back (rhs[rhs_index]);
274- ++rhs_index;
275- }
276- else
277- {
278- sum.push_back (indel ({ lhs[lhs_index].index , lhs[lhs_index].number + rhs[rhs_index].number }));
279- ++lhs_index;
280- ++rhs_index;
281- }
282-
283- sum.insert (sum.cend (), lhs.cbegin () + lhs_index, lhs.cend ());
284- sum.insert (sum.cend (), rhs.cbegin () + rhs_index, rhs.cend ());
285- return sum;
286- }
287-
288- // assume lhs >= rhs
289- auto star_alignment::StarAligner::_minus (const std::vector<indel>& lhs, const std::vector<indel>& rhs)
290- -> std::vector<indel>
291- {
292- std::vector<indel> difference;
293- difference.reserve (lhs.size ());
294-
295- size_t lhs_index = 0 ;
296- for (size_t rhs_index = 0 ; rhs_index != rhs.size (); ++lhs_index, ++rhs_index)
297- {
298- while (lhs[lhs_index].index != rhs[rhs_index].index )
299- difference.push_back (lhs[lhs_index++]);
300-
301- size_t distance = lhs[lhs_index].number - rhs[rhs_index].number ;
302- if (distance) difference.push_back (indel ({ lhs[lhs_index].index , distance }));
303- }
304-
305- difference.insert (difference.cend (), lhs.cbegin () + lhs_index, lhs.cend ());
306- return difference;
307- }
308-
309- auto star_alignment::StarAligner::_insert_gaps (const std::vector<std::vector<indel>>& gaps) const
279+ auto star_alignment::StarAligner::_insert_gaps (const std::vector<std::vector<utils::Insertion>> &gaps) const
310280 -> std::vector<sequence_type>
311281{
312282 size_t length = _lengths[0 ];
@@ -316,25 +286,10 @@ auto star_alignment::StarAligner::_insert_gaps(const std::vector<std::vector<ind
316286 aligned.reserve (_row);
317287
318288 for (size_t i = 0 ; i != _row; ++i)
319- aligned.push_back (_insert_gaps (_sequences[i], gaps[i], length));
320- return aligned;
321- }
322-
323- auto star_alignment::StarAligner::_insert_gaps (const sequence_type& sequence, const std::vector<indel>& gaps, size_t length)
324- -> sequence_type
325- {
326- sequence_type gapped;
327- gapped.reserve (length);
328-
329- size_t sequence_index = 0 ;
330- for (const auto gap : gaps)
331289 {
332- const size_t new_sequence_index = sequence_index + (gap.index - sequence_index);
333- gapped.insert (gapped.cend (), sequence.cbegin () + sequence_index, sequence.cbegin () + new_sequence_index);
334- gapped.insert (gapped.cend (), gap.number , nucleic_acid_pseudo::GAP);
335-
336- sequence_index = new_sequence_index;
290+ aligned.emplace_back (length);
291+ utils::Insertion::insert_gaps (_sequences[i].cbegin (), _sequences[i].cend (),
292+ gaps[i].cbegin (), gaps[i].cend (), aligned.back ().begin (), nucleic_acid_pseudo::GAP);
337293 }
338- gapped.insert (gapped.cend (), sequence.cbegin () + sequence_index, sequence.cend ());
339- return gapped;
294+ return aligned;
340295}
0 commit comments