@@ -103,6 +103,7 @@ dispatch_copy_async(InputIterator first,
103103 InputIterator last,
104104 OutputIterator result,
105105 command_queue &queue,
106+ const wait_list &events,
106107 typename boost::enable_if<
107108 mpl::and_<
108109 mpl::not_<
@@ -118,7 +119,7 @@ dispatch_copy_async(InputIterator first,
118119 " copy_async() is only supported for contiguous host iterators"
119120 );
120121
121- return copy_to_device_async (first, last, result, queue);
122+ return copy_to_device_async (first, last, result, queue, events );
122123}
123124
124125// host -> device (async)
@@ -129,6 +130,7 @@ dispatch_copy_async(InputIterator first,
129130 InputIterator last,
130131 OutputIterator result,
131132 command_queue &queue,
133+ const wait_list &events,
132134 typename boost::enable_if<
133135 mpl::and_<
134136 mpl::not_<
@@ -167,7 +169,7 @@ dispatch_copy_async(InputIterator first,
167169 context
168170 );
169171 return copy_on_device_async (
170- mapped_host.begin (), mapped_host.end (), result, queue
172+ mapped_host.begin (), mapped_host.end (), result, queue, events
171173 );
172174}
173175
@@ -179,6 +181,7 @@ dispatch_copy(InputIterator first,
179181 InputIterator last,
180182 OutputIterator result,
181183 command_queue &queue,
184+ const wait_list &events,
182185 typename boost::enable_if<
183186 mpl::and_<
184187 mpl::not_<
@@ -190,7 +193,7 @@ dispatch_copy(InputIterator first,
190193 >
191194 >::type* = 0 )
192195{
193- return copy_to_device (first, last, result, queue);
196+ return copy_to_device (first, last, result, queue, events );
194197}
195198
196199// host -> device
@@ -202,6 +205,7 @@ dispatch_copy(InputIterator first,
202205 InputIterator last,
203206 OutputIterator result,
204207 command_queue &queue,
208+ const wait_list &events,
205209 typename boost::enable_if<
206210 mpl::and_<
207211 mpl::not_<
@@ -258,13 +262,15 @@ dispatch_copy(InputIterator first,
258262
259263 // [0; map_copy_threshold) -> copy_to_device_map()
260264 if (input_size_bytes < map_copy_threshold) {
261- return copy_to_device_map (first, last, result, queue);
265+ return copy_to_device_map (first, last, result, queue, events );
262266 }
263267 // [map_copy_threshold; direct_copy_threshold) -> convert [first; last)
264268 // on host and then perform copy_to_device()
265269 else if (input_size_bytes < direct_copy_threshold) {
266270 std::vector<output_type> vector (first, last);
267- return copy_to_device (vector.begin (), vector.end (), result, queue);
271+ return copy_to_device (
272+ vector.begin (), vector.end (), result, queue, events
273+ );
268274 }
269275
270276 // [direct_copy_threshold; inf) -> map [first; last) to device and
@@ -275,7 +281,7 @@ dispatch_copy(InputIterator first,
275281 // return the result.
276282 // At this point we are sure that count > 1 (first != last), so event
277283 // returned by dispatch_copy_async() must be valid.
278- return dispatch_copy_async (first, last, result, queue).get ();
284+ return dispatch_copy_async (first, last, result, queue, events ).get ();
279285}
280286
281287// host -> device
@@ -286,6 +292,7 @@ dispatch_copy(InputIterator first,
286292 InputIterator last,
287293 OutputIterator result,
288294 command_queue &queue,
295+ const wait_list &events,
289296 typename boost::enable_if<
290297 mpl::and_<
291298 mpl::not_<
@@ -345,12 +352,12 @@ dispatch_copy(InputIterator first,
345352 // copy_to_device_map() is used for every input
346353 if (input_size_bytes < map_copy_threshold
347354 || direct_copy_threshold <= map_copy_threshold) {
348- return copy_to_device_map (first, last, result, queue);
355+ return copy_to_device_map (first, last, result, queue, events );
349356 }
350357 // [map_copy_threshold; inf) -> convert [first; last)
351358 // on host and then perform copy_to_device()
352359 std::vector<output_type> vector (first, last);
353- return copy_to_device (vector.begin (), vector.end (), result, queue);
360+ return copy_to_device (vector.begin (), vector.end (), result, queue, events );
354361}
355362
356363// device -> host (async)
@@ -360,6 +367,7 @@ dispatch_copy_async(InputIterator first,
360367 InputIterator last,
361368 OutputIterator result,
362369 command_queue &queue,
370+ const wait_list &events,
363371 typename boost::enable_if<
364372 mpl::and_<
365373 is_device_iterator<InputIterator>,
@@ -375,7 +383,7 @@ dispatch_copy_async(InputIterator first,
375383 " copy_async() is only supported for contiguous host iterators"
376384 );
377385
378- return copy_to_host_async (first, last, result, queue);
386+ return copy_to_host_async (first, last, result, queue, events );
379387}
380388
381389// device -> host (async)
@@ -386,6 +394,7 @@ dispatch_copy_async(InputIterator first,
386394 InputIterator last,
387395 OutputIterator result,
388396 command_queue &queue,
397+ const wait_list &events,
389398 typename boost::enable_if<
390399 mpl::and_<
391400 is_device_iterator<InputIterator>,
@@ -426,7 +435,8 @@ dispatch_copy_async(InputIterator first,
426435 first,
427436 last,
428437 make_buffer_iterator<output_type>(mapped_host),
429- queue
438+ queue,
439+ events
430440 );
431441 // update host memory asynchronously by mapping and unmapping memory
432442 event map_event;
@@ -451,6 +461,7 @@ dispatch_copy(InputIterator first,
451461 InputIterator last,
452462 OutputIterator result,
453463 command_queue &queue,
464+ const wait_list &events,
454465 typename boost::enable_if<
455466 mpl::and_<
456467 is_device_iterator<InputIterator>,
@@ -465,7 +476,7 @@ dispatch_copy(InputIterator first,
465476 >
466477 >::type* = 0 )
467478{
468- return copy_to_host (first, last, result, queue);
479+ return copy_to_host (first, last, result, queue, events );
469480}
470481
471482// device -> host
@@ -478,6 +489,7 @@ dispatch_copy(InputIterator first,
478489 InputIterator last,
479490 OutputIterator result,
480491 command_queue &queue,
492+ const wait_list &events,
481493 typename boost::enable_if<
482494 mpl::and_<
483495 is_device_iterator<InputIterator>,
@@ -540,12 +552,12 @@ dispatch_copy(InputIterator first,
540552 // copy_to_host_map() is used for every input
541553 if (input_size_bytes < map_copy_threshold
542554 || direct_copy_threshold <= map_copy_threshold) {
543- return copy_to_host_map (first, last, result, queue);
555+ return copy_to_host_map (first, last, result, queue, events );
544556 }
545557 // [map_copy_threshold; inf) -> copy [first;last) to temporary vector
546558 // then copy (and convert) to result using std::copy()
547559 std::vector<input_type> vector (count);
548- copy_to_host (first, last, vector.begin (), queue);
560+ copy_to_host (first, last, vector.begin (), queue, events );
549561 return std::copy (vector.begin (), vector.end (), result);
550562}
551563
@@ -559,6 +571,7 @@ dispatch_copy(InputIterator first,
559571 InputIterator last,
560572 OutputIterator result,
561573 command_queue &queue,
574+ const wait_list &events,
562575 typename boost::enable_if<
563576 mpl::and_<
564577 is_device_iterator<InputIterator>,
@@ -618,13 +631,13 @@ dispatch_copy(InputIterator first,
618631
619632 // [0; map_copy_threshold) -> copy_to_host_map()
620633 if (input_size_bytes < map_copy_threshold) {
621- return copy_to_host_map (first, last, result, queue);
634+ return copy_to_host_map (first, last, result, queue, events );
622635 }
623636 // [map_copy_threshold; direct_copy_threshold) -> copy [first;last) to
624637 // temporary vector then copy (and convert) to result using std::copy()
625638 else if (input_size_bytes < direct_copy_threshold) {
626639 std::vector<input_type> vector (count);
627- copy_to_host (first, last, vector.begin (), queue);
640+ copy_to_host (first, last, vector.begin (), queue, events );
628641 return std::copy (vector.begin (), vector.end (), result);
629642 }
630643
@@ -636,7 +649,7 @@ dispatch_copy(InputIterator first,
636649 // return the result.
637650 // At this point we are sure that count > 1 (first != last), so event
638651 // returned by dispatch_copy_async() must be valid.
639- return dispatch_copy_async (first, last, result, queue).get ();
652+ return dispatch_copy_async (first, last, result, queue, events ).get ();
640653}
641654
642655// device -> device
@@ -646,6 +659,7 @@ dispatch_copy(InputIterator first,
646659 InputIterator last,
647660 OutputIterator result,
648661 command_queue &queue,
662+ const wait_list &events,
649663 typename boost::enable_if<
650664 mpl::and_<
651665 is_device_iterator<InputIterator>,
@@ -658,7 +672,7 @@ dispatch_copy(InputIterator first,
658672 >
659673 >::type* = 0 )
660674{
661- return copy_on_device (first, last, result, queue);
675+ return copy_on_device (first, last, result, queue, events );
662676}
663677
664678// device -> device (specialization for buffer iterators)
@@ -668,6 +682,7 @@ dispatch_copy(InputIterator first,
668682 InputIterator last,
669683 OutputIterator result,
670684 command_queue &queue,
685+ const wait_list &events,
671686 typename boost::enable_if<
672687 mpl::and_<
673688 is_device_iterator<InputIterator>,
@@ -691,7 +706,8 @@ dispatch_copy(InputIterator first,
691706 result.get_buffer (),
692707 first.get_index () * sizeof (value_type),
693708 result.get_index () * sizeof (value_type),
694- static_cast <size_t >(n) * sizeof (value_type));
709+ static_cast <size_t >(n) * sizeof (value_type),
710+ events);
695711 return result + n;
696712}
697713
@@ -702,6 +718,7 @@ dispatch_copy_async(InputIterator first,
702718 InputIterator last,
703719 OutputIterator result,
704720 command_queue &queue,
721+ const wait_list &events,
705722 typename boost::enable_if<
706723 mpl::and_<
707724 is_device_iterator<InputIterator>,
@@ -714,7 +731,7 @@ dispatch_copy_async(InputIterator first,
714731 >
715732 >::type* = 0 )
716733{
717- return copy_on_device_async (first, last, result, queue);
734+ return copy_on_device_async (first, last, result, queue, events );
718735}
719736
720737// device -> device (async, specialization for buffer iterators)
@@ -724,6 +741,7 @@ dispatch_copy_async(InputIterator first,
724741 InputIterator last,
725742 OutputIterator result,
726743 command_queue &queue,
744+ const wait_list &events,
727745 typename boost::enable_if<
728746 mpl::and_<
729747 is_device_iterator<InputIterator>,
@@ -749,7 +767,8 @@ dispatch_copy_async(InputIterator first,
749767 result.get_buffer (),
750768 first.get_index () * sizeof (value_type),
751769 result.get_index () * sizeof (value_type),
752- static_cast <size_t >(n) * sizeof (value_type)
770+ static_cast <size_t >(n) * sizeof (value_type),
771+ events
753772 );
754773
755774 return make_future (result + n, event_);
@@ -762,12 +781,14 @@ dispatch_copy(InputIterator first,
762781 InputIterator last,
763782 OutputIterator result,
764783 command_queue &queue,
784+ const wait_list &events,
765785 typename boost::enable_if_c<
766786 !is_device_iterator<InputIterator>::value &&
767787 !is_device_iterator<OutputIterator>::value
768788 >::type* = 0 )
769789{
770790 (void ) queue;
791+ (void ) events;
771792
772793 return std::copy (first, last, result);
773794}
@@ -833,9 +854,10 @@ template<class InputIterator, class OutputIterator>
833854inline OutputIterator copy (InputIterator first,
834855 InputIterator last,
835856 OutputIterator result,
836- command_queue &queue = system::default_queue())
857+ command_queue &queue = system::default_queue(),
858+ const wait_list &events = wait_list())
837859{
838- return detail::dispatch_copy (first, last, result, queue);
860+ return detail::dispatch_copy (first, last, result, queue, events );
839861}
840862
841863/// Copies the values in the range [\p first, \p last) to the range
@@ -847,9 +869,10 @@ inline future<OutputIterator>
847869copy_async (InputIterator first,
848870 InputIterator last,
849871 OutputIterator result,
850- command_queue &queue = system::default_queue())
872+ command_queue &queue = system::default_queue(),
873+ const wait_list &events = wait_list())
851874{
852- return detail::dispatch_copy_async (first, last, result, queue);
875+ return detail::dispatch_copy_async (first, last, result, queue, events );
853876}
854877
855878} // end compute namespace
0 commit comments