host_tensor.hpp Source File

host_tensor.hpp Source File#

Composable Kernel: host_tensor.hpp Source File
library/utility/host_tensor.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
6#include <algorithm>
7#include <cassert>
8#include <iostream>
9#include <fstream>
10#include <numeric>
11#include <random>
12#include <thread>
13#include <utility>
14#include <vector>
15
17#include "ck/utility/span.hpp"
19
23
25
/// Streams every element of `range` to `os`, inserting `delim` between consecutive elements
/// (no leading or trailing delimiter).
///
/// @param os     Output stream that receives the text.
/// @param range  Any range usable in a range-based for loop whose elements support `os << v`.
/// @param delim  Separator written between elements; taken by const reference so the string
///               is not copied on every call.
/// @return `os`, to allow chaining.
template <typename Range>
std::ostream& LogRange(std::ostream& os, Range&& range, const std::string& delim)
{
    bool need_delim = false;
    for(auto&& v : range)
    {
        // The separator goes in front of every element except the first one.
        if(need_delim)
        {
            os << delim;
        }
        need_delim = true;
        os << v;
    }
    return os;
}
40
41template <typename T, typename Range>
42std::ostream& LogRangeAsType(std::ostream& os, Range&& range, std::string delim)
43{
44 bool first = true;
45 for(auto&& v : range)
46 {
47 if(first)
48 first = false;
49 else
50 os << delim;
51
52 using RangeType = ck::remove_cvref_t<decltype(v)>;
53 if constexpr(std::is_same_v<RangeType, ck::f8_t> || std::is_same_v<RangeType, ck::bf8_t> ||
54 std::is_same_v<RangeType, ck::bhalf_t>)
55 {
56 os << ck::type_convert<float>(v);
57 }
58 else if constexpr(std::is_same_v<RangeType, ck::pk_i4_t> ||
59 std::is_same_v<RangeType, ck::f4x2_pk_t>)
60 {
61 const auto packed_floats = ck::type_convert<ck::float2_t>(v);
62 const ck::vector_type<float, 2> vector_of_floats{packed_floats};
63 os << vector_of_floats.template AsType<float>()[ck::Number<0>{}] << delim
64 << vector_of_floats.template AsType<float>()[ck::Number<1>{}];
65 }
66 else
67 {
68 os << static_cast<T>(v);
69 }
70 }
71 return os;
72}
73
/// Calls `f` with the elements of the tuple-like `args` picked out by the index pack `Is`.
/// `f` and `args` are received by value; the selected elements are handed to `f` as lvalues of
/// that local copy. The result of the call is returned by value (`auto` deduction).
template <typename F, typename T, std::size_t... Is>
auto call_f_unpack_args_impl(F f, T args, std::index_sequence<Is...>)
{
    return f(std::get<Is>(args)...);
}
79
/// Calls `f` with all elements of the tuple-like `args`, in order, and returns the result.
/// The element count comes from `std::tuple_size<T>`; the actual expansion is done by
/// call_f_unpack_args_impl.
template <typename F, typename T>
auto call_f_unpack_args(F f, T args)
{
    using AllIndices = std::make_index_sequence<std::tuple_size_v<T>>;

    return call_f_unpack_args_impl(f, args, AllIndices{});
}
87
/// Builds an `F` from the elements of the tuple-like `args` picked out by the index pack `Is`,
/// using the functional form `F(elements...)`, and returns it by value.
/// `args` is received by value; its elements are passed on as lvalues of that local copy.
template <typename F, typename T, std::size_t... Is>
auto construct_f_unpack_args_impl(T args, std::index_sequence<Is...>)
{
    return F(std::get<Is>(args)...);
}
93
94template <typename F, typename T>
96{
97 constexpr std::size_t N = std::tuple_size<T>{};
98
99 return construct_f_unpack_args_impl<F>(args, std::make_index_sequence<N>{});
100}
101
171{
174
175 // Runtime tag describing which layout is picked when layout is not specified explicitly at
176 // construction time.
183
184 // Master constructor
185 template <typename Layout>
186 HostTensorDescriptor(std::vector<std::size_t> lens,
187 std::vector<std::size_t> strides,
188 const Layout& layout = DefaultLayout())
189 : mLens(std::move(lens)), mStrides(std::move(strides))
190 {
191 // To support legacy use cases, when layout is not passed in
192 const auto new_layout = HandleDefaultLayout(layout);
193 if(dbg)
194 {
195 std::cout << "Original Lens: [";
196 LogRange(std::cout, mLens, ", ") << "] and Strides: [";
197 LogRange(std::cout, mStrides, ", ") << "]" << std::endl;
198 std::cout << "Layout: " << layout << " --> " << new_layout << std::endl;
199 }
200
201 // Handling the strides and validation based on the chosen layout
202 DispatchChosenLayout(new_layout, layout, [&](auto selected_layout) {
203 this->CalculateStrides(selected_layout);
204 this->ValidateStrides(selected_layout);
205 });
206 }
207
209
210 // Helper that invokes a callable with a concrete layout object whose type
211 // matches the chosen tag (so template code depending on the layout type
212 // can still leverage if constexpr branches).
213 template <typename F, typename OrigLayout>
214 void DispatchChosenLayout(ChosenLayout tag, const OrigLayout& orig, F&& f) const
215 {
216 switch(tag)
217 {
221 default: f(orig); break;
222 }
223 }
224
225 template <typename Layout>
227 {
228 if constexpr(!std::is_same_v<Layout, DefaultLayout>)
229 {
231 }
232 else
233 {
234 if(mStrides.empty())
235 {
236 // No strides provided -> assume RowMajor
238 }
239
240 const auto rank = mLens.size();
241
242 if(rank > 2)
243 {
244 // Keep as-is - validation will warn/throw later
246 }
247
248 if(rank == 0)
249 {
250 // Keep as-is - validation will warn/throw later
252 }
253
254 if(rank == 1)
255 {
256 // Treat 1D tensor as RowMajor
258 }
259
260 // rank == 2
261 if(mStrides.size() == 2)
262 {
263 // RowMajor pattern (?, 1)
264 if(mStrides[1] == 1)
265 {
267 }
268
269 // ColumnMajor pattern (1, ?)
270 if(mStrides[0] == 1)
271 {
273 }
274 }
275
276 // Fallback: leave as-is
278 }
279 }
280
281 template <typename Layout>
283 {
284 if constexpr(std::is_same_v<Layout, ck::tensor_layout::BypassLayoutVerification>)
285 return;
286 // Workaround: a stride of -1 (meaning "unknown") may have been passed in and cast to
287 // size_t (unsigned), so the strides are re-interpreted as signed integers before inspection.
288 auto strides_int = AsInt(mStrides);
289
290 // case of empty strides or all-zero: auto-calculate based on layout and tensor dimensions
291 if(mStrides.empty() || std::all_of(strides_int.begin(), strides_int.end(), [](int stride) {
292 return stride <= 0;
293 }))
294 {
295
296 if constexpr(!(std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ||
297 std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>))
298 {
299 std::cerr << "Only RowMajor and ColumnMajor layouts are supported for empty "
300 "strides, got "
301 << layout << ". Will calculate strides as RowMajor." << std::endl;
302 }
303
304 mStrides.clear();
305 mStrides.resize(mLens.size(), 0);
306 if(mStrides.empty())
307 return;
308
309 mStrides.back() = 1;
310 std::partial_sum(mLens.rbegin(),
311 mLens.rend() - 1,
312 mStrides.rbegin() + 1,
313 std::multiplies<std::size_t>());
314
315 if constexpr(std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>)
316 {
317 // swap the last two strides
318 if(mStrides.size() >= 2)
319 std::swap(mStrides[mStrides.size() - 1], mStrides[mStrides.size() - 2]);
320 }
321 }
322 // The other case is if one of the strides is unknown
323 // Currently, only GEMM RowMajor and ColumnMajor layouts are supported and only in the lower
324 // two dimensions, e.g. {..., 0, N} or {..., M, 0}. The higher dimensions are left
325 // untouched.
326 else if constexpr(std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ||
327 std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>)
328 {
329 auto rank = mStrides.size();
330 if(mLens.size() >= 2 && rank >= 2)
331 {
332 const auto inner_idx =
333 std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ? rank - 1 : rank - 2;
334 const auto outer_idx = inner_idx == rank - 1 ? rank - 2 : rank - 1;
335 if(mStrides[inner_idx] <= 0)
336 {
337 mStrides[inner_idx] = 1;
338 }
339 if(mStrides[outer_idx] <= 0)
340 {
341 mStrides[outer_idx] = mLens[inner_idx] * mStrides[inner_idx];
342 }
343 }
344 }
345 }
346
347 template <typename Layout>
348 void ValidateStrides(const Layout& layout) const
349 {
350 if constexpr(std::is_same_v<ck::tensor_layout::BypassLayoutVerification, Layout>)
351 {
352 return;
353 }
354
355 if(mLens.empty())
356 {
357 throw std::runtime_error(
358 "HostTensorDescriptor::ValidateStrides: empty tensor dimensions is not allowed.");
359 }
360
361 const int rank = mLens.size();
362 if(rank == 1) // skip any 1D tensors
363 {
364 return;
365 }
366
367 if constexpr(std::is_same_v<ck::tensor_layout::BaseTensorLayout, Layout>)
368 {
369 // Any legacy code that doesn't pass layout to HostTensorDescriptor ctor will
370 // hit this case (unless it is a special case - see `HandleDefaultLayout`).
371 throw std::runtime_error("HostTensorDescriptor::ValidateStrides: Abstract tensor "
372 "layout BaseTensorLayout can't be verified. Pls "
373 "pass specific tensor layout to HostTensorDescriptor (or "
374 "ck::tensor_layout::BypassLayoutVerification)");
375 }
376
377 // GEMM cases
378 if constexpr(std::is_base_of_v<ck::tensor_layout::gemm::BaseGemmLayout, Layout>)
379 {
380 if(mLens.size() != mStrides.size())
381 {
382 std::ostringstream oss;
383 oss << "HostTensorDescriptor::ValidateStrides: mismatch between tensor rank and "
384 "size of strides: "
385 << *this;
386 throw std::runtime_error(oss.str());
387 }
388
389 // in GEMM, strides must be all positive or all zeros (auto-derived from tensor
390 // dimensions)
391 auto strides_int = AsInt(mStrides);
392 if(std::any_of(
393 strides_int.begin(), strides_int.end(), [](int stride) { return stride <= 0; }))
394 {
395 std::ostringstream oss;
396 oss << "Stride values must be positive or all-zeros (auto-derived from tensor "
397 "dimensions). Instead got ";
398 std::copy(
399 strides_int.begin(), strides_int.end(), std::ostream_iterator<int>(oss, " "));
400 throw std::runtime_error(oss.str());
401 }
402
403 if constexpr(std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ||
404 std::is_same_v<ck::tensor_layout::gemm::ColumnMajor, Layout>)
405 {
406 // The logic here assumes the GEMM with tensor of more than 2 dims, will always have
407 // HW dimesnsions as the inner ones e.g. batched GEMM is either BHW or BWH
408 const auto inner_idx =
409 std::is_same_v<ck::tensor_layout::gemm::RowMajor, Layout> ? rank - 1 : rank - 2;
410 const auto outer_idx = inner_idx == rank - 1 ? rank - 2 : rank - 1;
411
412 if(mStrides[outer_idx] < mLens[inner_idx] * mStrides[inner_idx])
413 {
414 std::ostringstream oss;
415 oss << "Invalid strides for " << layout << ": " << *this;
416 throw std::runtime_error(oss.str());
417 }
418
419 // For higher dimensions, validate strides assuming RowMajor
420 for(int i = 1; i < rank - 2; ++i)
421 {
422 if(mStrides[i - 1] < mStrides[i] * mLens[i])
423 {
424 std::ostringstream oss;
425 oss << "Invalid strides for higher dimensions in " << layout << ": "
426 << *this;
427 throw std::runtime_error(oss.str());
428 }
429 }
430 }
431 else
432 {
433 std::ostringstream oss;
434 oss << "Error: Unsupported GEMM layout: " << layout;
435 throw std::runtime_error(oss.str());
436 }
437 }
438 // Convolution cases
439 else if constexpr(std::is_base_of_v<ck::tensor_layout::convolution::BaseConvolutionLayout,
440 Layout>)
441 {
442 // TBD: implement verification for Conv layouts
443 // For now, just print warning and return
444 std::cerr << "Warning: Tensor layout verification for ck::tensor_layout::convolution "
445 "layouts is not supported yet. Skipping..."
446 << std::endl;
447 return;
448 }
449 else
450 {
451 std::ostringstream oss;
452 oss << "Error: Tensor layout verification for " << layout << " is not supported yet.";
453 throw std::runtime_error(oss.str());
454 }
455 }
456
457 template <typename X,
458 typename Layout = DefaultLayout,
459 typename = std::enable_if_t<std::is_convertible_v<X, std::size_t> &&
460 std::is_convertible_v<Layout, BaseTensorLayout>>>
461 HostTensorDescriptor(const std::initializer_list<X>& lens, const Layout& layout = Layout{})
462 : HostTensorDescriptor(std::vector<std::size_t>(lens.begin(), lens.end()), {}, layout)
463 {
464 if(dbg)
465 std::cout << "HostTensorDescriptor ctor (" << __LINE__ << ")" << std::endl;
466 }
467
468 template <typename Layout = DefaultLayout,
469 typename = std::enable_if_t<std::is_convertible_v<Layout, BaseTensorLayout>>>
470 HostTensorDescriptor(const std::initializer_list<ck::long_index_t>& lens,
471 const Layout& layout = Layout{})
472 : HostTensorDescriptor(std::vector<std::size_t>(lens.begin(), lens.end()), {}, layout)
473 {
474 if(dbg)
475 std::cout << "HostTensorDescriptor ctor (" << __LINE__ << ")" << std::endl;
476 }
477
478 template <typename Lengths,
479 typename Layout = DefaultLayout,
480 typename = std::enable_if_t<
481 (std::is_convertible_v<ck::ranges::range_value_t<Lengths>, std::size_t> ||
482 std::is_convertible_v<ck::ranges::range_value_t<Lengths>, ck::long_index_t>) &&
483 std::is_convertible_v<Layout, BaseTensorLayout>>>
484 HostTensorDescriptor(const Lengths& lens, const Layout& layout = Layout{})
485 : HostTensorDescriptor(std::vector<std::size_t>(lens.begin(), lens.end()), {}, layout)
486 {
487 if(dbg)
488 std::cout << "HostTensorDescriptor ctor (" << __LINE__ << ")" << std::endl;
489 }
490
491 template <typename X,
492 typename Y,
493 typename = std::enable_if_t<std::is_convertible_v<X, std::size_t> &&
494 std::is_convertible_v<Y, std::size_t>>,
495 typename Layout = DefaultLayout>
496 HostTensorDescriptor(const std::initializer_list<X>& lens,
497 const std::initializer_list<Y>& strides,
498 const Layout& layout = Layout{})
499 : HostTensorDescriptor(std::vector<std::size_t>(lens.begin(), lens.end()),
500 std::vector<std::size_t>(strides.begin(), strides.end()),
501 layout)
502 {
503 if(dbg)
504 std::cout << "HostTensorDescriptor ctor (" << __LINE__ << ")" << std::endl;
505 }
506
507 // HostTensorDescriptor({row, col}, {row_stride, col_stride})
508 template <typename Layout = DefaultLayout>
509 HostTensorDescriptor(const std::initializer_list<ck::long_index_t>& lens,
510 const std::initializer_list<ck::long_index_t>& strides,
511 const Layout& layout = Layout{})
512 : HostTensorDescriptor(std::vector<std::size_t>(lens.begin(), lens.end()),
513 std::vector<std::size_t>(strides.begin(), strides.end()),
514 layout)
515 {
516 if(dbg)
517 std::cout << "HostTensorDescriptor ctor (" << __LINE__ << ")" << std::endl;
518 }
519
520 // HostTensorDescriptor({row, col}, strides)
521 template <typename Strides, typename Layout = DefaultLayout>
522 HostTensorDescriptor(const std::initializer_list<std::size_t>& lens,
523 const Strides& strides,
524 const Layout& layout = Layout{})
525 : HostTensorDescriptor(std::vector<std::size_t>(lens.begin(), lens.end()),
526 std::vector<std::size_t>(strides.begin(), strides.end()),
527 layout)
528 {
529 if(dbg)
530 std::cout << "HostTensorDescriptor ctor (" << __LINE__ << ")" << std::endl;
531 }
532
533 template <typename Lengths,
534 typename Strides,
535 typename Layout = DefaultLayout,
536 typename = std::enable_if_t<
537 ((std::is_convertible_v<ck::ranges::range_value_t<Lengths>, std::size_t> &&
538 std::is_convertible_v<ck::ranges::range_value_t<Strides>, std::size_t>) ||
539 (std::is_convertible_v<ck::ranges::range_value_t<Lengths>, ck::long_index_t> &&
540 std::is_convertible_v<ck::ranges::range_value_t<Strides>, ck::long_index_t>)) &&
541 std::is_convertible_v<Layout, BaseTensorLayout>>>
542 HostTensorDescriptor(const Lengths& lens,
543 const Strides& strides,
544 const Layout& layout = Layout{})
545 : HostTensorDescriptor(std::vector<std::size_t>(lens.begin(), lens.end()),
546 std::vector<std::size_t>(strides.begin(), strides.end()),
547 layout)
548 {
549 if(dbg)
550 std::cout << "HostTensorDescriptor ctor (" << __LINE__ << ")" << std::endl;
551 }
552
553 std::size_t GetNumOfDimension() const;
554 std::size_t GetElementSize() const;
555 std::size_t GetElementSpaceSize() const;
556
557 const std::vector<std::size_t>& GetLengths() const;
558 const std::vector<std::size_t>& GetStrides() const;
559
560 template <typename... Is>
561 std::size_t GetOffsetFromMultiIndex(Is... is) const
562 {
563 assert(sizeof...(Is) == this->GetNumOfDimension());
564 std::initializer_list<std::size_t> iss{static_cast<std::size_t>(is)...};
565 return std::inner_product(iss.begin(), iss.end(), mStrides.begin(), std::size_t{0});
566 }
567
568 std::size_t GetOffsetFromMultiIndex(const std::vector<std::size_t>& iss) const
569 {
570 return std::inner_product(iss.begin(), iss.end(), mStrides.begin(), std::size_t{0});
571 }
572
573 friend std::ostream& operator<<(std::ostream& os, const HostTensorDescriptor& desc);
574 friend std::ostream& operator<<(std::ostream& os, ChosenLayout tag);
575
576 private:
577 std::vector<std::size_t> mLens;
578 std::vector<std::size_t> mStrides;
579 static constexpr bool dbg = false;
580
587 std::vector<int> AsInt(const std::vector<size_t>& vec) const
588 {
589 std::vector<int> strides_int(vec.size());
590 std::transform(vec.begin(), vec.end(), strides_int.begin(), [](std::size_t stride) {
591 return static_cast<int>(stride);
592 });
593 return strides_int;
594 }
595};
596
597template <typename New2Old, typename NewLayout = HostTensorDescriptor::BaseTensorLayout>
600 const New2Old& new2old,
601 const NewLayout& new_layout = NewLayout())
602{
603 std::vector<std::size_t> new_lengths(a.GetNumOfDimension());
604 std::vector<std::size_t> new_strides(a.GetNumOfDimension());
605
606 for(std::size_t i = 0; i < a.GetNumOfDimension(); i++)
607 {
608 new_lengths[i] = a.GetLengths()[new2old[i]];
609 new_strides[i] = a.GetStrides()[new2old[i]];
610 }
611
612 return HostTensorDescriptor(new_lengths, new_strides, new_layout);
613}
614
615struct joinable_thread : std::thread
616{
617 template <typename... Xs>
618 joinable_thread(Xs&&... xs) : std::thread(std::forward<Xs>(xs)...)
619 {
620 }
621
624
626 {
627 if(this->joinable())
628 this->join();
629 }
630};
631
632template <typename F, typename... Xs>
634{
635 F mF;
636 static constexpr std::size_t NDIM = sizeof...(Xs);
637 std::array<std::size_t, NDIM> mLens;
638 std::array<std::size_t, NDIM> mStrides;
639 std::size_t mN1d;
640
641 ParallelTensorFunctor(F f, Xs... xs) : mF(f), mLens({static_cast<std::size_t>(xs)...})
642 {
643 mStrides.back() = 1;
644 std::partial_sum(mLens.rbegin(),
645 mLens.rend() - 1,
646 mStrides.rbegin() + 1,
647 std::multiplies<std::size_t>());
648 mN1d = mStrides[0] * mLens[0];
649 }
650
651 std::array<std::size_t, NDIM> GetNdIndices(std::size_t i) const
652 {
653 std::array<std::size_t, NDIM> indices;
654
655 for(std::size_t idim = 0; idim < NDIM; ++idim)
656 {
657 indices[idim] = i / mStrides[idim];
658 i -= indices[idim] * mStrides[idim];
659 }
660
661 return indices;
662 }
663
664 void operator()(std::size_t num_thread = 1) const
665 {
666 std::size_t work_per_thread = (mN1d + num_thread - 1) / num_thread;
667
668 std::vector<joinable_thread> threads(num_thread);
669
670 for(std::size_t it = 0; it < num_thread; ++it)
671 {
672 std::size_t iw_begin = it * work_per_thread;
673 std::size_t iw_end = std::min((it + 1) * work_per_thread, mN1d);
674
675 auto f = [=, *this] {
676 for(std::size_t iw = iw_begin; iw < iw_end; ++iw)
677 {
679 }
680 };
681 threads[it] = joinable_thread(f);
682 }
683 }
684};
685
686template <typename F, typename... Xs>
687auto make_ParallelTensorFunctor(F f, Xs... xs)
688{
689 return ParallelTensorFunctor<F, Xs...>(f, xs...);
690}
691
692template <typename T>
693struct Tensor
694{
696 using Data = std::vector<T>;
697
698 template <typename X>
699 Tensor(std::initializer_list<X> lens) : mDesc(lens), mData(GetElementSpaceSize())
700 {
701 }
702
703 template <typename X, typename Y>
704 Tensor(std::initializer_list<X> lens, std::initializer_list<Y> strides)
705 : mDesc(lens, strides), mData(GetElementSpaceSize())
706 {
707 }
708
709 template <typename Lengths>
710 Tensor(const Lengths& lens) : mDesc(lens), mData(GetElementSpaceSize())
711 {
712 }
713
714 template <typename Lengths, typename Strides>
715 Tensor(const Lengths& lens, const Strides& strides)
716 : mDesc(lens, strides), mData(GetElementSpaceSize())
717 {
718 }
719
720 template <typename X, typename... Rest, std::enable_if_t<(sizeof...(Rest) > 0), int> = 0>
721 Tensor(std::initializer_list<X> lens, Rest&&... rest)
722 : mDesc(lens, std::forward<Rest>(rest)...), mData(GetElementSpaceSize())
723 {
724 }
725
726 template <typename X,
727 typename Y,
728 typename... Rest,
729 std::enable_if_t<(sizeof...(Rest) > 0), int> = 0>
730 Tensor(std::initializer_list<X> lens, std::initializer_list<Y> strides, Rest&&... rest)
731 : mDesc(lens, strides, std::forward<Rest>(rest)...), mData(GetElementSpaceSize())
732 {
733 }
734
735 template <typename Lengths, typename... Rest, std::enable_if_t<(sizeof...(Rest) > 0), int> = 0>
736 Tensor(const Lengths& lens, Rest&&... rest)
737 : mDesc(lens, std::forward<Rest>(rest)...), mData(GetElementSpaceSize())
738 {
739 }
740
741 template <typename Lengths,
742 typename Strides,
743 typename... Rest,
744 std::enable_if_t<(sizeof...(Rest) > 0), int> = 0>
745 Tensor(const Lengths& lens, const Strides& strides, Rest&&... rest)
746 : mDesc(lens, strides, std::forward<Rest>(rest)...), mData(GetElementSpaceSize())
747 {
748 }
749
750 Tensor(const Descriptor& desc) : mDesc(desc), mData(GetElementSpaceSize()) {}
751
752 template <typename OutT>
754 {
755 Tensor<OutT> ret(mDesc);
756
758 mData, ret.mData.begin(), [](auto value) { return ck::type_convert<OutT>(value); });
759
760 return ret;
761 }
762
763 Tensor() = delete;
764 Tensor(const Tensor&) = default;
765 Tensor(Tensor&&) = default;
766
767 ~Tensor() = default;
768
769 Tensor& operator=(const Tensor&) = default;
770 Tensor& operator=(Tensor&&) = default;
771
772 template <typename FromT>
773 explicit Tensor(const Tensor<FromT>& other) : Tensor(other.template CopyAsType<T>())
774 {
775 }
776 void savetxt(std::string file_name, std::string dtype = "float")
777 {
778 std::ofstream file(file_name);
779
780 if(file.is_open())
781 {
782 for(auto& itm : mData)
783 {
784 if(dtype == "float")
785 file << ck::type_convert<float>(itm) << std::endl;
786 else if(dtype == "int")
787 file << ck::type_convert<int>(itm) << std::endl;
788 else
789 // TODO: we didn't implement operator<< for all custom
790 // data types, here fall back to float in case compile error
791 file << ck::type_convert<float>(itm) << std::endl;
792 }
793 file.close();
794 }
795 else
796 {
797 // Print an error message to the standard error
798 // stream if the file cannot be opened.
799 throw std::runtime_error(std::string("unable to open file:") + file_name);
800 }
801 }
802 decltype(auto) GetLengths() const { return mDesc.GetLengths(); }
803
804 decltype(auto) GetStrides() const { return mDesc.GetStrides(); }
805
806 std::size_t GetNumOfDimension() const { return mDesc.GetNumOfDimension(); }
807
808 std::size_t GetElementSize() const { return mDesc.GetElementSize(); }
809
810 std::size_t GetElementSpaceSize() const
811 {
813 {
814 return (mDesc.GetElementSpaceSize() + 1) / ck::packed_size_v<ck::remove_cvref_t<T>>;
815 }
816 else
817 {
818 return mDesc.GetElementSpaceSize();
819 }
820 }
821
822 std::size_t GetElementSpaceSizeInBytes() const { return sizeof(T) * GetElementSpaceSize(); }
823
825
826 template <typename F>
827 void ForEach_impl(F&& f, std::vector<size_t>& idx, size_t rank)
828 {
829 if(rank == mDesc.GetNumOfDimension())
830 {
831 f(*this, idx);
832 return;
833 }
834 // else
835 for(size_t i = 0; i < mDesc.GetLengths()[rank]; i++)
836 {
837 idx[rank] = i;
838 ForEach_impl(std::forward<F>(f), idx, rank + 1);
839 }
840 }
841
842 template <typename F>
843 void ForEach(F&& f)
844 {
845 std::vector<size_t> idx(mDesc.GetNumOfDimension(), 0);
846 ForEach_impl(std::forward<F>(f), idx, size_t(0));
847 }
848
849 template <typename F>
850 void ForEach_impl(const F&& f, std::vector<size_t>& idx, size_t rank) const
851 {
852 if(rank == mDesc.GetNumOfDimension())
853 {
854 f(*this, idx);
855 return;
856 }
857 // else
858 for(size_t i = 0; i < mDesc.GetLengths()[rank]; i++)
859 {
860 idx[rank] = i;
861 ForEach_impl(std::forward<const F>(f), idx, rank + 1);
862 }
863 }
864
865 template <typename F>
866 void ForEach(const F&& f) const
867 {
868 std::vector<size_t> idx(mDesc.GetNumOfDimension(), 0);
869 ForEach_impl(std::forward<const F>(f), idx, size_t(0));
870 }
871
872 template <typename G>
873 void GenerateTensorValue(G g, std::size_t num_thread = 1)
874 {
875 switch(mDesc.GetNumOfDimension())
876 {
877 case 1: {
878 auto f = [&](auto i) { (*this)(i) = g(i); };
879 make_ParallelTensorFunctor(f, mDesc.GetLengths()[0])(num_thread);
880 break;
881 }
882 case 2: {
883 auto f = [&](auto i0, auto i1) { (*this)(i0, i1) = g(i0, i1); };
884 make_ParallelTensorFunctor(f, mDesc.GetLengths()[0], mDesc.GetLengths()[1])(num_thread);
885 break;
886 }
887 case 3: {
888 auto f = [&](auto i0, auto i1, auto i2) { (*this)(i0, i1, i2) = g(i0, i1, i2); };
890 f, mDesc.GetLengths()[0], mDesc.GetLengths()[1], mDesc.GetLengths()[2])(num_thread);
891 break;
892 }
893 case 4: {
894 auto f = [&](auto i0, auto i1, auto i2, auto i3) {
895 (*this)(i0, i1, i2, i3) = g(i0, i1, i2, i3);
896 };
898 mDesc.GetLengths()[0],
899 mDesc.GetLengths()[1],
900 mDesc.GetLengths()[2],
901 mDesc.GetLengths()[3])(num_thread);
902 break;
903 }
904 case 5: {
905 auto f = [&](auto i0, auto i1, auto i2, auto i3, auto i4) {
906 (*this)(i0, i1, i2, i3, i4) = g(i0, i1, i2, i3, i4);
907 };
909 mDesc.GetLengths()[0],
910 mDesc.GetLengths()[1],
911 mDesc.GetLengths()[2],
912 mDesc.GetLengths()[3],
913 mDesc.GetLengths()[4])(num_thread);
914 break;
915 }
916 case 6: {
917 auto f = [&](auto i0, auto i1, auto i2, auto i3, auto i4, auto i5) {
918 (*this)(i0, i1, i2, i3, i4, i5) = g(i0, i1, i2, i3, i4, i5);
919 };
921 mDesc.GetLengths()[0],
922 mDesc.GetLengths()[1],
923 mDesc.GetLengths()[2],
924 mDesc.GetLengths()[3],
925 mDesc.GetLengths()[4],
926 mDesc.GetLengths()[5])(num_thread);
927 break;
928 }
929 case 12: {
930 auto f = [&](auto i0,
931 auto i1,
932 auto i2,
933 auto i3,
934 auto i4,
935 auto i5,
936 auto i6,
937 auto i7,
938 auto i8,
939 auto i9,
940 auto i10,
941 auto i11) {
942 (*this)(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11) =
943 g(i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11);
944 };
946 mDesc.GetLengths()[0],
947 mDesc.GetLengths()[1],
948 mDesc.GetLengths()[2],
949 mDesc.GetLengths()[3],
950 mDesc.GetLengths()[4],
951 mDesc.GetLengths()[5],
952 mDesc.GetLengths()[6],
953 mDesc.GetLengths()[7],
954 mDesc.GetLengths()[8],
955 mDesc.GetLengths()[9],
956 mDesc.GetLengths()[10],
957 mDesc.GetLengths()[11])(num_thread);
958 break;
959 }
960 default: throw std::runtime_error("unspported dimension");
961 }
962 }
963
964 // Generate random values with multiple threads. Guaranteed to give the same sequence with any
965 // number of threads provided.
966 template <typename Distribution = std::uniform_real_distribution<float>,
967 typename Mapping = ck::identity,
968 typename Generator = std::minstd_rand>
969 void GenerateTensorDistr(Distribution dis = {0.f, 1.f},
970 Mapping fn = {},
971 const Generator g = Generator(0), // default seed 0
972 std::size_t num_thread = -1)
973 {
975 using ck::math::min;
976 if(num_thread == -1ULL)
977 num_thread = min(ck::get_available_cpu_cores(), 80U); // max 80 threads
978 // At least 2MB per thread
979 num_thread = min(num_thread, integer_divide_ceil(this->GetElementSpaceSize(), 0x200000));
980 constexpr std::size_t BLOCK_BYTES = 64;
981 constexpr std::size_t BLOCK_SIZE = BLOCK_BYTES / sizeof(T);
982
983 const std::size_t num_blocks = integer_divide_ceil(this->GetElementSpaceSize(), BLOCK_SIZE);
984 const std::size_t blocks_per_thread = integer_divide_ceil(num_blocks, num_thread);
985
986 std::vector<std::thread> threads;
987 threads.reserve(num_thread - 1);
988 const auto dst = const_cast<T*>(this->mData.data());
989 const auto element_space_size = this->GetElementSpaceSize();
990 for(int it = num_thread - 1; it >= 0; --it)
991 {
992 std::size_t ib_begin = it * blocks_per_thread;
993 std::size_t ib_end = min(ib_begin + blocks_per_thread, num_blocks);
994
995 auto job = [=]() {
996 auto g_ = g; // copy
997 auto dis_ = dis; // copy
998 g_.discard(ib_begin * BLOCK_SIZE * ck::packed_size_v<T>);
999 auto t_fn = [&]() {
1000 // As user can pass integer distribution in dis, we must ensure that the correct
1001 // constructor/converter is called at all times. For f4/f6/f8 types, to ensure
1002 // correct results, we convert from float to the target type. In these cases
1003 // integer constructors are interpreted as direct initialization of the internal
1004 // storage with binary values instead of treating integers as subset of floats.
1006 return ck::type_convert<T>(static_cast<float>(fn(dis_(g_))));
1007 else if constexpr(ck::packed_size_v<T> == 1)
1008 return ck::type_convert<T>(fn(dis_(g_)));
1009 else if constexpr(ck::is_same_v<T, ck::f4x2_pk_t>)
1010 return ck::f4x2_pk_t{ck::type_convert<ck::f4x2_t>(
1012 ck::type_convert<float>(fn(dis_(g_)))})};
1013 else if constexpr(ck::is_same_v<T, ck::f6x32_pk_t> ||
1015 {
1016 return ck::type_convert<T>(
1018 ck::type_convert<float>(fn(dis_(g_))),
1019 ck::type_convert<float>(fn(dis_(g_))),
1020 ck::type_convert<float>(fn(dis_(g_))),
1021 ck::type_convert<float>(fn(dis_(g_))),
1022 ck::type_convert<float>(fn(dis_(g_))),
1023 ck::type_convert<float>(fn(dis_(g_))),
1024 ck::type_convert<float>(fn(dis_(g_))),
1025 ck::type_convert<float>(fn(dis_(g_))),
1026 ck::type_convert<float>(fn(dis_(g_))),
1027 ck::type_convert<float>(fn(dis_(g_))),
1028 ck::type_convert<float>(fn(dis_(g_))),
1029 ck::type_convert<float>(fn(dis_(g_))),
1030 ck::type_convert<float>(fn(dis_(g_))),
1031 ck::type_convert<float>(fn(dis_(g_))),
1032 ck::type_convert<float>(fn(dis_(g_))),
1033 ck::type_convert<float>(fn(dis_(g_))),
1034 ck::type_convert<float>(fn(dis_(g_))),
1035 ck::type_convert<float>(fn(dis_(g_))),
1036 ck::type_convert<float>(fn(dis_(g_))),
1037 ck::type_convert<float>(fn(dis_(g_))),
1038 ck::type_convert<float>(fn(dis_(g_))),
1039 ck::type_convert<float>(fn(dis_(g_))),
1040 ck::type_convert<float>(fn(dis_(g_))),
1041 ck::type_convert<float>(fn(dis_(g_))),
1042 ck::type_convert<float>(fn(dis_(g_))),
1043 ck::type_convert<float>(fn(dis_(g_))),
1044 ck::type_convert<float>(fn(dis_(g_))),
1045 ck::type_convert<float>(fn(dis_(g_))),
1046 ck::type_convert<float>(fn(dis_(g_))),
1047 ck::type_convert<float>(fn(dis_(g_))),
1048 ck::type_convert<float>(fn(dis_(g_)))});
1049 }
1050 else if constexpr(ck::is_same_v<T, ck::f6x16_pk_t> ||
1052 {
1053 return ck::type_convert<T>(
1055 ck::type_convert<float>(fn(dis_(g_))),
1056 ck::type_convert<float>(fn(dis_(g_))),
1057 ck::type_convert<float>(fn(dis_(g_))),
1058 ck::type_convert<float>(fn(dis_(g_))),
1059 ck::type_convert<float>(fn(dis_(g_))),
1060 ck::type_convert<float>(fn(dis_(g_))),
1061 ck::type_convert<float>(fn(dis_(g_))),
1062 ck::type_convert<float>(fn(dis_(g_))),
1063 ck::type_convert<float>(fn(dis_(g_))),
1064 ck::type_convert<float>(fn(dis_(g_))),
1065 ck::type_convert<float>(fn(dis_(g_))),
1066 ck::type_convert<float>(fn(dis_(g_))),
1067 ck::type_convert<float>(fn(dis_(g_))),
1068 ck::type_convert<float>(fn(dis_(g_))),
1069 ck::type_convert<float>(fn(dis_(g_)))});
1070 }
1071 else
1072 static_assert(false, "Unsupported packed size for T");
1073 };
1074
1075 std::size_t ib = ib_begin;
1076 for(; ib < ib_end - 1; ++ib)
1077 ck::static_for<0, BLOCK_SIZE, 1>{}([&](auto iw_) {
1078 constexpr size_t iw = iw_.value;
1079 dst[ib * BLOCK_SIZE + iw] = t_fn();
1080 });
1081 for(std::size_t iw = 0; iw < BLOCK_SIZE; ++iw)
1082 if(ib * BLOCK_SIZE + iw < element_space_size)
1083 dst[ib * BLOCK_SIZE + iw] = t_fn();
1084 };
1085
1086 if(it > 0)
1087 threads.emplace_back(std::move(job));
1088 else
1089 job(); // last job run in the main thread
1090 }
1091 for(auto& t : threads)
1092 t.join();
1093 }
1094
1095 template <typename... Is>
1096 std::size_t GetOffsetFromMultiIndex(Is... is) const
1097 {
1098 return mDesc.GetOffsetFromMultiIndex(is...) / ck::packed_size_v<ck::remove_cvref_t<T>>;
1099 }
1100
1101 template <typename... Is>
1102 T& operator()(Is... is)
1103 {
1104 return mData[mDesc.GetOffsetFromMultiIndex(is...) /
1106 }
1107
1108 template <typename... Is>
1109 const T& operator()(Is... is) const
1110 {
1111 return mData[mDesc.GetOffsetFromMultiIndex(is...) /
1113 }
1114
1115 T& operator()(const std::vector<std::size_t>& idx)
1116 {
1117 return mData[mDesc.GetOffsetFromMultiIndex(idx) / ck::packed_size_v<ck::remove_cvref_t<T>>];
1118 }
1119
1120 const T& operator()(const std::vector<std::size_t>& idx) const
1121 {
1122 return mData[mDesc.GetOffsetFromMultiIndex(idx) / ck::packed_size_v<ck::remove_cvref_t<T>>];
1123 }
1124
1125 typename Data::iterator begin() { return mData.begin(); }
1126
1127 typename Data::iterator end() { return mData.end(); }
1128
1129 typename Data::pointer data() { return mData.data(); }
1130
1131 typename Data::const_iterator begin() const { return mData.begin(); }
1132
1133 typename Data::const_iterator end() const { return mData.end(); }
1134
1135 typename Data::const_pointer data() const { return mData.data(); }
1136
1137 typename Data::size_type size() const { return mData.size(); }
1138
1139 template <typename U = T>
1140 auto AsSpan() const
1141 {
1142 constexpr std::size_t FromSize = sizeof(T);
1143 constexpr std::size_t ToSize = sizeof(U);
1144
1145 using Element = std::add_const_t<std::remove_reference_t<U>>;
1146 return ck::span<Element>{reinterpret_cast<Element*>(data()), size() * FromSize / ToSize};
1147 }
1148
1149 template <typename U = T>
1150 auto AsSpan()
1151 {
1152 constexpr std::size_t FromSize = sizeof(T);
1153 constexpr std::size_t ToSize = sizeof(U);
1154
1155 using Element = std::remove_reference_t<U>;
1156 return ck::span<Element>{reinterpret_cast<Element*>(data()), size() * FromSize / ToSize};
1157 }
1158
1161};
Definition utility/span.hpp:14
__host__ __device__ constexpr auto rank(const Layout< Shape, UnrolledDescriptorType > &layout)
Get layout rank (num elements in shape).
Definition layout_utils.hpp:310
auto call_f_unpack_args_impl(F f, T args, std::index_sequence< Is... >)
Definition library/utility/host_tensor.hpp:75
std::ostream & LogRangeAsType(std::ostream &os, Range &&range, std::string delim)
Definition library/utility/host_tensor.hpp:42
auto construct_f_unpack_args_impl(T args, std::index_sequence< Is... >)
Definition library/utility/host_tensor.hpp:89
std::ostream & LogRange(std::ostream &os, Range &&range, std::string delim)
Definition library/utility/host_tensor.hpp:27
HostTensorDescriptor transpose_host_tensor_descriptor_given_new2old(const HostTensorDescriptor &a, const New2Old &new2old, const NewLayout &new_layout=NewLayout())
Definition library/utility/host_tensor.hpp:599
auto call_f_unpack_args(F f, T args)
Definition library/utility/host_tensor.hpp:81
auto construct_f_unpack_args(F, T args)
Definition library/utility/host_tensor.hpp:95
auto make_ParallelTensorFunctor(F f, Xs... xs)
Definition library/utility/host_tensor.hpp:687
__host__ __device__ constexpr auto integer_divide_ceil(X x, Y y)
Definition utility/math.hpp:72
__host__ __device__ constexpr T min(T x)
Definition utility/math.hpp:116
auto fill(OutputRange &&range, const T &init) -> std::void_t< decltype(std::fill(std::begin(std::forward< OutputRange >(range)), std::end(std::forward< OutputRange >(range)), init))>
Definition algorithm.hpp:25
auto transform(InputRange &&range, OutputIterator iter, UnaryOperation unary_op) -> decltype(std::transform(std::begin(range), std::end(range), iter, unary_op))
Definition algorithm.hpp:36
typename vector_type< float, 16 >::type float16_t
Definition dtype_vector.hpp:2148
unsigned int get_available_cpu_cores()
Definition thread.hpp:11
remove_cv_t< remove_reference_t< T > > remove_cvref_t
Definition type.hpp:297
integral_constant< index_t, N > Number
Definition number.hpp:12
typename vector_type< float, 2 >::type float2_t
Definition dtype_vector.hpp:2145
__host__ __device__ constexpr Y type_convert(X x)
Definition utility/type_convert.hpp:98
typename vector_type< float, 32 >::type float32_t
Definition dtype_vector.hpp:2149
constexpr bool is_same_v
Definition type.hpp:283
constexpr bool is_packed_type_v
Definition data_type.hpp:414
int64_t long_index_t
Definition ck.hpp:300
constexpr index_t packed_size_v
Definition data_type.hpp:411
STL namespace.
const GenericPointer< typename T::ValueType > T2 value
Definition pointer.h:1697
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1517
A descriptor class for host tensors that manages tensor dimensions, strides, and layout.
Definition library/utility/host_tensor.hpp:171
HostTensorDescriptor()
Definition library/utility/host_tensor.hpp:208
void DispatchChosenLayout(ChosenLayout tag, const OrigLayout &orig, F &&f) const
Definition library/utility/host_tensor.hpp:214
HostTensorDescriptor(const std::initializer_list< ck::long_index_t > &lens, const std::initializer_list< ck::long_index_t > &strides, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:509
const std::vector< std::size_t > & GetStrides() const
HostTensorDescriptor(const std::initializer_list< X > &lens, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:461
std::size_t GetElementSize() const
HostTensorDescriptor(const std::initializer_list< ck::long_index_t > &lens, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:470
std::size_t GetOffsetFromMultiIndex(Is... is) const
Definition library/utility/host_tensor.hpp:561
void ValidateStrides(const Layout &layout) const
Definition library/utility/host_tensor.hpp:348
HostTensorDescriptor(std::vector< std::size_t > lens, std::vector< std::size_t > strides, const Layout &layout=DefaultLayout())
Definition library/utility/host_tensor.hpp:186
void CalculateStrides(const Layout &layout)
Definition library/utility/host_tensor.hpp:282
friend std::ostream & operator<<(std::ostream &os, const HostTensorDescriptor &desc)
HostTensorDescriptor(const std::initializer_list< X > &lens, const std::initializer_list< Y > &strides, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:496
HostTensorDescriptor(const std::initializer_list< std::size_t > &lens, const Strides &strides, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:522
ChosenLayout HandleDefaultLayout(const Layout &)
Definition library/utility/host_tensor.hpp:226
HostTensorDescriptor(const Lengths &lens, const Strides &strides, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:542
const std::vector< std::size_t > & GetLengths() const
std::size_t GetNumOfDimension() const
friend std::ostream & operator<<(std::ostream &os, ChosenLayout tag)
HostTensorDescriptor(const Lengths &lens, const Layout &layout=Layout{})
Definition library/utility/host_tensor.hpp:484
std::size_t GetElementSpaceSize() const
BaseTensorLayout DefaultLayout
Definition library/utility/host_tensor.hpp:173
ck::tensor_layout::BaseTensorLayout BaseTensorLayout
Definition library/utility/host_tensor.hpp:172
ChosenLayout
Definition library/utility/host_tensor.hpp:178
@ Original
Definition library/utility/host_tensor.hpp:179
@ RowMajor
Definition library/utility/host_tensor.hpp:180
@ ColumnMajor
Definition library/utility/host_tensor.hpp:181
std::size_t GetOffsetFromMultiIndex(const std::vector< std::size_t > &iss) const
Definition library/utility/host_tensor.hpp:568
Layout wrapper that performs the tensor descriptor logic.
Definition layout.hpp:24
Definition library/utility/host_tensor.hpp:634
F mF
Definition library/utility/host_tensor.hpp:635
std::size_t mN1d
Definition library/utility/host_tensor.hpp:639
ParallelTensorFunctor(F f, Xs... xs)
Definition library/utility/host_tensor.hpp:641
std::array< std::size_t, NDIM > mLens
Definition library/utility/host_tensor.hpp:637
std::array< std::size_t, NDIM > mStrides
Definition library/utility/host_tensor.hpp:638
void operator()(std::size_t num_thread=1) const
Definition library/utility/host_tensor.hpp:664
std::array< std::size_t, NDIM > GetNdIndices(std::size_t i) const
Definition library/utility/host_tensor.hpp:651
static constexpr std::size_t NDIM
Definition library/utility/host_tensor.hpp:636
auto AsSpan() const
Definition library/utility/host_tensor.hpp:1140
Tensor(std::initializer_list< X > lens, std::initializer_list< Y > strides, Rest &&... rest)
Definition library/utility/host_tensor.hpp:730
Tensor(const Lengths &lens, Rest &&... rest)
Definition library/utility/host_tensor.hpp:736
Tensor(const Lengths &lens, const Strides &strides)
Definition library/utility/host_tensor.hpp:715
Tensor()=delete
std::size_t GetNumOfDimension() const
Definition library/utility/host_tensor.hpp:806
void ForEach(const F &&f) const
Definition library/utility/host_tensor.hpp:866
decltype(auto) GetLengths() const
Definition library/utility/host_tensor.hpp:802
Data::const_iterator end() const
Definition library/utility/host_tensor.hpp:1133
std::size_t GetOffsetFromMultiIndex(Is... is) const
Definition library/utility/host_tensor.hpp:1096
void ForEach(F &&f)
Definition library/utility/host_tensor.hpp:843
Data::pointer data()
Definition library/utility/host_tensor.hpp:1129
Tensor & operator=(const Tensor &)=default
void ForEach_impl(F &&f, std::vector< size_t > &idx, size_t rank)
Definition library/utility/host_tensor.hpp:827
std::size_t GetElementSpaceSizeInBytes() const
Definition library/utility/host_tensor.hpp:822
void ForEach_impl(const F &&f, std::vector< size_t > &idx, size_t rank) const
Definition library/utility/host_tensor.hpp:850
std::vector< T > Data
Definition library/utility/host_tensor.hpp:696
Data mData
Definition library/utility/host_tensor.hpp:1160
Data::iterator end()
Definition library/utility/host_tensor.hpp:1127
void GenerateTensorDistr(Distribution dis={0.f, 1.f}, Mapping fn={}, const Generator g=Generator(0), std::size_t num_thread=-1)
Definition library/utility/host_tensor.hpp:969
std::size_t GetElementSize() const
Definition library/utility/host_tensor.hpp:808
~Tensor()=default
const T & operator()(const std::vector< std::size_t > &idx) const
Definition library/utility/host_tensor.hpp:1120
Tensor(const Lengths &lens, const Strides &strides, Rest &&... rest)
Definition library/utility/host_tensor.hpp:745
void SetZero()
Definition library/utility/host_tensor.hpp:824
Tensor(const Lengths &lens)
Definition library/utility/host_tensor.hpp:710
void savetxt(std::string file_name, std::string dtype="float")
Definition library/utility/host_tensor.hpp:776
Tensor(Tensor &&)=default
Data::const_pointer data() const
Definition library/utility/host_tensor.hpp:1135
auto AsSpan()
Definition library/utility/host_tensor.hpp:1150
Data::iterator begin()
Definition library/utility/host_tensor.hpp:1125
HostTensorDescriptor Descriptor
Definition library/utility/host_tensor.hpp:695
Tensor(std::initializer_list< X > lens, std::initializer_list< Y > strides)
Definition library/utility/host_tensor.hpp:704
Tensor(const Tensor &)=default
const T & operator()(Is... is) const
Definition library/utility/host_tensor.hpp:1109
Tensor(const Descriptor &desc)
Definition library/utility/host_tensor.hpp:750
Descriptor mDesc
Definition library/utility/host_tensor.hpp:1159
Tensor & operator=(Tensor &&)=default
Data::const_iterator begin() const
Definition library/utility/host_tensor.hpp:1131
T & operator()(const std::vector< std::size_t > &idx)
Definition library/utility/host_tensor.hpp:1115
std::size_t GetElementSpaceSize() const
Definition library/utility/host_tensor.hpp:810
Tensor(const Tensor< FromT > &other)
Definition library/utility/host_tensor.hpp:773
T & operator()(Is... is)
Definition library/utility/host_tensor.hpp:1102
Data::size_type size() const
Definition library/utility/host_tensor.hpp:1137
Tensor< OutT > CopyAsType() const
Definition library/utility/host_tensor.hpp:753
void GenerateTensorValue(G g, std::size_t num_thread=1)
Definition library/utility/host_tensor.hpp:873
decltype(auto) GetStrides() const
Definition library/utility/host_tensor.hpp:804
Tensor(std::initializer_list< X > lens)
Definition library/utility/host_tensor.hpp:699
Tensor(std::initializer_list< X > lens, Rest &&... rest)
Definition library/utility/host_tensor.hpp:721
Definition tensor_operation/gpu/device/tensor_layout.hpp:10
Definition tensor_operation/gpu/device/tensor_layout.hpp:45
Definition tensor_operation/gpu/device/tensor_layout.hpp:31
Definition tensor_operation/gpu/device/tensor_layout.hpp:26
Definition dtype_vector.hpp:10
Definition library/utility/host_tensor.hpp:616
joinable_thread(joinable_thread &&)=default
joinable_thread(Xs &&... xs)
Definition library/utility/host_tensor.hpp:618
~joinable_thread()
Definition library/utility/host_tensor.hpp:625
joinable_thread & operator=(joinable_thread &&)=default
__host__ __device__ constexpr const auto & layout(const Tensor< BufferAddressSpace, ElementType, Shape, UnrolledDescriptorType > &tensor)
Get Tensor Layout.
Definition tensor_utils.hpp:162