CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
#include <regular_tile_iterator_tensor_op_sm70.h>
Public Types | |
| using | Shape = Shape_ |
| using | Element = Element_ |
| using | Layout = layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value > |
| using | Index = typename Layout::Index |
| using | LongIndex = typename Layout::LongIndex |
| using | TensorRef = TensorRef< Element, Layout > |
| using | TensorCoord = typename Layout::TensorCoord |
| using | ThreadMap = ThreadMap_ |
| using | UnderlyingIterator = RegularTileIterator< layout::PitchLinearShape< Shape::kColumn, Shape::kRow >, Element, layout::VoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >,(kAdvanceRank==0?1:0), ThreadMap_ > |
| Underlying iterator type. More... | |
| using | Fragment = Array< Element, UnderlyingIterator::Fragment::kElements > |
| Fragment object to be loaded or stored. More... | |
Public Member Functions | |
| CUTLASS_HOST_DEVICE | RegularTileIterator (TensorRef ref, int thread_id) |
| Construct a TileIterator with zero threadblock offset. More... | |
| CUTLASS_HOST_DEVICE void | add_pointer_offset (LongIndex pointer_offset) |
| Adds a pointer offset in units of Element. More... | |
| CUTLASS_DEVICE void | add_tile_offset (TensorCoord const &coord) |
| Adds a tile offset. More... | |
| CUTLASS_HOST_DEVICE RegularTileIterator & | operator++ () |
| Advances to the next tile in memory. More... | |
| CUTLASS_HOST_DEVICE RegularTileIterator | operator++ (int) |
| Advances to the next tile in memory (postfix form; returns an iterator by value). More... | |
| CUTLASS_DEVICE void | load_with_pointer_offset (Fragment &frag, Index pointer_offset) |
| Loads a fragment from memory, applying the given pointer offset. More... | |
| CUTLASS_DEVICE void | load (Fragment &frag) |
| Loads a fragment from memory. More... | |
| CUTLASS_DEVICE void | store_with_pointer_offset (Fragment const &frag, Index pointer_offset) |
| Stores a fragment to memory, applying the given pointer offset. More... | |
| CUTLASS_DEVICE void | store (Fragment const &frag) |
| Stores a fragment to memory. More... | |
Static Public Attributes | |
| static int const | kAdvanceRank = AdvanceRank |
Tile Iterator specialized for row-major congruous TensorOp formats.
Satisfies: ForwardTileIteratorConcept | ReadableContiguousTileIteratorConcept | WriteableContiguousTileIteratorConcept
| using cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment >::Element = Element_ |
| using cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment >::Fragment = Array<Element, UnderlyingIterator::Fragment::kElements> |
| using cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment >::Index = typename Layout::Index |
| using cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment >::Layout = layout::RowMajorVoltaTensorOpMultiplicandBCongruous<sizeof_bits<Element_>::value> |
| using cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment >::LongIndex = typename Layout::LongIndex |
| using cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment >::Shape = Shape_ |
| using cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment >::TensorCoord = typename Layout::TensorCoord |
| using cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment >::TensorRef = TensorRef<Element, Layout> |
| using cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment >::ThreadMap = ThreadMap_ |
| using cutlass::transform::threadblock::RegularTileIterator< Shape_, Element_, layout::RowMajorVoltaTensorOpMultiplicandBCongruous< sizeof_bits< Element_ >::value >, AdvanceRank, ThreadMap_, Alignment >::UnderlyingIterator = RegularTileIterator< layout::PitchLinearShape<Shape::kColumn, Shape::kRow>, Element, layout::VoltaTensorOpMultiplicandBCongruous<sizeof_bits<Element_>::value>, (kAdvanceRank == 0 ? 1 : 0), ThreadMap_> |
|
inline |
| ref | Pointer to start of tensor |
| thread_id | ID of each participating thread |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
static |
1.8.11