|
| using | Shape = Shape_ |
| |
| using | Element = Element_ |
| |
| using | Layout = layout::PitchLinear |
| |
| using | ThreadMap = ThreadMap_ |
| |
| using | Index = typename Layout::Index |
| |
| using | LongIndex = typename Layout::LongIndex |
| |
| using | TensorRef = TensorRef< Element, Layout > |
| |
| using | TensorView = TensorView< Element, Layout > |
| |
| using | TensorCoord = typename Layout::TensorCoord |
| |
| using | Pointer = Element * |
| |
| using | NonConstPointer = typename platform::remove_const< Element >::type * |
| |
| using | AccessType = AlignedArray< Element, AccessSize,(AccessSize *sizeof_bits< Element >::value/8)> |
| | Type used for internal memory accesses. More...
|
| |
| using | TileAccessIterator = PredicatedTileAccessIterator< Shape, Element, Layout, kAdvanceRank, ThreadMap, AccessType > |
| | Underlying iterator to compute the addresses. More...
|
| |
| using | Fragment = cutlass::Array< Element, ThreadMap::Iterations::kCount *ThreadMap::kElementsPerAccess > |
| | Fragment object to be loaded or stored. More...
|
| |
| using | Mask = typename TileAccessIterator::Mask |
| | Predicate vector stores mask to guard accesses. More...
|
| |
|
| CUTLASS_HOST_DEVICE | PredicatedTileIterator (Params const &params, Pointer pointer, TensorCoord extent, int thread_id, TensorCoord const &threadblock_offset) |
| |
| CUTLASS_HOST_DEVICE | PredicatedTileIterator (Params const &params, Pointer pointer, TensorCoord extent, int thread_id) |
| | Construct a PredicatedTileIterator with zero threadblock offset. More...
|
| |
| CUTLASS_HOST_DEVICE void | add_pointer_offset (LongIndex pointer_offset) |
| | Adds a pointer offset in units of Element. More...
|
| |
| CUTLASS_HOST_DEVICE PredicatedTileIterator & | operator++ () |
| |
| CUTLASS_HOST_DEVICE PredicatedTileIterator | operator++ (int) |
| |
| CUTLASS_HOST_DEVICE void | clear_mask () |
| | Clears the predicate set efficiently. More...
|
| |
| CUTLASS_HOST_DEVICE void | enable_mask () |
| | Enables the predicate set efficiently. More...
|
| |
| CUTLASS_HOST_DEVICE void | set_mask (Mask const &mask) |
| | Sets the predicate mask, overriding value stored in predicate iterator. More...
|
| |
| CUTLASS_HOST_DEVICE void | get_mask (Mask &mask) |
| | Gets the mask. More...
|
| |
| CUTLASS_DEVICE void | load_with_pointer_offset (Fragment &frag, Index pointer_offset) |
| |
| CUTLASS_DEVICE void | load (Fragment &frag) |
| | Loads a fragment from memory. More...
|
| |
| CUTLASS_DEVICE void | store_with_pointer_offset (Fragment const &frag, Index pointer_offset) |
| | Store a fragment to memory. More...
|
| |
| CUTLASS_DEVICE void | store (Fragment const &frag) |
| | Store a fragment to memory. More...
|
| |
template<typename Shape_, typename Element_, int AdvanceRank, typename ThreadMap_, int AccessSize>
class cutlass::transform::threadblock::PredicatedTileIterator< Shape_, Element_, layout::PitchLinear, AdvanceRank, ThreadMap_, AccessSize >
Specialization of PredicatedTileIterator for pitch-linear data.
Satisfies: ForwardTileIteratorConcept | ReadableContiguousTileIteratorConcept | WriteableContiguousTileIteratorConcept | MaskedTileIteratorConcept
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
Advances to the next tile in memory.
The first time this method is called, predicates are updated, and the iterator's internal pointer is reverted to the first "steady state" tile. Subsequent calls are lightweight and must only update the internal pointer.
template<typename Shape_ , typename Element_ , int AdvanceRank, typename ThreadMap_ , int AccessSize>
Advances to the next tile in memory.
The first time this method is called, predicates are updated, and the iterator's internal pointer is reverted to the first "steady state" tile. Subsequent calls are lightweight and must only update the internal pointer.