From 52c262484f9e3aa19ef4421b970a2fb0445ca433 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 31 Jul 2019 00:14:43 +0000 Subject: [PATCH] TableGen: Add MinAlignment predicate AMDGPU uses some custom code predicates for testing alignments. I'm still having trouble comprehending the behavior of predicate bits in the PatFrag hierarchy. Any attempt to abstract these properties unexpectdly fails to apply them. llvm-svn: 367373 --- .../llvm/CodeGen/GlobalISel/InstructionSelector.h | 7 +++ .../CodeGen/GlobalISel/InstructionSelectorImpl.h | 24 ++++++++++ llvm/include/llvm/Target/TargetSelectionDAG.td | 6 ++- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 40 ++++++++-------- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 55 +++++++++++++--------- llvm/test/TableGen/address-space-patfrags.td | 37 ++++++++------- llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 20 +++++++- llvm/utils/TableGen/CodeGenDAGPatterns.h | 1 + llvm/utils/TableGen/GlobalISelEmitter.cpp | 46 ++++++++++++++++++ 9 files changed, 174 insertions(+), 62 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index e9b93be..8f7d78b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -148,6 +148,13 @@ enum { /// - AddrSpaceN+1 ... GIM_CheckMemoryAddressSpace, + /// Check the minimum alignment of the memory access for the given machine + /// memory operand. + /// - InsnID - Instruction ID + /// - MMOIdx - MMO index + /// - MinAlign - Minimum acceptable alignment + GIM_CheckMemoryAlignment, + /// Check the size of the memory access for the given machine memory operand /// against the size of an operand. /// - InsnID - Instruction ID diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index e8ee4af..f476271 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -409,6 +409,30 @@ bool InstructionSelector::executeMatchTable( return false; break; } + case GIM_CheckMemoryAlignment: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t MMOIdx = MatchTable[CurrentIdx++]; + unsigned MinAlign = MatchTable[CurrentIdx++]; + + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) { + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + + MachineMemOperand *MMO + = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_CheckMemoryAlignment" + << "(MIs[" << InsnID << "]->memoperands() + " << MMOIdx + << ")->getAlignment() >= " << MinAlign << ")\n"); + if (MMO->getAlignment() < MinAlign && handleReject() == RejectAndGiveUp) + return false; + + break; + } case GIM_CheckMemorySizeEqualTo: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t MMOIdx = MatchTable[CurrentIdx++]; diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index b913a05..1504a5b 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -741,6 +741,10 @@ class PatFrags frags, code pred = [{}], // If this empty, accept any address space. list AddressSpaces = ?; + // cast(N)->getAlignment() >= + // If this is empty, accept any alignment. + int MinAlignment = ?; + // cast(N)->getOrdering() == AtomicOrdering::Monotonic bit IsAtomicOrderingMonotonic = ?; // cast(N)->getOrdering() == AtomicOrdering::Acquire @@ -766,8 +770,6 @@ class PatFrags frags, code pred = [{}], // cast(N)->getMemoryVT().getScalarType() == MVT::; // cast(N)->getMemoryVT().getScalarType() == MVT::; ValueType ScalarMemoryVT = ?; - - // TODO: Add alignment } // PatFrag - A version of PatFrags matching only a single fragment. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 885bfc7..0be9ec3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -307,13 +307,9 @@ class AddressSpaceList AS> { list AddrSpaces = AS; } -class Aligned8Bytes : PatFrag (N)->getAlignment() % 8 == 0; -}]>; - -class Aligned16Bytes : PatFrag (N)->getAlignment() >= 16; -}]>; +class Aligned { + int MinAlignment = Bytes; +} class LoadFrag : PatFrag<(ops node:$ptr), (op node:$ptr)>; @@ -481,21 +477,27 @@ def store_local_hi16 : StoreHi16 , LocalAddress; def truncstorei8_local_hi16 : StoreHi16, LocalAddress; def atomic_store_local : LocalStore ; -def load_align8_local : Aligned8Bytes < - (ops node:$ptr), (load_local node:$ptr) ->; -def load_align16_local : Aligned16Bytes < - (ops node:$ptr), (load_local node:$ptr) ->; +def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> { + let IsLoad = 1; + let MinAlignment = 8; +} -def store_align8_local : Aligned8Bytes < - (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr) ->; +def load_align16_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> { + let IsLoad = 1; + let MinAlignment = 16; +} + +def store_align8_local: PatFrag<(ops node:$val, node:$ptr), + (store_local node:$val, node:$ptr)>, Aligned<8> { + let IsStore = 1; + +} +def store_align16_local: PatFrag<(ops node:$val, node:$ptr), + (store_local node:$val, node:$ptr)>, Aligned<16> { + let IsStore = 1; +} -def store_align16_local : Aligned16Bytes < - (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr) ->; def atomic_store_flat : FlatStore ; def truncstorei8_hi16_flat : StoreHi16, FlatStoreAddress; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index bd9b90d..a035bb6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -329,10 +329,12 @@ def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad, def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> { let IsUnindexed = 1; + let IsLoad = 1; } def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> { let IsNonExtLoad = 1; + let IsLoad = 1; } def atomic_load_32_glue : PatFrag<(ops node:$ptr), @@ -347,7 +349,7 @@ def atomic_load_64_glue : PatFrag<(ops node:$ptr), let MemoryVT = i64; } -def extload_glue : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> { +def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { let IsLoad = 1; let IsAnyExtLoad = 1; } @@ -392,23 +394,31 @@ def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> { let MemoryVT = i16; } -def load_glue_align8 : Aligned8Bytes < - (ops node:$ptr), (load_glue node:$ptr) ->; -def load_glue_align16 : Aligned16Bytes < - (ops node:$ptr), (load_glue node:$ptr) ->; +let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { +def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)>; + +let MemoryVT = i8 in { +def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>; +def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>; +def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>; +} + +let MemoryVT = i16 in { +def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>; +def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>; +def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>; +} + +def load_align8_local_m0 : LoadFrag , LocalAddress { + let MinAlignment = 8; +} +def load_align16_local_m0 : LoadFrag , LocalAddress { + let MinAlignment = 16; +} + +} // End IsLoad = 1 -def load_local_m0 : LoadFrag, LocalAddress; -def sextloadi8_local_m0 : LoadFrag, LocalAddress; -def sextloadi16_local_m0 : LoadFrag, LocalAddress; -def extloadi8_local_m0 : LoadFrag, LocalAddress; -def zextloadi8_local_m0 : LoadFrag, LocalAddress; -def extloadi16_local_m0 : LoadFrag, LocalAddress; -def zextloadi16_local_m0 : LoadFrag, LocalAddress; -def load_align8_local_m0 : LoadFrag , LocalAddress; -def load_align16_local_m0 : LoadFrag , LocalAddress; def atomic_load_32_local_m0 : LoadFrag, LocalAddress; def atomic_load_64_local_m0 : LoadFrag, LocalAddress; @@ -455,13 +465,11 @@ def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr), let MemoryVT = i16; } -def store_glue_align8 : Aligned8Bytes < - (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr) ->; - -def store_glue_align16 : Aligned16Bytes < - (ops node:$value, node:$ptr), (store_glue node:$value, node:$ptr) ->; +let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in { +def store_glue_align8 : PatFrag<(ops node:$val, node:$ptr), + (store_glue node:$val, node:$ptr)>, Aligned<8>; +def store_glue_align16 : PatFrag<(ops node:$val, node:$ptr), + (store_glue node:$val, node:$ptr)>, Aligned<16>; def store_local_m0 : StoreFrag, LocalAddress; def truncstorei8_local_m0 : StoreFrag, LocalAddress; @@ -470,6 +478,7 @@ def atomic_store_local_m0 : StoreFrag, LocalAddress; def store_align8_local_m0 : StoreFrag, LocalAddress; def store_align16_local_m0 : StoreFrag, LocalAddress; +} def si_setcc_uniform : PatFrag < (ops node:$lhs, node:$rhs, node:$cond), diff --git a/llvm/test/TableGen/address-space-patfrags.td b/llvm/test/TableGen/address-space-patfrags.td index 029170e..82f924b 100644 --- a/llvm/test/TableGen/address-space-patfrags.td +++ b/llvm/test/TableGen/address-space-patfrags.td @@ -19,6 +19,7 @@ def pat_frag_a : PatFrag <(ops node:$ptr), (load node:$ptr), [{}]> { let AddressSpaces = [ 999 ]; let IsLoad = 1; // FIXME: Can this be inferred? let MemoryVT = i32; + let MinAlignment = 2; } // With multiple address spaces @@ -44,49 +45,53 @@ def inst_c : Instruction { } // SDAG: case 2: { -// SDAG: // Predicate_pat_frag_a +// SDAG-NEXT: // Predicate_pat_frag_b // SDAG-NEXT: SDNode *N = Node; // SDAG-NEXT: (void)N; // SDAG-NEXT: unsigned AddrSpace = cast(N)->getAddressSpace(); - -// SDAG-NEXT: if (AddrSpace != 999) +// SDAG-NEXT: if (AddrSpace != 123 && AddrSpace != 455) // SDAG-NEXT: return false; // SDAG-NEXT: if (cast(N)->getMemoryVT() != MVT::i32) return false; // SDAG-NEXT: return true; -// GISEL: GIM_Try, /*On fail goto*//*Label 0*/ 47, // Rule ID 0 // + +// GISEL: GIM_Try, /*On fail goto*//*Label 0*/ {{[0-9]+}}, // Rule ID 0 // // GISEL-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, // GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_LOAD, // GISEL-NEXT: GIM_CheckMemorySizeEqualToLLT, /*MI*/0, /*MMO*/0, /*OpIdx*/0, -// GISEL-NEXT: GIM_CheckMemoryAddressSpace, /*MI*/0, /*MMO*/0, /*NumAddrSpace*/1, /*AddrSpace*/999, +// GISEL-NEXT: GIM_CheckMemoryAddressSpace, /*MI*/0, /*MMO*/0, /*NumAddrSpace*/2, /*AddrSpace*/123, /*AddrSpace*/455, // GISEL-NEXT: GIM_CheckMemorySizeEqualTo, /*MI*/0, /*MMO*/0, /*Size*/4, // GISEL-NEXT: GIM_CheckAtomicOrdering, /*MI*/0, /*Order*/(int64_t)AtomicOrdering::NotAtomic, def : Pat < - (pat_frag_a GPR32:$src), - (inst_a GPR32:$src) + (pat_frag_b GPR32:$src), + (inst_b GPR32:$src) >; + // SDAG: case 3: { -// SDAG-NEXT: // Predicate_pat_frag_b +// SDAG: // Predicate_pat_frag_a // SDAG-NEXT: SDNode *N = Node; // SDAG-NEXT: (void)N; // SDAG-NEXT: unsigned AddrSpace = cast(N)->getAddressSpace(); -// SDAG-NEXT: if (AddrSpace != 123 && AddrSpace != 455) + +// SDAG-NEXT: if (AddrSpace != 999) +// SDAG-NEXT: return false; +// SDAG-NEXT: if (cast(N)->getAlignment() < 2) // SDAG-NEXT: return false; // SDAG-NEXT: if (cast(N)->getMemoryVT() != MVT::i32) return false; // SDAG-NEXT: return true; - -// GISEL: GIM_Try, /*On fail goto*//*Label 1*/ 95, // Rule ID 1 // +// GISEL: GIM_Try, /*On fail goto*//*Label 1*/ {{[0-9]+}}, // Rule ID 1 // // GISEL-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, // GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_LOAD, // GISEL-NEXT: GIM_CheckMemorySizeEqualToLLT, /*MI*/0, /*MMO*/0, /*OpIdx*/0, -// GISEL-NEXT: GIM_CheckMemoryAddressSpace, /*MI*/0, /*MMO*/0, /*NumAddrSpace*/2, /*AddrSpace*/123, /*AddrSpace*/455, +// GISEL-NEXT: GIM_CheckMemoryAddressSpace, /*MI*/0, /*MMO*/0, /*NumAddrSpace*/1, /*AddrSpace*/999, +// GISEL-NEXT: GIM_CheckMemoryAlignment, /*MI*/0, /*MMO*/0, /*MinAlign*/2, // GISEL-NEXT: GIM_CheckMemorySizeEqualTo, /*MI*/0, /*MMO*/0, /*Size*/4, // GISEL-NEXT: GIM_CheckAtomicOrdering, /*MI*/0, /*Order*/(int64_t)AtomicOrdering::NotAtomic, def : Pat < - (pat_frag_b GPR32:$src), - (inst_b GPR32:$src) + (pat_frag_a GPR32:$src), + (inst_a GPR32:$src) >; @@ -98,7 +103,7 @@ def truncstorei16_addrspace : PatFrag<(ops node:$val, node:$ptr), } // Test truncstore without a specific MemoryVT -// GISEL: GIM_Try, /*On fail goto*//*Label 2*/ 133, // Rule ID 2 // +// GISEL: GIM_Try, /*On fail goto*//*Label 2*/ {{[0-9]+}}, // Rule ID 2 // // GISEL-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, // GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_STORE, // GISEL-NEXT: GIM_CheckMemorySizeLessThanLLT, /*MI*/0, /*MMO*/0, /*OpIdx*/0, @@ -111,7 +116,7 @@ def : Pat < >; // Test truncstore with specific MemoryVT -// GISEL: GIM_Try, /*On fail goto*//*Label 3*/ 181, // Rule ID 3 // +// GISEL: GIM_Try, /*On fail goto*//*Label 3*/ {{[0-9]+}}, // Rule ID 3 // // GISEL-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, // GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_STORE, // GISEL-NEXT: GIM_CheckMemorySizeLessThanLLT, /*MI*/0, /*MMO*/0, /*OpIdx*/0, diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index 75890d2..de42365 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -883,7 +883,8 @@ std::string TreePredicateFn::getPredCode() const { if (isLoad()) { if (!isUnindexed() && !isNonExtLoad() && !isAnyExtLoad() && !isSignExtLoad() && !isZeroExtLoad() && getMemoryVT() == nullptr && - getScalarMemoryVT() == nullptr) + getScalarMemoryVT() == nullptr && getAddressSpaces() == nullptr && + getMinAlignment() < 1) PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), "IsLoad cannot be used by itself"); } else { @@ -903,7 +904,8 @@ std::string TreePredicateFn::getPredCode() const { if (isStore()) { if (!isUnindexed() && !isTruncStore() && !isNonTruncStore() && - getMemoryVT() == nullptr && getScalarMemoryVT() == nullptr) + getMemoryVT() == nullptr && getScalarMemoryVT() == nullptr && + getAddressSpaces() == nullptr && getMinAlignment() < 1) PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), "IsStore cannot be used by itself"); } else { @@ -977,6 +979,13 @@ std::string TreePredicateFn::getPredCode() const { Code += ")\nreturn false;\n"; } + int64_t MinAlign = getMinAlignment(); + if (MinAlign > 0) { + Code += "if (cast(N)->getAlignment() < "; + Code += utostr(MinAlign); + Code += ")\nreturn false;\n"; + } + Record *MemoryVT = getMemoryVT(); if (MemoryVT) @@ -1177,6 +1186,13 @@ ListInit *TreePredicateFn::getAddressSpaces() const { return R->getValueAsListInit("AddressSpaces"); } +int64_t TreePredicateFn::getMinAlignment() const { + Record *R = getOrigPatFragRecord()->getRecord(); + if (R->isValueUnset("MinAlignment")) + return 0; + return R->getValueAsInt("MinAlignment"); +} + Record *TreePredicateFn::getScalarMemoryVT() const { Record *R = getOrigPatFragRecord()->getRecord(); if (R->isValueUnset("ScalarMemoryVT")) diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.h b/llvm/utils/TableGen/CodeGenDAGPatterns.h index d41de68..80fc932 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.h +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.h @@ -594,6 +594,7 @@ public: Record *getScalarMemoryVT() const; ListInit *getAddressSpaces() const; + int64_t getMinAlignment() const; // If true, indicates that GlobalISel-based C++ code was supplied. bool hasGISelPredicateCode() const; diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 65c9606..7e17daf 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -249,6 +249,10 @@ static std::string explainPredicates(const TreePatternNode *N) { OS << ']'; } + int64_t MinAlign = P.getMinAlignment(); + if (MinAlign > 0) + Explanation += " MinAlign=" + utostr(MinAlign); + if (P.isAtomicOrderingMonotonic()) Explanation += " monotonic"; if (P.isAtomicOrderingAcquire()) @@ -329,6 +333,9 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) { const ListInit *AddrSpaces = Predicate.getAddressSpaces(); if (AddrSpaces && !AddrSpaces->empty()) continue; + + if (Predicate.getMinAlignment() > 0) + continue; } if (Predicate.isAtomic() && Predicate.getMemoryVT()) @@ -1052,6 +1059,7 @@ public: IPM_MemoryLLTSize, IPM_MemoryVsLLTSize, IPM_MemoryAddressSpace, + IPM_MemoryAlignment, IPM_GenericPredicate, OPM_SameOperand, OPM_ComplexPattern, @@ -1849,6 +1857,40 @@ public: } }; +class MemoryAlignmentPredicateMatcher : public InstructionPredicateMatcher { +protected: + unsigned MMOIdx; + int MinAlign; + +public: + MemoryAlignmentPredicateMatcher(unsigned InsnVarID, unsigned MMOIdx, + int MinAlign) + : InstructionPredicateMatcher(IPM_MemoryAlignment, InsnVarID), + MMOIdx(MMOIdx), MinAlign(MinAlign) { + assert(MinAlign > 0); + } + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == IPM_MemoryAlignment; + } + + bool isIdentical(const PredicateMatcher &B) const override { + if (!InstructionPredicateMatcher::isIdentical(B)) + return false; + auto *Other = cast(&B); + return MMOIdx == Other->MMOIdx && MinAlign == Other->MinAlign; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckMemoryAlignment") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx) + << MatchTable::Comment("MinAlign") << MatchTable::IntValue(MinAlign) + << MatchTable::LineBreak; + } +}; + /// Generates code to check that the size of an MMO is less-than, equal-to, or /// greater than a given LLT. class MemoryVsLLTSizePredicateMatcher : public InstructionPredicateMatcher { @@ -3287,6 +3329,10 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( 0, ParsedAddrSpaces); } } + + int64_t MinAlign = Predicate.getMinAlignment(); + if (MinAlign > 0) + InsnMatcher.addPredicate(0, MinAlign); } // G_LOAD is used for both non-extending and any-extending loads. -- 2.7.4