From 670c49c33c689dd9fd281990202dbbb02e4c1ea6 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 26 Feb 2026 21:40:13 -0800 Subject: [PATCH 1/4] [WIP] Update effects for acquire and release operations One of the primary use cases of `EffectAnalyzer` is to determine whether one expression (or a sequence of expressions) can be reordered before or after another expression (or sequence of expressions). Until now, the check whether effects "invalidate" each other and would prevent reordering has been symmetrical, checking read-write, write-write, and write-read conflicts as well as symmetric conditions that would prevent reordering. But acquire load and release store operations break this symmetry. Consider an expression A containing an acquire load (and no other effects) and an expression B containing a release store (and no other effects), where we know the accesses do not alias. Then if A is before B, it is not valid to reorder the expressions to have B before A because the acquire load cannot move forward past the release store and the release store cannot move back past the acquire load. However, if B is before A, it _is_ valid to reorder them, since memory accesses are generally allowed to move forward past acquire loads and back past release stores. Update `EffectAnalyzer` to take these allowed reorderings into account by tracking the strictest memory orders used to read or write to shared locations in the analyzed expressions. Also update it to differentiate between shared and unshared memories, structs, and arrays to account for both the fact that accesses to shared and unshared locations can never alias and the fact that accesses to unshared locations can never participate in synchronization. While we're at it, treat effects due to e.g. nullability of reference parameters and mutability of fields more uniformly across expressions. 
Now that the effect comparison to see whether reordering is prevented is no longer symmetric, rename it from `invalidates` to the more descriptive `orderedBefore`, where `A.orderedBefore(B)` returns true iff `A` occurring before `B` implies that there is an ordering constraint that would prevent reordering `A` and `B` past each other. To accommodate users trying to move expressions in either direction, also add an `A.orderedAfter(B)` method for the case where `A` occurs after `B`. WIP because I need to add more testing for the effect analysis itself and for the passes to ensure they are performing effect comparisons in the correct directions. --- src/ir/effects.cpp | 28 +- src/ir/effects.h | 562 ++++++++++++------ src/ir/localize.h | 4 +- src/passes/CodePushing.cpp | 4 +- src/passes/HeapStoreOptimization.cpp | 8 +- src/passes/LocalCSE.cpp | 13 +- src/passes/LoopInvariantCodeMotion.cpp | 6 +- src/passes/MergeBlocks.cpp | 12 +- src/passes/Monomorphize.cpp | 2 +- src/passes/OptimizeCasts.cpp | 22 +- src/passes/OptimizeInstructions.cpp | 16 +- src/passes/RemoveUnusedBrs.cpp | 13 +- src/passes/SimplifyLocals.cpp | 4 +- src/wasm2js.h | 6 +- test/lit/passes/gufa-refs.wast | 46 +- .../simplify-locals-atomic-effects.wast | 96 +++ test/lit/passes/vacuum-gc-atomics.wast | 48 +- test/lit/passes/vacuum-tnh.wast | 2 +- 18 files changed, 605 insertions(+), 287 deletions(-) create mode 100644 test/lit/passes/simplify-locals-atomic-effects.wast diff --git a/src/ir/effects.cpp b/src/ir/effects.cpp index af89dde2ca7..b278879eed5 100644 --- a/src/ir/effects.cpp +++ b/src/ir/effects.cpp @@ -45,6 +45,12 @@ std::ostream& operator<<(std::ostream& o, wasm::EffectAnalyzer& effects) { if (effects.writesMemory) { o << "writesMemory\n"; } + if (effects.readsSharedMemory) { + o << "readsSharedMemory\n"; + } + if (effects.writesSharedMemory) { + o << "writesSharedMemory\n"; + } if (effects.readsTable) { o << "readsTable\n"; } @@ -57,20 +63,35 @@ std::ostream& operator<<(std::ostream& o, 
wasm::EffectAnalyzer& effects) { if (effects.writesStruct) { o << "writesStruct\n"; } - if (effects.readsArray) { + if (effects.readsSharedMutableStruct) { + o << "readsSharedMutableStruct\n"; + } + if (effects.writesSharedStruct) { + o << "writesSharedStruct\n"; + } + if (effects.readsMutableArray) { o << "readsArray\n"; } if (effects.writesArray) { o << "writesArray\n"; } + if (effects.readsSharedMutableArray) { + o << "readsSharedMutableArray\n"; + } + if (effects.writesSharedArray) { + o << "writesSharedArray\n"; + } if (effects.trap) { o << "trap\n"; } if (effects.implicitTrap) { o << "implicitTrap\n"; } - if (effects.isAtomic) { - o << "isAtomic\n"; + if (effects.readOrder != wasm::MemoryOrder::Unordered) { + o << "readOrder " << effects.readOrder << "\n"; + } + if (effects.writeOrder != wasm::MemoryOrder::Unordered) { + o << "writeOrder " << effects.writeOrder << "\n"; } if (effects.throws_) { o << "throws_\n"; @@ -135,7 +156,6 @@ std::ostream& operator<<(std::ostream& o, wasm::EffectAnalyzer& effects) { if (effects.hasExternalBreakTargets()) { o << "hasExternalBreakTargets\n"; } - o << "}"; return o; } diff --git a/src/ir/effects.h b/src/ir/effects.h index 5ad1c4b955f..4ac9f227692 100644 --- a/src/ir/effects.h +++ b/src/ir/effects.h @@ -20,6 +20,7 @@ #include "ir/intrinsics.h" #include "pass.h" #include "wasm-traversal.h" +#include "wasm.h" namespace wasm { @@ -107,14 +108,21 @@ class EffectAnalyzer { std::set globalsWritten; bool readsMemory = false; bool writesMemory = false; + bool readsSharedMemory = false; + bool writesSharedMemory = false; bool readsTable = false; bool writesTable = false; // TODO: More specific type-based alias analysis, and not just at the // struct/array level. 
bool readsMutableStruct = false; bool writesStruct = false; - bool readsArray = false; + bool readsSharedMutableStruct = false; + bool writesSharedStruct = false; + bool readsMutableArray = false; bool writesArray = false; + bool readsSharedMutableArray = false; + bool writesSharedArray = false; + // A trap, either from an unreachable instruction, or from an implicit trap // that we do not ignore (see below). // @@ -136,9 +144,7 @@ class EffectAnalyzer { // A trap from an instruction like a load or div/rem, which may trap on corner // cases. If we do not ignore implicit traps then these are counted as a trap. bool implicitTrap = false; - // An atomic load/store/RMW/Cmpxchg or an operator that has a defined ordering - // wrt atomics (e.g. memory.grow) - bool isAtomic = false; + // Whether an exception may be thrown. bool throws_ = false; // The nested depth of try-catch_all. If an instruction that may throw is // inside an inner try-catch_all, we don't mark it as 'throws_', because it @@ -171,6 +177,11 @@ class EffectAnalyzer { // more here.) bool hasReturnCallThrow = false; + // The strongest memory orders used to read and write to any shared location + // (e.g. shared memories, shared GC structs, etc), if any. 
+ MemoryOrder readOrder = MemoryOrder::Unordered; + MemoryOrder writeOrder = MemoryOrder::Unordered; + // Helper functions to check for various effect types bool accessesLocal() const { @@ -180,11 +191,22 @@ class EffectAnalyzer { return globalsWritten.size() + mutableGlobalsRead.size() > 0; } bool accessesMemory() const { return calls || readsMemory || writesMemory; } + bool accessesSharedMemory() const { + return calls || readsSharedMemory || writesSharedMemory; + } bool accessesTable() const { return calls || readsTable || writesTable; } bool accessesMutableStruct() const { return calls || readsMutableStruct || writesStruct; } - bool accessesArray() const { return calls || readsArray || writesArray; } + bool accessesSharedMutableStruct() const { + return calls || readsSharedMutableStruct || writesSharedStruct; + } + bool accessesArray() const { + return calls || readsMutableArray || writesArray; + } + bool accessesSharedArray() const { + return calls || readsSharedMutableArray || writesSharedArray; + } bool throws() const { return throws_ || !delegateTargets.empty(); } // Check whether this may transfer control flow to somewhere outside of this // expression (aside from just flowing out normally). That includes a break @@ -199,17 +221,30 @@ class EffectAnalyzer { // Changes something in globally-stored state. 
bool writesGlobalState() const { - return globalsWritten.size() || writesMemory || writesTable || - writesStruct || writesArray || isAtomic || calls; + return globalsWritten.size() || writesMemory || writesSharedMemory || + writesTable || writesStruct || writesSharedStruct || writesArray || + writesSharedArray || calls; } bool readsMutableGlobalState() const { - return mutableGlobalsRead.size() || readsMemory || readsTable || - readsMutableStruct || readsArray || isAtomic || calls; + return mutableGlobalsRead.size() || readsMemory || readsSharedMemory || + readsTable || readsMutableStruct || readsSharedMutableStruct || + readsMutableArray || readsSharedMutableArray || calls; + } + bool accessesSharedGlobalState() const { + return readsSharedMemory || writesSharedMemory || + readsSharedMutableStruct || writesSharedStruct || + readsSharedMutableArray || writesSharedArray || calls; + } + + bool hasSynchronization() const { + return readOrder != MemoryOrder::Unordered || + writeOrder != MemoryOrder::Unordered; } bool hasNonTrapSideEffects() const { return localsWritten.size() > 0 || danglingPop || writesGlobalState() || - throws() || transfersControlFlow() || mayNotReturn; + throws() || transfersControlFlow() || hasSynchronization() || + mayNotReturn; } bool hasSideEffects() const { return trap || hasNonTrapSideEffects(); } @@ -245,8 +280,8 @@ class EffectAnalyzer { // check if we break to anything external from ourselves bool hasExternalBreakTargets() const { return !breakTargets.empty(); } - // Checks if these effects would invalidate another set of effects (e.g., if - // we write, we invalidate someone that reads). + // Checks if these effects must remain ordered before another set of effects + // (e.g., if we write, we must remain ordered before someone that reads). // // This assumes the things whose effects we are comparing will both execute, // at least if neither of them transfers control flow away. 
That is, we assume @@ -278,26 +313,59 @@ class EffectAnalyzer { // example we can't reorder A and B if B traps, but in the first example we // can reorder them even if B traps (even if A has a global effect like a // global.set, since we assume B does not trap in traps-never-happen). - bool invalidates(const EffectAnalyzer& other) { + bool orderedBefore(const EffectAnalyzer& other) const { + // Cannot reorder control flow and side effects. if ((transfersControlFlow() && other.hasSideEffects()) || - (other.transfersControlFlow() && hasSideEffects()) || - ((writesMemory || calls) && other.accessesMemory()) || + (other.transfersControlFlow() && hasSideEffects())) { + return true; + } + // write-write, write-read, and read-write conflicts on possibly aliasing + // locations prevent reordering. Calls may access these locations. + if (((writesMemory || calls) && other.accessesMemory()) || ((other.writesMemory || other.calls) && accessesMemory()) || + ((writesSharedMemory || calls) && other.accessesSharedMemory()) || + ((other.writesSharedMemory || other.calls) && accessesSharedMemory()) || ((writesTable || calls) && other.accessesTable()) || ((other.writesTable || other.calls) && accessesTable()) || ((writesStruct || calls) && other.accessesMutableStruct()) || ((other.writesStruct || other.calls) && accessesMutableStruct()) || + ((writesSharedStruct || calls) && + other.accessesSharedMutableStruct()) || + ((other.writesSharedStruct || other.calls) && + accessesSharedMutableStruct()) || ((writesArray || calls) && other.accessesArray()) || ((other.writesArray || other.calls) && accessesArray()) || - (danglingPop || other.danglingPop)) { + ((writesSharedArray || calls) && other.accessesSharedArray()) || + ((other.writesSharedArray || other.calls) && accessesSharedArray())) { return true; } - // All atomics are sequentially consistent for now, and ordered wrt other - // memory references. 
- if ((isAtomic && other.accessesMemory()) || - (other.isAtomic && accessesMemory())) { + // Cannot reorder anything before dangling pops. + if (danglingPop) { return true; } + // Shared location accesses cannot be reordered after (but may be able to be + // reordered before) release stores. + if (other.writeOrder >= MemoryOrder::AcqRel && + accessesSharedGlobalState()) { + return true; + } + // Shared location accesses cannot be reordered before (but may be able to + // be reordered after) acquire loads. + if (readOrder >= MemoryOrder::AcqRel && other.accessesSharedGlobalState()) { + return true; + } + // No shared location accesses may be reordered in either direction around a + // seqcst operation. + if (((readOrder == MemoryOrder::SeqCst || + writeOrder == MemoryOrder::SeqCst) && + other.accessesSharedGlobalState()) || + ((other.readOrder == MemoryOrder::SeqCst || + other.writeOrder == MemoryOrder::SeqCst) && + accessesSharedGlobalState())) { + return true; + } + // write-write, write-read, and read-write conflicts on a local prevent + // reordering. for (auto local : localsWritten) { if (other.localsRead.count(local) || other.localsWritten.count(local)) { return true; @@ -308,6 +376,8 @@ class EffectAnalyzer { return true; } } + // write-write, write-read, and read-write conflicts on globals prevent + // reordering. Unlike for locals, calls can access globals. 
if ((other.calls && accessesMutableGlobal()) || (calls && other.accessesMutableGlobal())) { return true; @@ -345,24 +415,39 @@ class EffectAnalyzer { return false; } + bool orderedAfter(const EffectAnalyzer& other) { + return other.orderedBefore(*this); + } + void mergeIn(const EffectAnalyzer& other) { branchesOut = branchesOut || other.branchesOut; calls = calls || other.calls; readsMemory = readsMemory || other.readsMemory; writesMemory = writesMemory || other.writesMemory; + readsSharedMemory = readsSharedMemory || other.readsSharedMemory; + writesSharedMemory = writesSharedMemory || other.writesSharedMemory; readsTable = readsTable || other.readsTable; writesTable = writesTable || other.writesTable; readsMutableStruct = readsMutableStruct || other.readsMutableStruct; writesStruct = writesStruct || other.writesStruct; - readsArray = readsArray || other.readsArray; + readsSharedMutableStruct = + readsSharedMutableStruct || other.readsSharedMutableStruct; + writesSharedStruct = writesSharedStruct || other.writesSharedStruct; + readsMutableArray = readsMutableArray || other.readsMutableArray; writesArray = writesArray || other.writesArray; + readsSharedMutableArray = + readsSharedMutableArray || other.readsSharedMutableArray; + writesSharedArray = writesSharedArray || other.writesSharedArray; trap = trap || other.trap; implicitTrap = implicitTrap || other.implicitTrap; trapsNeverHappen = trapsNeverHappen || other.trapsNeverHappen; - isAtomic = isAtomic || other.isAtomic; throws_ = throws_ || other.throws_; danglingPop = danglingPop || other.danglingPop; mayNotReturn = mayNotReturn || other.mayNotReturn; + hasReturnCallThrow = hasReturnCallThrow || other.hasReturnCallThrow; + readOrder = std::max(readOrder, other.readOrder); + writeOrder = std::max(writeOrder, other.writeOrder); + for (auto i : other.localsRead) { localsRead.insert(i); } @@ -597,56 +682,88 @@ class EffectAnalyzer { parent.globalsWritten.insert(curr->name); } void visitLoad(Load* curr) { - 
parent.readsMemory = true; - parent.isAtomic |= curr->isAtomic(); + if (parent.module.getMemory(curr->memory)->shared) { + parent.readsSharedMemory = true; + parent.readOrder = std::max(parent.readOrder, curr->order); + } else { + parent.readsMemory = true; + } parent.implicitTrap = true; } void visitStore(Store* curr) { - parent.writesMemory = true; - parent.isAtomic |= curr->isAtomic(); + if (parent.module.getMemory(curr->memory)->shared) { + parent.writesSharedMemory = true; + parent.writeOrder = std::max(parent.writeOrder, curr->order); + } else { + parent.writesMemory = true; + } parent.implicitTrap = true; } void visitAtomicRMW(AtomicRMW* curr) { - parent.readsMemory = true; - parent.writesMemory = true; - parent.isAtomic = true; + if (parent.module.getMemory(curr->memory)->shared) { + parent.readsSharedMemory = true; + parent.writesSharedMemory = true; + parent.readOrder = std::max(parent.readOrder, curr->order); + parent.writeOrder = std::max(parent.writeOrder, curr->order); + } else { + parent.readsMemory = true; + parent.writesMemory = true; + } parent.implicitTrap = true; } void visitAtomicCmpxchg(AtomicCmpxchg* curr) { - parent.readsMemory = true; - parent.writesMemory = true; - parent.isAtomic = true; + if (parent.module.getMemory(curr->memory)->shared) { + parent.readsSharedMemory = true; + parent.writesSharedMemory = true; + parent.readOrder = std::max(parent.readOrder, curr->order); + parent.writeOrder = std::max(parent.writeOrder, curr->order); + } else { + parent.readsMemory = true; + parent.writesMemory = true; + } parent.implicitTrap = true; } void visitAtomicWait(AtomicWait* curr) { - parent.readsMemory = true; + // Waits on unshared memories trap. + if (!parent.module.getMemory(curr->memory)->shared) { + parent.trap = true; + return; + } // AtomicWait doesn't strictly write memory, but it does modify the // waiters list associated with the specified address, which we can think // of as a write. 
- parent.writesMemory = true; - parent.isAtomic = true; + parent.readsSharedMemory = true; + parent.writesSharedMemory = true; + parent.readOrder = parent.writeOrder = MemoryOrder::SeqCst; + // Traps on unaligned accesses. parent.implicitTrap = true; } void visitAtomicNotify(AtomicNotify* curr) { - // AtomicNotify doesn't strictly write memory, but it does modify the - // waiters list associated with the specified address, which we can think - // of as a write. - parent.readsMemory = true; - parent.writesMemory = true; - parent.isAtomic = true; + // Notifies on unshared memories just return 0 or trap on unaligned + // accesses. parent.implicitTrap = true; + if (!parent.module.getMemory(curr->memory)->shared) { + return; + } + // AtomicNotify doesn't strictly write memory, but it does + // modify the waiters list associated with the specified address, which we + // can think of as a write. + parent.readsSharedMemory = true; + parent.writesSharedMemory = true; + parent.readOrder = parent.writeOrder = MemoryOrder::SeqCst; } void visitAtomicFence(AtomicFence* curr) { - // AtomicFence should not be reordered with any memory operations, so we - // set these to true. - parent.readsMemory = true; - parent.writesMemory = true; - parent.isAtomic = true; + // AtomicFence should not be reordered with any shared location accesses, + // so we set these to true. + parent.readsSharedMemory = true; + parent.writesSharedMemory = true; + parent.readOrder = parent.writeOrder = MemoryOrder::SeqCst; } void visitPause(Pause* curr) { - // It's not much of a problem if pause gets reordered with anything, but - // we don't want it to be removed entirely. - parent.isAtomic = true; + // We don't want this to be moved out of loops, but it doesn't otherwise + // matter much how it gets reordered. Say we transfer control as a coarse + // approximation of this. 
+ parent.branchesOut = true; } void visitSIMDExtract(SIMDExtract* curr) {} void visitSIMDReplace(SIMDReplace* curr) {} @@ -654,19 +771,35 @@ class EffectAnalyzer { void visitSIMDTernary(SIMDTernary* curr) {} void visitSIMDShift(SIMDShift* curr) {} void visitSIMDLoad(SIMDLoad* curr) { - parent.readsMemory = true; + if (parent.module.getMemory(curr->memory)->shared) { + parent.readsSharedMemory = true; + } else { + parent.readsMemory = true; + } parent.implicitTrap = true; } void visitSIMDLoadStoreLane(SIMDLoadStoreLane* curr) { - if (curr->isLoad()) { - parent.readsMemory = true; + if (parent.module.getMemory(curr->memory)->shared) { + if (curr->isLoad()) { + parent.readsSharedMemory = true; + } else { + parent.writesSharedMemory = true; + } } else { - parent.writesMemory = true; + if (curr->isLoad()) { + parent.readsMemory = true; + } else { + parent.writesMemory = true; + } } parent.implicitTrap = true; } void visitMemoryInit(MemoryInit* curr) { - parent.writesMemory = true; + if (parent.module.getMemory(curr->memory)->shared) { + parent.writesSharedMemory = true; + } else { + parent.writesMemory = true; + } parent.implicitTrap = true; } void visitDataDrop(DataDrop* curr) { @@ -677,12 +810,24 @@ class EffectAnalyzer { parent.implicitTrap = true; } void visitMemoryCopy(MemoryCopy* curr) { - parent.readsMemory = true; - parent.writesMemory = true; + if (parent.module.getMemory(curr->sourceMemory)->shared) { + parent.readsSharedMemory = true; + } else { + parent.readsMemory = true; + } + if (parent.module.getMemory(curr->destMemory)->shared) { + parent.writesSharedMemory = true; + } else { + parent.writesMemory = true; + } parent.implicitTrap = true; } void visitMemoryFill(MemoryFill* curr) { - parent.writesMemory = true; + if (parent.module.getMemory(curr->memory)->shared) { + parent.writesSharedMemory = true; + } else { + parent.writesMemory = true; + } parent.implicitTrap = true; } void visitConst(Const* curr) {} @@ -738,21 +883,29 @@ class EffectAnalyzer { void 
visitReturn(Return* curr) { parent.branchesOut = true; } void visitMemorySize(MemorySize* curr) { // memory.size accesses the size of the memory, and thus can be modeled as - // reading memory - parent.readsMemory = true; - // Atomics are sequentially consistent with memory.size. - parent.isAtomic = true; + // reading memory. When the memory is shared, this synchronizes with + // memory.grow on other threads. + if (parent.module.getMemory(curr->memory)->shared) { + parent.readsSharedMemory = true; + parent.readOrder = MemoryOrder::SeqCst; + } else { + parent.readsMemory = true; + } } void visitMemoryGrow(MemoryGrow* curr) { - // TODO: find out if calls is necessary here - parent.calls = true; // memory.grow technically does a read-modify-write operation on the // memory size in the successful case, modifying the set of valid - // addresses, and just a read operation in the failure case - parent.readsMemory = true; - parent.writesMemory = true; - // Atomics are also sequentially consistent with memory.grow. - parent.isAtomic = true; + // addresses, and just a read operation in the failure case. It + // synchronizes with memory.size on other threads, but only when operating + // on shared memories. 
+ if (parent.module.getMemory(curr->memory)->shared) { + parent.readsSharedMemory = true; + parent.writesSharedMemory = true; + parent.readOrder = parent.writeOrder = MemoryOrder::SeqCst; + } else { + parent.readsMemory = true; + parent.writesMemory = true; + } } void visitRefNull(RefNull* curr) {} void visitRefIsNull(RefIsNull* curr) {} @@ -886,112 +1039,104 @@ class EffectAnalyzer { parent.trap = true; return; } - if (curr->ref->type.getHeapType() - .getStruct() - .fields[curr->index] - .mutable_ == Mutable) { - parent.readsMutableStruct = true; - } - // traps when the arg is null if (curr->ref->type.isNullable()) { parent.implicitTrap = true; } - switch (curr->order) { - case MemoryOrder::Unordered: - break; - case MemoryOrder::SeqCst: - // Synchronizes with other threads. - parent.isAtomic = true; - break; - case MemoryOrder::AcqRel: - // Only synchronizes if other threads can read the field. - parent.isAtomic = curr->ref->type.getHeapType().isShared(); - break; + auto heapType = curr->ref->type.getHeapType(); + if (heapType.getStruct().fields[curr->index].mutable_ == Mutable) { + if (heapType.isShared()) { + parent.readsSharedMutableStruct = true; + parent.readOrder = std::max(parent.readOrder, curr->order); + } else { + parent.readsMutableStruct = true; + } } } void visitStructSet(StructSet* curr) { + if (curr->ref->type == Type::unreachable) { + return; + } if (curr->ref->type.isNull()) { parent.trap = true; return; } - parent.writesStruct = true; - // traps when the arg is null if (curr->ref->type.isNullable()) { parent.implicitTrap = true; } - if (curr->order != MemoryOrder::Unordered) { - parent.isAtomic = true; + if (curr->ref->type.getHeapType().isShared()) { + parent.writesSharedStruct = true; + parent.writeOrder = std::max(parent.writeOrder, curr->order); + } else { + parent.writesStruct = true; } } - void visitStructRMW(StructRMW* curr) { + template + void visitStructRMWExpr(StructRMWExpr* curr) { + if (curr->ref->type == Type::unreachable) { + 
return; + } if (curr->ref->type.isNull()) { parent.trap = true; return; } - parent.readsMutableStruct = true; - parent.writesStruct = true; if (curr->ref->type.isNullable()) { parent.implicitTrap = true; } - assert(curr->order != MemoryOrder::Unordered); - parent.isAtomic = true; + if (curr->ref->type.getHeapType().isShared()) { + parent.readsSharedMutableStruct = true; + parent.writesSharedStruct = true; + parent.readOrder = std::max(parent.readOrder, curr->order); + parent.writeOrder = std::max(parent.writeOrder, curr->order); + } else { + parent.readsMutableStruct = true; + parent.writesStruct = true; + } } - void visitStructCmpxchg(StructCmpxchg* curr) { - if (curr->ref->type.isNull()) { + void visitStructRMW(StructRMW* curr) { visitStructRMWExpr(curr); } + void visitStructCmpxchg(StructCmpxchg* curr) { visitStructRMWExpr(curr); } + void visitStructWait(StructWait* curr) { + // StructWait doesn't strictly write a struct, but it does modify the + // waiters list associated with the waitqueue field, which we can think + // of as a write. + if (curr->ref->type == Type::unreachable) { + return; + } + if (curr->ref->type.isNull() || + !curr->ref->type.getHeapType().isShared()) { parent.trap = true; return; } - parent.readsMutableStruct = true; - parent.writesStruct = true; if (curr->ref->type.isNullable()) { parent.implicitTrap = true; } - assert(curr->order != MemoryOrder::Unordered); - parent.isAtomic = true; - } - void visitStructWait(StructWait* curr) { - parent.isAtomic = true; - - // If the ref is null. - parent.implicitTrap = true; + parent.readsSharedMutableStruct = true; + parent.writesSharedStruct = true; + parent.readOrder = parent.writeOrder = MemoryOrder::SeqCst; // If the timeout is negative and no-one wakes us. parent.mayNotReturn = true; - - // struct.wait mutates an opaque waiter queue which isn't visible in user - // code. Model this as a struct write which prevents reorderings (since - // isAtomic == true). 
- parent.writesStruct = true; - + } + void visitStructNotify(StructNotify* curr) { if (curr->ref->type == Type::unreachable) { return; } - - // If the ref isn't `unreachable`, then the field must exist and be a - // packed waitqueue due to validation. - assert(curr->ref->type.isStruct()); - assert(curr->index < - curr->ref->type.getHeapType().getStruct().fields.size()); - assert(curr->ref->type.getHeapType() - .getStruct() - .fields.at(curr->index) - .packedType == Field::PackedType::WaitQueue); - - parent.readsMutableStruct |= curr->ref->type.getHeapType() - .getStruct() - .fields.at(curr->index) - .mutable_ == Mutable; - } - void visitStructNotify(StructNotify* curr) { - parent.isAtomic = true; - - // If the ref is null. - parent.implicitTrap = true; - - // struct.notify mutates an opaque waiter queue which isn't visible in - // user code. Model this as a struct write which prevents reorderings - // (since isAtomic == true). - parent.writesStruct = true; + if (curr->ref->type.isNull()) { + parent.trap = true; + return; + } + if (curr->ref->type.isNullable()) { + parent.implicitTrap = true; + } + // Non-shared notifies just return 0. + if (!curr->ref->type.getHeapType().isShared()) { + return; + } + // StructNotify doesn't strictly write the struct, but it does + // modify the waiters list associated with the waitqueue field, which we + // can think of as a write. 
+ parent.readsSharedMutableStruct = true; + parent.writesSharedStruct = true; + parent.readOrder = parent.writeOrder = MemoryOrder::SeqCst; } void visitArrayNew(ArrayNew* curr) {} void visitArrayNewData(ArrayNewData* curr) { @@ -1006,88 +1151,142 @@ class EffectAnalyzer { } void visitArrayNewFixed(ArrayNewFixed* curr) {} void visitArrayGet(ArrayGet* curr) { + if (curr->ref->type == Type::unreachable) { + return; + } if (curr->ref->type.isNull()) { parent.trap = true; return; } - parent.readsArray = true; - // traps when the arg is null or the index out of bounds - parent.implicitTrap = true; + if (curr->ref->type.isNullable()) { + parent.implicitTrap = true; + } + auto heapType = curr->ref->type.getHeapType(); + if (heapType.getArray().element.mutable_ == Mutable) { + if (heapType.isShared()) { + parent.readsSharedMutableArray = true; + parent.readOrder = std::max(parent.readOrder, curr->order); + } else { + parent.readsMutableArray = true; + } + } } void visitArraySet(ArraySet* curr) { + if (curr->ref->type == Type::unreachable) { + return; + } if (curr->ref->type.isNull()) { parent.trap = true; return; } - parent.writesArray = true; - // traps when the arg is null or the index out of bounds - parent.implicitTrap = true; + if (curr->ref->type.isNullable()) { + parent.implicitTrap = true; + } + auto heapType = curr->ref->type.getHeapType(); + if (heapType.isShared()) { + parent.writesSharedArray = true; + parent.writeOrder = std::max(parent.writeOrder, curr->order); + } else { + parent.writesArray = true; + } } void visitArrayLen(ArrayLen* curr) { + if (curr->ref->type == Type::unreachable) { + return; + } if (curr->ref->type.isNull()) { parent.trap = true; return; } - // traps when the arg is null if (curr->ref->type.isNullable()) { parent.implicitTrap = true; } } void visitArrayCopy(ArrayCopy* curr) { + if (curr->destRef->type == Type::unreachable || + curr->srcRef->type == Type::unreachable) { + return; + } if (curr->destRef->type.isNull() || 
curr->srcRef->type.isNull()) { parent.trap = true; return; } - parent.readsArray = true; - parent.writesArray = true; - // traps when a ref is null, or when out of bounds. - parent.implicitTrap = true; + if (curr->destRef->type.isNullable() || curr->srcRef->type.isNullable()) { + parent.implicitTrap = true; + } + auto srcHeapType = curr->srcRef->type.getHeapType(); + if (srcHeapType.getArray().element.mutable_ == Mutable) { + if (srcHeapType.isShared()) { + parent.readsSharedMutableArray = true; + } else { + parent.readsMutableArray = true; + } + } + if (curr->destRef->type.getHeapType().isShared()) { + parent.writesSharedArray = true; + } else { + parent.writesArray = true; + } } void visitArrayFill(ArrayFill* curr) { + if (curr->ref->type == Type::unreachable) { + return; + } if (curr->ref->type.isNull()) { parent.trap = true; return; } - parent.writesArray = true; - // Traps when the destination is null or when out of bounds. - parent.implicitTrap = true; + if (curr->ref->type.isNullable()) { + parent.implicitTrap = true; + } + if (curr->ref->type.getHeapType().isShared()) { + parent.writesSharedArray = true; + } else { + parent.writesArray = true; + } } template void visitArrayInit(ArrayInit* curr) { + if (curr->ref->type == Type::unreachable) { + return; + } if (curr->ref->type.isNull()) { parent.trap = true; return; } - parent.writesArray = true; - // Traps when the destination is null, when out of bounds in source or - // destination, or when the source segment has been dropped. 
- parent.implicitTrap = true; + if (curr->ref->type.isNullable()) { + parent.implicitTrap = true; + } + if (curr->ref->type.getHeapType().isShared()) { + parent.writesSharedArray = true; + } else { + parent.writesArray = true; + } } void visitArrayInitData(ArrayInitData* curr) { visitArrayInit(curr); } void visitArrayInitElem(ArrayInitElem* curr) { visitArrayInit(curr); } - void visitArrayRMW(ArrayRMW* curr) { - if (curr->ref->type.isNull()) { - parent.trap = true; + template void visitArrayRMWExpr(ArrayRMWExpr* curr) { + if (curr->ref->type == Type::unreachable) { return; } - parent.readsArray = true; - parent.writesArray = true; - // traps when the arg is null or the index out of bounds - parent.implicitTrap = true; - assert(curr->order != MemoryOrder::Unordered); - parent.isAtomic = true; - } - void visitArrayCmpxchg(ArrayCmpxchg* curr) { if (curr->ref->type.isNull()) { parent.trap = true; return; } - parent.readsArray = true; - parent.writesArray = true; - // traps when the arg is null or the index out of bounds - parent.implicitTrap = true; - assert(curr->order != MemoryOrder::Unordered); - parent.isAtomic = true; + if (curr->ref->type.isNullable()) { + parent.implicitTrap = true; + } + if (curr->ref->type.getHeapType().isShared()) { + parent.readsSharedMutableArray = true; + parent.writesSharedArray = true; + parent.readOrder = std::max(parent.readOrder, curr->order); + parent.writeOrder = std::max(parent.writeOrder, curr->order); + } else { + parent.readsMutableArray = true; + parent.writesArray = true; + } } + void visitArrayRMW(ArrayRMW* curr) { visitArrayRMWExpr(curr); } + void visitArrayCmpxchg(ArrayCmpxchg* curr) { visitArrayRMWExpr(curr); } void visitRefAs(RefAs* curr) { if (curr->op == AnyConvertExtern || curr->op == ExternConvertAny) { // These conversions are infallible. 
@@ -1108,7 +1307,7 @@ class EffectAnalyzer { // traps when ref is null parent.implicitTrap = true; if (curr->op != StringNewFromCodePoint) { - parent.readsArray = true; + parent.readsMutableArray = true; } } void visitStringConst(StringConst* curr) {} @@ -1208,14 +1407,14 @@ class EffectAnalyzer { public: // Helpers - // See comment on invalidate() for the assumptions on the inputs here. + // See comment on orderedBefore() for the assumptions on the inputs here. static bool canReorder(const PassOptions& passOptions, Module& module, Expression* a, Expression* b) { EffectAnalyzer aEffects(passOptions, module, a); EffectAnalyzer bEffects(passOptions, module, b); - return !aEffects.invalidates(bEffects); + return !aEffects.orderedBefore(bEffects); } // C-API @@ -1259,10 +1458,11 @@ class EffectAnalyzer { if (globalsWritten.size() > 0) { effects |= SideEffects::WritesGlobal; } - if (readsMemory) { + // TODO: Expand the C API to handle shared locations, structs, and arrays. + if (readsMemory || readsSharedMemory) { effects |= SideEffects::ReadsMemory; } - if (writesMemory) { + if (writesMemory || writesSharedMemory) { effects |= SideEffects::WritesMemory; } if (readsTable) { @@ -1277,7 +1477,9 @@ class EffectAnalyzer { if (trapsNeverHappen) { effects |= SideEffects::TrapsNeverHappen; } - if (isAtomic) { + // TODO: Expand the C API to handle memory orders. + if (readOrder != MemoryOrder::Unordered || + writeOrder != MemoryOrder::Unordered) { effects |= SideEffects::IsAtomic; } if (throws_) { diff --git a/src/ir/localize.h b/src/ir/localize.h index b36fe639612..5dd58ef0db5 100644 --- a/src/ir/localize.h +++ b/src/ir/localize.h @@ -133,7 +133,9 @@ struct ChildLocalizer { // TODO: Avoid quadratic time here by accumulating effects and checking // vs the accumulation. 
for (Index j = 0; j < num; j++) { - if (j != i && effects[i].invalidates(effects[j])) { + auto before = std::min(i, j); + auto after = std::max(i, j); + if (j != i && effects[before].orderedBefore(effects[after])) { needLocal = true; break; } diff --git a/src/passes/CodePushing.cpp b/src/passes/CodePushing.cpp index 65eedba6bac..ce6f6fa7fde 100644 --- a/src/passes/CodePushing.cpp +++ b/src/passes/CodePushing.cpp @@ -204,7 +204,7 @@ class Pusher { auto* pushable = isPushable(list[i]); if (pushable) { const auto& effects = getPushableEffects(pushable); - if (cumulativeEffects.invalidates(effects)) { + if (cumulativeEffects.orderedAfter(effects)) { // we can't push this, so further pushables must pass it cumulativeEffects.mergeIn(effects); } else { @@ -354,7 +354,7 @@ class Pusher { const auto& effects = getPushableEffects(pushable); - if (cumulativeEffects.invalidates(effects)) { + if (cumulativeEffects.orderedAfter(effects)) { // This can't be moved forward. Add it to the things that are not // moving. cumulativeEffects.walk(list[i]); diff --git a/src/passes/HeapStoreOptimization.cpp b/src/passes/HeapStoreOptimization.cpp index e4d3d7a1cb2..0ffa5416241 100644 --- a/src/passes/HeapStoreOptimization.cpp +++ b/src/passes/HeapStoreOptimization.cpp @@ -181,7 +181,7 @@ struct HeapStoreOptimization // effects. auto firstEffects = effects(list[i]); auto secondEffects = effects(list[j]); - if (secondEffects.invalidates(firstEffects)) { + if (firstEffects.orderedBefore(secondEffects)) { return false; } @@ -241,7 +241,7 @@ struct HeapStoreOptimization if (!new_->isWithDefault()) { for (Index i = index + 1; i < operands.size(); i++) { auto operandEffects = effects(operands[i]); - if (operandEffects.invalidates(setValueEffects)) { + if (operandEffects.orderedBefore(setValueEffects)) { // TODO: we could use locals to reorder everything return false; } @@ -252,7 +252,7 @@ struct HeapStoreOptimization // if it exists. 
if (new_->desc) { auto descEffects = effects(new_->desc); - if (descEffects.invalidates(setValueEffects)) { + if (descEffects.orderedBefore(setValueEffects)) { // TODO: we could use locals to reorder everything return false; } @@ -264,7 +264,7 @@ struct HeapStoreOptimization // the optimization X' would happen first. ShallowEffectAnalyzer structNewEffects( getPassOptions(), *getModule(), new_); - if (structNewEffects.invalidates(setValueEffects)) { + if (structNewEffects.orderedBefore(setValueEffects)) { return false; } diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp index 73720bed430..b25aabdcf82 100644 --- a/src/passes/LocalCSE.cpp +++ b/src/passes/LocalCSE.cpp @@ -462,8 +462,9 @@ struct Checker // This is the first time we encounter this expression. assert(!activeOriginals.count(curr)); - // Given the current expression, see what it invalidates of the currently- - // hashed expressions, if there are any. + // Given the current expression, see which effects of the currently-hashed + // expressions it would have to remain ordered before, preventing removal of + // a later copy of the hashed expression. if (!activeOriginals.empty()) { // We only need to visit this node itself, as we have already visited its // children by the time we get here. @@ -474,7 +475,7 @@ struct Checker // (curr) // (COPY) // - // We are some code in between an original and a copy of it, and we are + // We are some code in between an original and its copy, and we are // trying to turn COPY into a local.get of a value that we stash at the // original. If |curr| traps then we simply don't reach the copy anyhow. 
// @@ -490,15 +491,13 @@ struct Checker auto idempotent = isIdempotent(curr, *getModule()); std::vector invalidated; - for (auto& kv : activeOriginals) { - auto* original = kv.first; + for (auto& [original, originalInfo] : activeOriginals) { if (idempotent && ExpressionAnalyzer::shallowEqual(curr, original)) { // |curr| is idempotent, so it does not invalidate later appearances // of itself. continue; } - auto& originalInfo = kv.second; - if (effects.invalidates(originalInfo.effects)) { + if (effects.orderedBefore(originalInfo.effects)) { invalidated.push_back(original); } } diff --git a/src/passes/LoopInvariantCodeMotion.cpp b/src/passes/LoopInvariantCodeMotion.cpp index 6c7aecbc71f..f7c294fcde5 100644 --- a/src/passes/LoopInvariantCodeMotion.cpp +++ b/src/passes/LoopInvariantCodeMotion.cpp @@ -22,8 +22,6 @@ // out expressions may allow moving at least part of a larger whole). // -#include - #include "ir/effects.h" #include "ir/find_all.h" #include "ir/local-graph.h" @@ -122,8 +120,10 @@ struct LoopInvariantCodeMotion // The rest of the loop's effects matter too, we must also // take into account global state like interacting loads and // stores. + // TODO: Simplify this to effectsSoFar.orderedBefore(effects) || + // effects.orderedBefore(loopEffects). bool unsafeToMove = effects.writesGlobalState() || - effectsSoFar.invalidates(effects) || + effectsSoFar.orderedBefore(effects) || (effects.readsMutableGlobalState() && loopEffects.writesGlobalState()); // TODO: look into optimizing this with exceptions. for now, disallow diff --git a/src/passes/MergeBlocks.cpp b/src/passes/MergeBlocks.cpp index 3013f67ac9b..202ea7d1316 100644 --- a/src/passes/MergeBlocks.cpp +++ b/src/passes/MergeBlocks.cpp @@ -507,17 +507,17 @@ struct MergeBlocks return outer; } if ((dependency1 && *dependency1) || (dependency2 && *dependency2)) { - // there are dependencies, things we must be reordered through. make sure - // no problems there + // There are dependencies, i.e. 
things we must be reordered before. Make + // sure there are no problems. EffectAnalyzer childEffects(getPassOptions(), *getModule(), child); if (dependency1 && *dependency1 && EffectAnalyzer(getPassOptions(), *getModule(), *dependency1) - .invalidates(childEffects)) { + .orderedBefore(childEffects)) { return outer; } if (dependency2 && *dependency2 && EffectAnalyzer(getPassOptions(), *getModule(), *dependency2) - .invalidates(childEffects)) { + .orderedBefore(childEffects)) { return outer; } } @@ -650,7 +650,7 @@ struct MergeBlocks // The block seems to have the shape we want. Check for effects: we want // to move all the items out but the last one, so they must all cross over - // anything we need to move past. + // anything we need to move before. // // In principle we could also handle the case where we can move out only // some of the block items. However, that would be more complex (we'd need @@ -665,7 +665,7 @@ struct MergeBlocks EffectAnalyzer blockChildEffects( getPassOptions(), *getModule(), blockChild); for (auto& effects : childEffects) { - if (blockChildEffects.invalidates(effects)) { + if (blockChildEffects.orderedAfter(effects)) { fail = true; break; } diff --git a/src/passes/Monomorphize.cpp b/src/passes/Monomorphize.cpp index 9f02d00402e..9543d429279 100644 --- a/src/passes/Monomorphize.cpp +++ b/src/passes/Monomorphize.cpp @@ -386,7 +386,7 @@ struct CallContext { // (as described before, we want to move this past immovable code) and // reasons intrinsic to the expression itself that might prevent moving. 
ShallowEffectAnalyzer currEffects(options, wasm, curr); - if (currEffects.invalidates(nonMovingEffects) || + if (currEffects.orderedBefore(nonMovingEffects) || !canBeMovedIntoContext(curr, currEffects)) { immovable.insert(curr); currImmovable = true; diff --git a/src/passes/OptimizeCasts.cpp b/src/passes/OptimizeCasts.cpp index aa7168f97c2..3ff08ec274c 100644 --- a/src/passes/OptimizeCasts.cpp +++ b/src/passes/OptimizeCasts.cpp @@ -142,8 +142,8 @@ struct EarlyCastFinder std::vector currRefAsMove; // Used to analyze expressions to see if casts can be moved past them. - EffectAnalyzer testRefCast; - EffectAnalyzer testRefAs; + EffectAnalyzer refCastEffects; + EffectAnalyzer refAsEffects; // Maps LocalGets to the most refined RefCast to move to it, to be used by the // EarlyCastApplier. If the most refined RefCast is already at the desired @@ -162,8 +162,8 @@ struct EarlyCastFinder EarlyCastFinder(PassOptions options, Module* module, Function* func) : options(options), numLocals(func->getNumLocals()), currRefCastMove(func->getNumLocals()), - currRefAsMove(func->getNumLocals()), testRefCast(options, *module), - testRefAs(options, *module) { + currRefAsMove(func->getNumLocals()), refCastEffects(options, *module), + refAsEffects(options, *module) { // TODO: generalize this when we handle more than RefAsNonNull. RefCast dummyRefCast(module->allocator); @@ -171,8 +171,8 @@ struct EarlyCastFinder RefAs dummyRefAs(module->allocator); dummyRefAs.op = RefAsNonNull; - testRefCast.visit(&dummyRefCast); - testRefAs.visit(&dummyRefAs); + refCastEffects.visit(&dummyRefCast); + refAsEffects.visit(&dummyRefAs); } // We track information as we go, looking for the best cast to move backwards, @@ -231,17 +231,17 @@ struct EarlyCastFinder void visitFunction(Function* curr) { flushAll(); } void visitExpression(Expression* curr) { - // A new one is instantiated for each expression to determine - // if a cast can be moved past it. 
- ShallowEffectAnalyzer currAnalyzer(options, *getModule(), curr); + // Determine whether a later cast could be moved before the current + // expression. + ShallowEffectAnalyzer currEffects(options, *getModule(), curr); - if (testRefCast.invalidates(currAnalyzer)) { + if (refCastEffects.orderedAfter(currEffects)) { for (size_t i = 0; i < numLocals; i++) { flushRefCastResult(i, *getModule()); } } - if (testRefAs.invalidates(currAnalyzer)) { + if (refAsEffects.orderedAfter(currEffects)) { for (size_t i = 0; i < numLocals; i++) { flushRefAsResult(i, *getModule()); } diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 28d5ae68153..3d4bc2517d3 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -1482,7 +1482,7 @@ struct OptimizeInstructions // To avoid such risks we should keep in mind the following: // // * Before removing a cast we should use its type information in the best - // way we can. Only after doing so should a cast be removed. In the exmaple + // way we can. Only after doing so should a cast be removed. In the example // above, that means first seeing that the ref.test must return 1, and only // then possibly removing the ref.cast. // * Do not remove a cast if removing it might remove useful information for @@ -1585,7 +1585,7 @@ struct OptimizeInstructions // trap we want to move. (We use a shallow effect analyzer since we // will only move the ref.as_non_null itself.)
 ShallowEffectAnalyzer movingEffects(options, *getModule(), input); - if (crossedEffects.invalidates(movingEffects)) { + if (movingEffects.orderedBefore(crossedEffects)) { return; } @@ -2563,7 +2563,7 @@ struct OptimizeInstructions auto& options = getPassRunner()->options; EffectAnalyzer descEffects(options, *getModule(), curr->desc); ShallowEffectAnalyzer movingEffects(options, *getModule(), curr->ref); - if (descEffects.invalidates(movingEffects)) { + if (movingEffects.orderedBefore(descEffects)) { return; } } @@ -2772,7 +2772,7 @@ struct OptimizeInstructions } // Check if two consecutive inputs to an instruction are equal. As they are - // consecutive, no code can execeute in between them, which simplies the + // consecutive, no code can execute in between them, which simplifies the // problem here (and which is the case we care about in this pass, which does // simple peephole optimizations - all we care about is a single instruction // at a time, and its inputs). @@ -2816,7 +2816,7 @@ struct OptimizeInstructions // originalRightEffects. auto originalRightEffects = effects(originalRight); auto rightEffects = effects(right); - if (originalRightEffects.invalidates(rightEffects)) { + if (originalRightEffects.orderedBefore(rightEffects)) { return false; } } @@ -2877,7 +2877,7 @@ struct OptimizeInstructions } ShallowEffectAnalyzer parentEffects( getPassOptions(), *getModule(), call); - if (parentEffects.invalidates(childEffects)) { + if (parentEffects.orderedAfter(childEffects)) { return false; } // No effects are possible. @@ -3341,7 +3341,7 @@ struct OptimizeInstructions // The condition is last, so we need a new local, and it may be a // bad idea to use a block like we do for an if.
Do it only if we // can reorder - if (!condition.invalidates(value)) { + if (!condition.orderedAfter(value)) { return builder.makeSequence(builder.makeDrop(c), ifTrue); } } @@ -3659,7 +3659,7 @@ struct OptimizeInstructions if (CostAnalyzer(left).cost < MIN_COST) { return nullptr; // avoidable code is too cheap } - if (leftEffects.invalidates(rightEffects)) { + if (leftEffects.orderedBefore(rightEffects)) { return nullptr; // cannot reorder } std::swap(left, right); diff --git a/src/passes/RemoveUnusedBrs.cpp b/src/passes/RemoveUnusedBrs.cpp index 1a6d355ff5e..05f18901ceb 100644 --- a/src/passes/RemoveUnusedBrs.cpp +++ b/src/passes/RemoveUnusedBrs.cpp @@ -65,9 +65,9 @@ stealSlice(Builder& builder, Block* input, Index from, Index to) { return ret; } -// to turn an if into a br-if, we must be able to reorder the -// condition and possible value, and the possible value must -// not have side effects (as they would run unconditionally) +// to turn an if into a br-if, we must be able to reorder the condition after +// the possible value, and the possible value must not have side effects (as +// they would run unconditionally) static bool canTurnIfIntoBrIf(Expression* ifCondition, Expression* brValue, PassOptions& options, @@ -83,7 +83,7 @@ static bool canTurnIfIntoBrIf(Expression* ifCondition, if (value.hasSideEffects()) { return false; } - return !EffectAnalyzer(options, wasm, ifCondition).invalidates(value); + return !EffectAnalyzer(options, wasm, ifCondition).orderedBefore(value); } // This leads to similar choices as LLVM does in some cases, by balancing the @@ -1421,7 +1421,7 @@ struct RemoveUnusedBrs : public WalkerPass> { if (!valueEffects.hasUnremovableSideEffects()) { auto conditionEffects = EffectAnalyzer(passOptions, *getModule(), br->condition); - if (!conditionEffects.invalidates(valueEffects)) { + if (!valueEffects.orderedBefore(conditionEffects)) { // All conditions met, perform the update. 
drop->value = br->condition; } @@ -1621,7 +1621,8 @@ struct RemoveUnusedBrs : public WalkerPass> { return nullptr; } EffectAnalyzer condition(passOptions, *getModule(), iff->condition); - if (condition.invalidates(ifTrue) || condition.invalidates(ifFalse)) { + if (condition.orderedBefore(ifTrue) || + condition.orderedBefore(ifFalse)) { return nullptr; } auto* select = Builder(*getModule()) diff --git a/src/passes/SimplifyLocals.cpp b/src/passes/SimplifyLocals.cpp index cf0b7495011..668ff062de1 100644 --- a/src/passes/SimplifyLocals.cpp +++ b/src/passes/SimplifyLocals.cpp @@ -303,7 +303,7 @@ struct SimplifyLocals // TODO: this is O(bad) std::vector invalidated; for (auto& [index, info] : sinkables) { - if (effects.invalidates(info.effects)) { + if (effects.orderedBefore(info.effects)) { invalidated.push_back(index); } } @@ -573,7 +573,7 @@ struct SimplifyLocals EffectAnalyzer value( this->getPassOptions(), *this->getModule(), set); *breakLocalSetPointer = set; - if (condition.invalidates(value)) { + if (condition.orderedBefore(value)) { // indeed, we can't do this, stop return; } diff --git a/src/wasm2js.h b/src/wasm2js.h index bc43e623885..1eacdccbc6c 100644 --- a/src/wasm2js.h +++ b/src/wasm2js.h @@ -1337,7 +1337,7 @@ Ref Wasm2JSBuilder::processExpression(Expression* curr, EffectAnalyzer targetEffects(parent->options, *module, curr->target); if (targetEffects.hasAnything()) { for (auto* operand : curr->operands) { - if (targetEffects.invalidates( + if (targetEffects.orderedBefore( EffectAnalyzer(parent->options, *module, operand))) { mustReorder = true; break; @@ -1969,8 +1969,8 @@ Ref Wasm2JSBuilder::processExpression(Expression* curr, parent->options, *module, curr->condition); EffectAnalyzer ifTrueEffects(parent->options, *module, curr->ifTrue); EffectAnalyzer ifFalseEffects(parent->options, *module, curr->ifFalse); - if (conditionEffects.invalidates(ifTrueEffects) || - conditionEffects.invalidates(ifFalseEffects) || + if 
(conditionEffects.orderedBefore(ifTrueEffects) || + conditionEffects.orderedBefore(ifFalseEffects) || ifTrueEffects.hasSideEffects() || ifFalseEffects.hasSideEffects()) { useLocals = true; } diff --git a/test/lit/passes/gufa-refs.wast b/test/lit/passes/gufa-refs.wast index 9907fd90c23..affa0e81674 100644 --- a/test/lit/passes/gufa-refs.wast +++ b/test/lit/passes/gufa-refs.wast @@ -1575,22 +1575,7 @@ ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result nullref) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (array.get $null - ;; CHECK-NEXT: (array.new_default $null - ;; CHECK-NEXT: (i32.const 10) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (ref.null none) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (unreachable) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (array.set $something ;; CHECK-NEXT: (array.new_default $something @@ -5622,16 +5607,15 @@ ;; Packed fields with signed gets. 
(module - ;; CHECK: (type $array (array (mut i8))) - - ;; CHECK: (type $1 (func)) + ;; CHECK: (type $0 (func)) ;; CHECK: (type $struct (struct (field i16))) (type $struct (struct (field i16))) + ;; CHECK: (type $array (array (mut i8))) (type $array (array (mut i8))) - ;; CHECK: (func $test-struct (type $1) + ;; CHECK: (func $test-struct (type $0) ;; CHECK-NEXT: (local $x (ref $struct)) ;; CHECK-NEXT: (local.set $x ;; CHECK-NEXT: (struct.new $struct @@ -5666,7 +5650,7 @@ ) ) - ;; CHECK: (func $test-array (type $1) + ;; CHECK: (func $test-array (type $0) ;; CHECK-NEXT: (local $x (ref $array)) ;; CHECK-NEXT: (local.set $x ;; CHECK-NEXT: (array.new_fixed $array 1 @@ -5674,26 +5658,10 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result i32) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (array.get_s $array - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (i32.const -1) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const -1) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block (result i32) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (array.get_u $array - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (i32.const 255) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 255) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $test-array diff --git a/test/lit/passes/simplify-locals-atomic-effects.wast b/test/lit/passes/simplify-locals-atomic-effects.wast new file mode 100644 index 00000000000..f7c9d617e1c --- /dev/null +++ b/test/lit/passes/simplify-locals-atomic-effects.wast @@ -0,0 +1,96 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. 
+;; RUN: wasm-opt %s -all --simplify-locals -S -o - | filecheck %s + +(module + ;; CHECK: (type $shared-struct (shared (struct (field (mut i32))))) + (type $shared-struct (shared (struct (field (mut i32))))) + ;; CHECK: (type $unshared-struct (struct (field (mut i32)))) + (type $unshared-struct (struct (field (mut i32)))) + + ;; CHECK: (memory $shared 1 1 shared) + (memory $shared 1 1 shared) + ;; CHECK: (memory $unshared 1 1) + (memory $unshared 1 1) + + ;; CHECK: (func $memory.size-shared-shared (type $2) (param $shared (ref null $shared-struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (memory.size $shared) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.atomic.set $shared-struct 0 + ;; CHECK-NEXT: (local.get $shared) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + (func $memory.size-shared-shared (param $shared (ref null $shared-struct)) (result i32) + (local $x i32) + ;; memory.size can synchronize with memory.grow on another thread when the + ;; memory is shared. It cannot be moved past a write to a shared struct. + (local.set $x + (memory.size $shared) + ) + (struct.atomic.set $shared-struct 0 (local.get $shared) (i32.const 0)) + (local.get $x) + ) + + ;; CHECK: (func $memory.size-unshared-shared (type $2) (param $shared (ref null $shared-struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (struct.atomic.set $shared-struct 0 + ;; CHECK-NEXT: (local.get $shared) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (memory.size $unshared) + ;; CHECK-NEXT: ) + (func $memory.size-unshared-shared (param $shared (ref null $shared-struct)) (result i32) + (local $x i32) + ;; Now the memory is unshared. We can reorder because no other thread can + ;; observe the reordering. 
+ (local.set $x + (memory.size $unshared) + ) + (struct.atomic.set $shared-struct 0 (local.get $shared) (i32.const 0)) + (local.get $x) + ) + + ;; CHECK: (func $memory.size-shared-unshared (type $3) (param $unshared (ref null $unshared-struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (struct.atomic.set $unshared-struct 0 + ;; CHECK-NEXT: (local.get $unshared) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (memory.size $shared) + ;; CHECK-NEXT: ) + (func $memory.size-shared-unshared (param $unshared (ref null $unshared-struct)) (result i32) + (local $x i32) + ;; Now the memory is shared but the struct is unshared. Again, we can + ;; reorder. + (local.set $x + (memory.size $shared) + ) + (struct.atomic.set $unshared-struct 0 (local.get $unshared) (i32.const 0)) + (local.get $x) + ) + + ;; CHECK: (func $memory.size-unshared-unshared (type $3) (param $unshared (ref null $unshared-struct)) (result i32) + ;; CHECK-NEXT: (local $x i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: (struct.atomic.set $unshared-struct 0 + ;; CHECK-NEXT: (local.get $unshared) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (memory.size $unshared) + ;; CHECK-NEXT: ) + (func $memory.size-unshared-unshared (param $unshared (ref null $unshared-struct)) (result i32) + (local $x i32) + ;; Now both the memory and struct are unshared, so neither can synchronize + ;; with anything and we reorder freely. 
+ (local.set $x + (memory.size $unshared) + ) + (struct.atomic.set $unshared-struct 0 (local.get $unshared) (i32.const 0)) + (local.get $x) + ) +) diff --git a/test/lit/passes/vacuum-gc-atomics.wast b/test/lit/passes/vacuum-gc-atomics.wast index 49a8a8a6f90..90a1c854e87 100644 --- a/test/lit/passes/vacuum-gc-atomics.wast +++ b/test/lit/passes/vacuum-gc-atomics.wast @@ -6,10 +6,10 @@ ;; RUN: wasm-opt %s -all --vacuum -S -o - | filecheck %s (module - ;; CHECK: (type $shared (shared (struct (field i32)))) - (type $shared (shared (struct (field i32)))) - ;; CHECK: (type $unshared (struct (field i32))) - (type $unshared (struct (field i32))) + ;; CHECK: (type $shared (shared (struct (field (mut i32))))) + (type $shared (shared (struct (field (mut i32))))) + (type $unshared (struct (field (mut i32)))) + (type $shared-immutable (shared (struct (field i32)))) ;; CHECK: (func $get-unordered-unshared (type $0) ;; CHECK-NEXT: (nop) @@ -33,12 +33,19 @@ ) ) + ;; CHECK: (func $get-unordered-shared-immutable (type $0) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $get-unordered-shared-immutable + (drop + (struct.get $shared-immutable 0 + (struct.new_default $shared-immutable) + ) + ) + ) + ;; CHECK: (func $get-seqcst-unshared (type $0) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (struct.atomic.get $unshared 0 - ;; CHECK-NEXT: (struct.new_default $unshared) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) (func $get-seqcst-unshared (drop @@ -63,6 +70,18 @@ ) ) + ;; CHECK: (func $get-seqcst-shared-immutable (type $0) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $get-seqcst-shared-immutable + (drop + (struct.atomic.get seqcst $shared-immutable 0 + (struct.new_default $shared-immutable) + ) + ) + ) + + ;; CHECK: (func $get-acqrel-unshared (type $0) ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) @@ -88,4 +107,15 @@ ) ) ) + + ;; CHECK: (func $get-acqrel-shared-immutable (type $0) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $get-acqrel-shared-immutable + 
(drop + (struct.atomic.get acqrel $shared-immutable 0 + (struct.new_default $shared-immutable) + ) + ) + ) ) diff --git a/test/lit/passes/vacuum-tnh.wast b/test/lit/passes/vacuum-tnh.wast index 8bfcfd95170..078db2d8230 100644 --- a/test/lit/passes/vacuum-tnh.wast +++ b/test/lit/passes/vacuum-tnh.wast @@ -14,7 +14,7 @@ ;; NO_TNH: (tag $tag (type $2) (param i32)) (tag $tag (param i32)) - (memory 1 1) + (memory 1 1 shared) (type $struct (struct (field (mut i32)))) From ed9e9993f3e0a2e8b10f53f0423f0f94972c1ab4 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 26 Feb 2026 22:21:44 -0800 Subject: [PATCH 2/4] update other tests --- test/example/cpp-unit.cpp | 2 +- test/wasm2js/bulk-memory.2asm.js.opt | 42 ++-------------------------- 2 files changed, 3 insertions(+), 41 deletions(-) diff --git a/test/example/cpp-unit.cpp b/test/example/cpp-unit.cpp index 13e055b20e6..1a2b9927432 100644 --- a/test/example/cpp-unit.cpp +++ b/test/example/cpp-unit.cpp @@ -632,7 +632,7 @@ void test_effects() { EffectAnalyzer effects(options, module); effects.visit(&arrayCopy); assert_equal(effects.trap, true); - assert_equal(effects.readsArray, true); + assert_equal(effects.readsMutableArray, true); assert_equal(effects.writesArray, true); assert_equal(effects.readsMutableStruct, false); assert_equal(effects.writesStruct, false); diff --git a/test/wasm2js/bulk-memory.2asm.js.opt b/test/wasm2js/bulk-memory.2asm.js.opt index 50ddecee8b2..c8a3a5c9731 100644 --- a/test/wasm2js/bulk-memory.2asm.js.opt +++ b/test/wasm2js/bulk-memory.2asm.js.opt @@ -276,15 +276,6 @@ export var init = retasmFunc.init; export var load8_u = retasmFunc.load8_u; function asmFunc(imports) { - var buffer = new ArrayBuffer(65536); - var HEAP8 = new Int8Array(buffer); - var HEAP16 = new Int16Array(buffer); - var HEAP32 = new Int32Array(buffer); - var HEAPU8 = new Uint8Array(buffer); - var HEAPU16 = new Uint16Array(buffer); - var HEAPU32 = new Uint32Array(buffer); - var HEAPF32 = new Float32Array(buffer); - var HEAPF64 
= new Float64Array(buffer); var Math_imul = Math.imul; var Math_fround = Math.fround; var Math_abs = Math.abs; @@ -299,40 +290,11 @@ function asmFunc(imports) { } - function $1() { - __wasm_memory_size(); - } - - function __wasm_memory_size() { - return buffer.byteLength / 65536 | 0; - } - - function __wasm_memory_grow(pagesToAdd) { - pagesToAdd = pagesToAdd | 0; - var oldPages = __wasm_memory_size() | 0; - var newPages = oldPages + pagesToAdd | 0; - if ((oldPages < newPages) && (newPages < 65536)) { - var newBuffer = new ArrayBuffer(Math_imul(newPages, 65536)); - var newHEAP8 = new Int8Array(newBuffer); - newHEAP8.set(HEAP8); - HEAP8 = new Int8Array(newBuffer); - HEAP16 = new Int16Array(newBuffer); - HEAP32 = new Int32Array(newBuffer); - HEAPU8 = new Uint8Array(newBuffer); - HEAPU16 = new Uint16Array(newBuffer); - HEAPU32 = new Uint32Array(newBuffer); - HEAPF32 = new Float32Array(newBuffer); - HEAPF64 = new Float64Array(newBuffer); - buffer = newBuffer; - } - return oldPages; - } - return { "drop_passive": $0, - "init_passive": $1, + "init_passive": $0, "drop_active": $0, - "init_active": $1 + "init_active": $0 }; } From 382a617a2fbcf4f71677f6dde679aa1601192171 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 27 Feb 2026 08:05:05 -0800 Subject: [PATCH 3/4] fix effects for array OOB access --- src/ir/effects.h | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/ir/effects.h b/src/ir/effects.h index 4ac9f227692..f81155ae148 100644 --- a/src/ir/effects.h +++ b/src/ir/effects.h @@ -1158,9 +1158,8 @@ class EffectAnalyzer { parent.trap = true; return; } - if (curr->ref->type.isNullable()) { - parent.implicitTrap = true; - } + // Null refs and OOB access. 
+ parent.implicitTrap = true; auto heapType = curr->ref->type.getHeapType(); if (heapType.getArray().element.mutable_ == Mutable) { if (heapType.isShared()) { @@ -1179,9 +1178,8 @@ class EffectAnalyzer { parent.trap = true; return; } - if (curr->ref->type.isNullable()) { - parent.implicitTrap = true; - } + // Null refs and OOB access. + parent.implicitTrap = true; auto heapType = curr->ref->type.getHeapType(); if (heapType.isShared()) { parent.writesSharedStruct = true; @@ -1211,9 +1209,8 @@ class EffectAnalyzer { parent.trap = true; return; } - if (curr->destRef->type.isNullable() || curr->srcRef->type.isNullable()) { - parent.implicitTrap = true; - } + // Null refs and OOB access. + parent.implicitTrap = true; auto srcHeapType = curr->srcRef->type.getHeapType(); if (srcHeapType.getArray().element.mutable_ == Mutable) { if (srcHeapType.isShared()) { @@ -1236,9 +1233,8 @@ class EffectAnalyzer { parent.trap = true; return; } - if (curr->ref->type.isNullable()) { - parent.implicitTrap = true; - } + // Null refs and OOB access. + parent.implicitTrap = true; if (curr->ref->type.getHeapType().isShared()) { parent.writesSharedArray = true; } else { @@ -1253,9 +1249,8 @@ class EffectAnalyzer { parent.trap = true; return; } - if (curr->ref->type.isNullable()) { - parent.implicitTrap = true; - } + // Null refs and OOB access. + parent.implicitTrap = true; if (curr->ref->type.getHeapType().isShared()) { parent.writesSharedArray = true; } else { @@ -1272,9 +1267,8 @@ class EffectAnalyzer { parent.trap = true; return; } - if (curr->ref->type.isNullable()) { - parent.implicitTrap = true; - } + // Null refs and OOB access. 
+ parent.implicitTrap = true; if (curr->ref->type.getHeapType().isShared()) { parent.readsSharedMutableArray = true; parent.writesSharedArray = true; From e322f559250007dd96d3c0b0796e522a5633d3b1 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 27 Feb 2026 08:09:29 -0800 Subject: [PATCH 4/4] update tests --- test/lit/passes/gufa-refs.wast | 46 ++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/test/lit/passes/gufa-refs.wast b/test/lit/passes/gufa-refs.wast index affa0e81674..9907fd90c23 100644 --- a/test/lit/passes/gufa-refs.wast +++ b/test/lit/passes/gufa-refs.wast @@ -1575,7 +1575,22 @@ ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: (block + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.get $null + ;; CHECK-NEXT: (array.new_default $null + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (array.set $something ;; CHECK-NEXT: (array.new_default $something @@ -5607,15 +5622,16 @@ ;; Packed fields with signed gets. 
(module - ;; CHECK: (type $0 (func)) + ;; CHECK: (type $array (array (mut i8))) + + ;; CHECK: (type $1 (func)) ;; CHECK: (type $struct (struct (field i16))) (type $struct (struct (field i16))) - ;; CHECK: (type $array (array (mut i8))) (type $array (array (mut i8))) - ;; CHECK: (func $test-struct (type $0) + ;; CHECK: (func $test-struct (type $1) ;; CHECK-NEXT: (local $x (ref $struct)) ;; CHECK-NEXT: (local.set $x ;; CHECK-NEXT: (struct.new $struct @@ -5650,7 +5666,7 @@ ) ) - ;; CHECK: (func $test-array (type $0) + ;; CHECK: (func $test-array (type $1) ;; CHECK-NEXT: (local $x (ref $array)) ;; CHECK-NEXT: (local.set $x ;; CHECK-NEXT: (array.new_fixed $array 1 @@ -5658,10 +5674,26 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const -1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.get_s $array + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const -1) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.const 255) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (array.get_u $array + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 255) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $test-array