diff --git a/plonky2/src/hash/arch/x86_64/poseidon_goldilocks_avx2_bmi2.rs b/plonky2/src/hash/arch/x86_64/poseidon_goldilocks_avx2_bmi2.rs
index 0fddeba7..934583d6 100644
--- a/plonky2/src/hash/arch/x86_64/poseidon_goldilocks_avx2_bmi2.rs
+++ b/plonky2/src/hash/arch/x86_64/poseidon_goldilocks_avx2_bmi2.rs
@@ -757,9 +757,9 @@ unsafe fn partial_round(
         // multiplication where we've set the first element to 0.) Add the remaining bits now.
         // TODO: This is a bit of an afterthought, which is why these constants are loaded 22
         //   times... There's likely a better way of merging those results.
-        "vmovdqu ymm6, {mds_matrix}[rip]",
-        "vmovdqu ymm7, {mds_matrix}[rip + 32]",
-        "vmovdqu ymm8, {mds_matrix}[rip + 64]",
+        "vmovdqu ymm6, [{mds_matrix}]",
+        "vmovdqu ymm7, [{mds_matrix} + 32]",
+        "vmovdqu ymm8, [{mds_matrix} + 64]",
         "vpsllvq ymm9, ymm13, ymm6",
         "vpsllvq ymm10, ymm13, ymm7",
         "vpsllvq ymm11, ymm13, ymm8",
@@ -775,7 +775,7 @@ unsafe fn partial_round(
         // Reduction required.
 
         state0a = in(reg) state0a,
-        mds_matrix = sym TOP_ROW_EXPS,
+        mds_matrix = in(reg) &TOP_ROW_EXPS,
         inout("ymm0") unreduced_lo0_s,
         inout("ymm1") unreduced_lo1_s,
         inout("ymm2") unreduced_lo2_s,
diff --git a/plonky2/src/lib.rs b/plonky2/src/lib.rs
index 3bddec82..e5e77bb9 100644
--- a/plonky2/src/lib.rs
+++ b/plonky2/src/lib.rs
@@ -6,7 +6,6 @@
 #![allow(clippy::len_without_is_empty)]
 #![allow(clippy::needless_range_loop)]
 #![allow(clippy::return_self_not_must_use)]
-#![feature(asm_sym)]
 #![feature(generic_const_exprs)]
 #![feature(specialization)]
 #![feature(stdsimd)]