diff --git a/LeopardCommon.h b/LeopardCommon.h
index 581e815..f6c16c5 100644
--- a/LeopardCommon.h
+++ b/LeopardCommon.h
@@ -32,11 +32,10 @@
     TODO:
 
     Short-term:
-    + Unroll first/final butterflies to avoid extra copies/xors in encoder (17% of encode time)
-    + Add compile-time selectable XOR-only rowops instead of MULADD
     + Multithreading
 
     Mid-term:
+    + Add compile-time selectable XOR-only rowops instead of MULADD
     + Look into 12-bit fields as a performance optimization
 
     Long-term:
diff --git a/LeopardFF16.cpp b/LeopardFF16.cpp
index 903da9d..1888b4b 100644
--- a/LeopardFF16.cpp
+++ b/LeopardFF16.cpp
@@ -832,11 +832,10 @@ static void IFFT_DIT_Encoder(
         }
     }
 
+    // I tried unrolling this, but it yields no more than a 5% performance
+    // improvement for 16-bit finite fields, so it is not worth the complexity.
     if (xor_result)
-    {
-        for (unsigned i = 0; i < m; ++i)
-            xor_mem(xor_result[i], work[i], bytes);
-    }
+        VectorXOR(bytes, m, xor_result, work);
 }