diff --git a/example/support/calcwit.cpp b/example/support/calcwit.cpp new file mode 100644 index 0000000..a8e2d48 --- /dev/null +++ b/example/support/calcwit.cpp @@ -0,0 +1,129 @@ +#include +#include +#include +#include "calcwit.hpp" + +namespace CIRCUIT_NAME { + +extern void run(Circom_CalcWit* ctx); + +std::string int_to_hex( u64 i ) +{ + std::stringstream stream; + stream << "0x" + << std::setfill ('0') << std::setw(16) + << std::hex << i; + return stream.str(); +} + +u64 fnv1a(std::string s) { + u64 hash = 0xCBF29CE484222325LL; + for(char& c : s) { + hash ^= u64(c); + hash *= 0x100000001B3LL; + } + return hash; +} + +Circom_CalcWit::Circom_CalcWit (Circom_Circuit *aCircuit, uint maxTh) { + circuit = aCircuit; + inputSignalAssignedCounter = get_main_input_signal_no(); + inputSignalAssigned = new bool[inputSignalAssignedCounter]; + for (int i = 0; i< inputSignalAssignedCounter; i++) { + inputSignalAssigned[i] = false; + } + signalValues = new FrElement[get_total_signal_no()]; + Fr_str2element(&signalValues[0], "1", 10); + componentMemory = new Circom_Component[get_number_of_components()]; + circuitConstants = circuit ->circuitConstants; + templateInsId2IOSignalInfo = circuit -> templateInsId2IOSignalInfo; + + maxThread = maxTh; + + // parallelism + numThread = 0; + +} + +Circom_CalcWit::~Circom_CalcWit() { + // ... 
+} + +uint Circom_CalcWit::getInputSignalHashPosition(u64 h) { + uint n = get_size_of_input_hashmap(); + uint pos = (uint)(h % (u64)n); + if (circuit->InputHashMap[pos].hash!=h){ + uint inipos = pos; + pos++; + while (pos != inipos) { + if (circuit->InputHashMap[pos].hash==h) return pos; + if (circuit->InputHashMap[pos].hash==0) { + fprintf(stderr, "Signal not found\n"); + assert(false); + } + pos = (pos+1)%n; + } + fprintf(stderr, "Signals not found\n"); + assert(false); + } + return pos; +} + +void Circom_CalcWit::tryRunCircuit(){ + if (inputSignalAssignedCounter == 0) { + run(this); + } +} + +void Circom_CalcWit::setInputSignal(u64 h, uint i, FrElement & val){ + if (inputSignalAssignedCounter == 0) { + fprintf(stderr, "No more signals to be assigned\n"); + assert(false); + } + uint pos = getInputSignalHashPosition(h); + if (i >= circuit->InputHashMap[pos].signalsize) { + fprintf(stderr, "Input signal array access exceeds the size\n"); + assert(false); + } + + uint si = circuit->InputHashMap[pos].signalid+i; + if (inputSignalAssigned[si-get_main_input_signal_start()]) { + fprintf(stderr, "Signal assigned twice: %d\n", si); + assert(false); + } + signalValues[si] = val; + inputSignalAssigned[si-get_main_input_signal_start()] = true; + inputSignalAssignedCounter--; + tryRunCircuit(); +} + +u64 Circom_CalcWit::getInputSignalSize(u64 h) { + uint pos = getInputSignalHashPosition(h); + return circuit->InputHashMap[pos].signalsize; +} + +std::string Circom_CalcWit::getTrace(u64 id_cmp){ + if (id_cmp == 0) return componentMemory[id_cmp].componentName; + else{ + u64 id_father = componentMemory[id_cmp].idFather; + std::string my_name = componentMemory[id_cmp].componentName; + + return Circom_CalcWit::getTrace(id_father) + "." 
+ my_name; + } + + +} + +std::string Circom_CalcWit::generate_position_array(uint* dimensions, uint size_dimensions, uint index){ + std::string positions = ""; + + for (uint i = 0 ; i < size_dimensions; i++){ + uint last_pos = index % dimensions[size_dimensions -1 - i]; + index = index / dimensions[size_dimensions -1 - i]; + std::string new_pos = "[" + std::to_string(last_pos) + "]"; + positions = new_pos + positions; + } + return positions; +} + +} //namespace diff --git a/example/support/calcwit.hpp b/example/support/calcwit.hpp new file mode 100644 index 0000000..b9028ed --- /dev/null +++ b/example/support/calcwit.hpp @@ -0,0 +1,73 @@ +#ifndef CIRCOM_CALCWIT_H +#define CIRCOM_CALCWIT_H + +#include +#include +#include +#include +#include + +#include "circom.hpp" +#include "fr.hpp" + +#define NMUTEXES 12 //512 + +namespace CIRCUIT_NAME { + +u64 fnv1a(std::string s); + +class Circom_CalcWit { + + bool *inputSignalAssigned; + uint inputSignalAssignedCounter; + + Circom_Circuit *circuit; + +public: + + FrElement *signalValues; + Circom_Component* componentMemory; + FrElement* circuitConstants; + std::map templateInsId2IOSignalInfo; + std::string* listOfTemplateMessages; + + // parallelism + std::mutex numThreadMutex; + std::condition_variable ntcvs; + uint numThread; + + uint maxThread; + + // Functions called by the circuit + Circom_CalcWit(Circom_Circuit *aCircuit, uint numTh = NMUTEXES); + ~Circom_CalcWit(); + + // Public functions + void setInputSignal(u64 h, uint i, FrElement &val); + void tryRunCircuit(); + + u64 getInputSignalSize(u64 h); + + inline uint getRemaingInputsToBeSet() { + return inputSignalAssignedCounter; + } + + inline void getWitness(uint idx, PFrElement val) { + Fr_copy(val, &signalValues[circuit->witness2SignalList[idx]]); + } + + std::string getTrace(u64 id_cmp); + + std::string generate_position_array(uint* dimensions, uint size_dimensions, uint index); + +private: + + uint getInputSignalHashPosition(u64 h); + +}; + +typedef void 
(*Circom_TemplateFunction)(uint __cIdx, Circom_CalcWit* __ctx); + +} //namespace + +#endif // CIRCOM_CALCWIT_H diff --git a/example/support/fr.asm b/example/support/fr.asm new file mode 100644 index 0000000..720e159 --- /dev/null +++ b/example/support/fr.asm @@ -0,0 +1,8799 @@ + + + global Fr_copy + global Fr_copyn + global Fr_add + global Fr_sub + global Fr_neg + global Fr_mul + global Fr_square + global Fr_band + global Fr_bor + global Fr_bxor + global Fr_bnot + global Fr_shl + global Fr_shr + global Fr_eq + global Fr_neq + global Fr_lt + global Fr_gt + global Fr_leq + global Fr_geq + global Fr_land + global Fr_lor + global Fr_lnot + global Fr_toNormal + global Fr_toLongNormal + global Fr_toMontgomery + global Fr_toInt + global Fr_isTrue + global Fr_q + global Fr_R3 + + global Fr_rawCopy + global Fr_rawZero + global Fr_rawSwap + global Fr_rawAdd + global Fr_rawSub + global Fr_rawNeg + global Fr_rawMMul + global Fr_rawMMul1 + global Fr_rawMSquare + global Fr_rawToMontgomery + global Fr_rawFromMontgomery + global Fr_rawIsEq + global Fr_rawIsZero + global Fr_rawShr + global Fr_rawShl + global Fr_rawq + global Fr_rawR3 + + extern Fr_fail + DEFAULT REL + + section .text + + + + + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; copy +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; rdi <= the dest +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_copy: + + mov rax, [rsi + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + mov [rdi + 24], rax + + mov rax, [rsi + 32] + mov [rdi + 32], rax + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawCopy +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; rdi <= the dest +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_rawCopy: + + mov rax, [rsi + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + mov 
[rdi + 24], rax + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawZero +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_rawZero: + xor rax, rax + + mov [rdi + 0], rax + + mov [rdi + 8], rax + + mov [rdi + 16], rax + + mov [rdi + 24], rax + + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawSwap +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rdi <= a +; rsi <= p +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_rawSwap: + + mov rax, [rsi + 0] + mov rcx, [rdi + 0] + mov [rdi + 0], rax + mov [rsi + 0], rbx + + mov rax, [rsi + 8] + mov rcx, [rdi + 8] + mov [rdi + 8], rax + mov [rsi + 8], rbx + + mov rax, [rsi + 16] + mov rcx, [rdi + 16] + mov [rdi + 16], rax + mov [rsi + 16], rbx + + mov rax, [rsi + 24] + mov rcx, [rdi + 24] + mov [rdi + 24], rax + mov [rsi + 24], rbx + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; copy an array of integers +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; rdi <= the dest +; rdx <= number of integers to copy +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_copyn: +Fr_copyn_loop: + mov r8, rsi + mov r9, rdi + mov rax, 5 + mul rdx + mov rcx, rax + cld + rep movsq + mov rsi, r8 + mov rdi, r9 + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawCopyS2L +;;;;;;;;;;;;;;;;;;;;;; +; Convert a 64 bit integer to a long format field element +; Params: +; rsi <= the integer +; rdi <= Pointer to the overwritted element +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; + +rawCopyS2L: + mov al, 0x80 + shl rax, 56 + mov [rdi], rax ; set the result to LONG normal + + cmp rsi, 0 + js u64toLong_adjust_neg + + mov [rdi + 8], rsi + xor rax, rax + + mov [rdi + 16], rax + + mov [rdi + 24], rax + + mov [rdi + 32], rax + + ret + +u64toLong_adjust_neg: + add rsi, [q] ; Set the first digit + mov [rdi + 8], rsi ; + + mov rsi, -1 ; all ones + + mov rax, rsi ; Add to q + adc rax, [q + 8 ] + mov [rdi + 16], rax + + mov rax, rsi ; Add to q + adc rax, [q + 16 ] + mov [rdi + 24], rax + + 
mov rax, rsi ; Add to q + adc rax, [q + 24 ] + mov [rdi + 32], rax + + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toInt +;;;;;;;;;;;;;;;;;;;;;; +; Convert a 64 bit integer to a long format field element +; Params: +; rsi <= Pointer to the element +; Returs: +; rax <= The value +;;;;;;;;;;;;;;;;;;;;;;; +Fr_toInt: + mov rax, [rdi] + bt rax, 63 + jc Fr_long + movsx rax, eax + ret + +Fr_long: + push rbp + push rsi + push rdx + mov rbp, rsp + bt rax, 62 + jnc Fr_longNormal +Fr_longMontgomery: + + sub rsp, 40 + push rsi + mov rsi, rdi + mov rdi, rsp + call Fr_toNormal + pop rsi + + +Fr_longNormal: + mov rax, [rdi + 8] + mov rcx, rax + shr rcx, 31 + jnz Fr_longNeg + + mov rcx, [rdi + 16] + test rcx, rcx + jnz Fr_longNeg + + mov rcx, [rdi + 24] + test rcx, rcx + jnz Fr_longNeg + + mov rcx, [rdi + 32] + test rcx, rcx + jnz Fr_longNeg + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +Fr_longNeg: + mov rax, [rdi + 8] + sub rax, [q] + jnc Fr_longErr + + mov rcx, [rdi + 16] + sbb rcx, [q + 8] + jnc Fr_longErr + + mov rcx, [rdi + 24] + sbb rcx, [q + 16] + jnc Fr_longErr + + mov rcx, [rdi + 32] + sbb rcx, [q + 24] + jnc Fr_longErr + + mov rcx, rax + sar rcx, 31 + add rcx, 1 + jnz Fr_longErr + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +Fr_longErr: + push rdi + mov rdi, 0 + call Fr_fail WRT ..plt + pop rdi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +Fr_rawMMul: + push r15 + push r14 + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov rdx,[rsi + 0] + mulx rax,r11,[rcx] + mulx r8,r12,[rcx +8] + adcx r12,rax + mulx rax,r13,[rcx +16] + adcx r13,r8 + mulx r8,r14,[rcx +24] + adcx r14,rax + mov r15,r10 + adcx r15,r8 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 8] + mov 
r15,r10 + mulx r8,rax,[rcx +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rcx +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rcx +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rcx +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 16] + mov r15,r10 + mulx r8,rax,[rcx +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rcx +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rcx +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rcx +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 24] + mov r15,r10 + mulx r8,rax,[rcx +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rcx +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rcx +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rcx +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +;comparison + cmp r14,[q + 24] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq + cmp r13,[q + 16] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq + cmp r12,[q + 8] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq + cmp r11,[q + 0] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq +Fr_rawMMul_sq: + sub r11,[q +0] + sbb r12,[q +8] + sbb r13,[q +16] + sbb r14,[q +24] +Fr_rawMMul_done: + mov [rdi + 0],r11 + mov 
[rdi + 8],r12 + mov [rdi + 16],r13 + mov [rdi + 24],r14 + pop r12 + pop r13 + pop r14 + pop r15 + ret +Fr_rawMSquare: + push r15 + push r14 + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov rdx,[rsi + 0] + mulx rax,r11,rdx + mulx r8,r12,[rsi +8] + adcx r12,rax + mulx rax,r13,[rsi +16] + adcx r13,r8 + mulx r8,r14,[rsi +24] + adcx r14,rax + mov r15,r10 + adcx r15,r8 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 8] + mov r15,r10 + mulx r8,rax,[rsi +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rsi +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rsi +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rsi +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 16] + mov r15,r10 + mulx r8,rax,[rsi +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rsi +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rsi +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rsi +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 24] + mov r15,r10 + mulx r8,rax,[rsi +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rsi +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rsi +16] + adcx r13,rax + adox r14,r8 + 
mulx r8,rax,[rsi +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +;comparison + cmp r14,[q + 24] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq + cmp r13,[q + 16] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq + cmp r12,[q + 8] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq + cmp r11,[q + 0] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq +Fr_rawMSquare_sq: + sub r11,[q +0] + sbb r12,[q +8] + sbb r13,[q +16] + sbb r14,[q +24] +Fr_rawMSquare_done: + mov [rdi + 0],r11 + mov [rdi + 8],r12 + mov [rdi + 16],r13 + mov [rdi + 24],r14 + pop r12 + pop r13 + pop r14 + pop r15 + ret +Fr_rawMMul1: + push r15 + push r14 + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov rdx,rcx + mulx rax,r11,[rsi] + mulx r8,r12,[rsi +8] + adcx r12,rax + mulx rax,r13,[rsi +16] + adcx r13,r8 + mulx r8,r14,[rsi +24] + adcx r14,rax + mov r15,r10 + adcx r15,r8 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx 
r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +;comparison + cmp r14,[q + 24] + jc Fr_rawMMul1_done + jnz Fr_rawMMul1_sq + cmp r13,[q + 16] + jc Fr_rawMMul1_done + jnz Fr_rawMMul1_sq + cmp r12,[q + 8] + jc Fr_rawMMul1_done + jnz Fr_rawMMul1_sq + cmp r11,[q + 0] + jc Fr_rawMMul1_done + jnz Fr_rawMMul1_sq +Fr_rawMMul1_sq: + sub r11,[q +0] + sbb r12,[q +8] + sbb r13,[q +16] + sbb r14,[q +24] +Fr_rawMMul1_done: + mov [rdi + 0],r11 + mov [rdi + 8],r12 + mov [rdi + 16],r13 + mov [rdi + 24],r14 + pop r12 + pop r13 + pop r14 + pop r15 + ret +Fr_rawFromMontgomery: + push r15 + push r14 + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov r11,[rsi +0] + mov r12,[rsi +8] + mov r13,[rsi +16] + mov r14,[rsi +24] + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx 
rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +;comparison + cmp r14,[q + 24] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq + cmp r13,[q + 16] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq + cmp r12,[q + 8] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq + cmp r11,[q + 0] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq +Fr_rawFromMontgomery_sq: + sub r11,[q +0] + sbb r12,[q +8] + sbb r13,[q +16] + sbb r14,[q +24] +Fr_rawFromMontgomery_done: + mov [rdi + 0],r11 + mov [rdi + 8],r12 + mov [rdi + 16],r13 + mov [rdi + 24],r14 + pop r12 + pop r13 + pop r14 + pop r15 + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawToMontgomery +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to Montgomery +; rdi <= Pointer destination element +; rsi <= Pointer to src element +;;;;;;;;;;;;;;;;;;;; +Fr_rawToMontgomery: + push rdx + lea rdx, [R2] + call Fr_rawMMul + pop rdx + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toMontgomery +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to Montgomery +; rdi <= Destination +; rdi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toMontgomery: + mov rax, [rsi] + bt rax, 62 ; check if montgomery + jc toMontgomery_doNothing + bt rax, 63 + jc toMontgomeryLong + +toMontgomeryShort: + movsx rdx, eax + mov [rdi], rdx + add rdi, 8 + lea rsi, [R2] + cmp rdx, 0 + js negMontgomeryShort +posMontgomeryShort: + call Fr_rawMMul1 + sub rdi, 8 + mov r11b, 0x40 + shl r11d, 24 + mov [rdi+4], r11d + ret + +negMontgomeryShort: + neg rdx ; Do the multiplication positive and then negate the result. 
+ call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + mov r11b, 0x40 + shl r11d, 24 + mov [rdi+4], r11d + ret + + +toMontgomeryLong: + mov [rdi], rax + add rdi, 8 + add rsi, 8 + lea rdx, [R2] + call Fr_rawMMul + sub rsi, 8 + sub rdi, 8 + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + ret + + +toMontgomery_doNothing: + call Fr_copy + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toNormal +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number from Montgomery +; rdi <= Destination +; rsi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toNormal: + mov rax, [rsi] + bt rax, 62 ; check if montgomery + jnc toNormal_doNothing + bt rax, 63 ; if short, it means it's converted + jnc toNormal_doNothing + +toNormalLong: + add rdi, 8 + add rsi, 8 + call Fr_rawFromMontgomery + sub rsi, 8 + sub rdi, 8 + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + ret + +toNormal_doNothing: + call Fr_copy + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toLongNormal +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to long normal +; rdi <= Destination +; rsi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toLongNormal: + mov rax, [rsi] + bt rax, 63 ; check if long + jnc toLongNormal_fromShort + bt rax, 62 ; check if montgomery + jc toLongNormal_fromMontgomery + call Fr_copy ; It is already long + ret + +toLongNormal_fromMontgomery: + add rdi, 8 + add rsi, 8 + call Fr_rawFromMontgomery + sub rsi, 8 + sub rdi, 8 + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + ret + +toLongNormal_fromShort: + mov r8, rsi ; save rsi + movsx rsi, eax + call rawCopyS2L + mov rsi, r8 ; recover rsi + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + ret + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; add +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx 
+;;;;;;;;;;;;;;;;;;;;;; +Fr_add: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc add_l1 + bt rcx, 63 ; Check if is short second operand + jc add_s1l2 + +add_s1s2: ; Both operands are short + + xor rdx, rdx + mov edx, eax + add edx, ecx + jo add_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +add_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rsi, eax + movsx rdx, ecx + add rsi, rdx + call rawCopyS2L + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +add_l1: + bt rcx, 63 ; Check if is short second operand + jc add_l1l2 + +;;;;;;;; +add_l1s2: + bt rax, 62 ; check if montgomery first + jc add_l1ms2 +add_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rsi, 8 + movsx rdx, ecx + add rdi, 8 + cmp rdx, 0 + + jns tmp_1 + neg rdx + call rawSubLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +tmp_1: + call rawAddLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +add_l1ms2: + bt rcx, 62 ; check if montgomery second + jc add_l1ms2m +add_l1ms2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1ms2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;; +add_s1l2: + bt rcx, 62 ; check if montgomery second + jc add_s1l2m +add_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + lea rsi, [rdx + 8] + movsx rdx, eax + add 
rdi, 8 + cmp rdx, 0 + + jns tmp_2 + neg rdx + call rawSubLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +tmp_2: + call rawAddLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_s1l2m: + bt rax, 62 ; check if montgomery first + jc add_s1ml2m +add_s1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_s1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +;;;; +add_l1l2: + bt rax, 62 ; check if montgomery first + jc add_l1ml2 +add_l1nl2: + bt rcx, 62 ; check if montgomery second + jc add_l1nl2m +add_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1ml2: + bt rcx, 62 ; check if montgomery seconf + jc add_l1ml2m +add_l1ml2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + 
+ add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawAddLL +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of type long +; Params: +; rsi <= Pointer to the long data of element 1 +; rdx <= Pointer to the long data of element 2 +; rdi <= Pointer to the long data of result +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawAddLL: +Fr_rawAdd: + ; Add component by component with carry + + mov rax, [rsi + 0] + add rax, [rdx + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + adc rax, [rdx + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + adc rax, [rdx + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + adc rax, [rdx + 24] + mov [rdi + 24], rax + + jc rawAddLL_sq ; if overflow, substract q + + ; Compare with q + + + cmp rax, [q + 24] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + + mov rax, [rdi + 16] + + cmp rax, [q + 16] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + + mov rax, [rdi + 8] + + cmp rax, [q + 8] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + + mov rax, [rdi + 0] + + cmp rax, [q + 0] + jc rawAddLL_done ; q is bigget so done. 
+ jnz rawAddLL_sq ; q is lower + + ; If equal substract q +rawAddLL_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +rawAddLL_done: + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawAddLS +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of type long +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long data of element 1 +; rdx <= Value to be added +;;;;;;;;;;;;;;;;;;;;;; +rawAddLS: + ; Add component by component with carry + + add rdx, [rsi] + mov [rdi] ,rdx + + mov rdx, 0 + adc rdx, [rsi + 8] + mov [rdi + 8], rdx + + mov rdx, 0 + adc rdx, [rsi + 16] + mov [rdi + 16], rdx + + mov rdx, 0 + adc rdx, [rsi + 24] + mov [rdi + 24], rdx + + jc rawAddLS_sq ; if overflow, substract q + + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc rawAddLS_done ; q is bigget so done. + jnz rawAddLS_sq ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc rawAddLS_done ; q is bigget so done. + jnz rawAddLS_sq ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc rawAddLS_done ; q is bigget so done. + jnz rawAddLS_sq ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc rawAddLS_done ; q is bigget so done. 
+ jnz rawAddLS_sq ; q is lower + + ; If equal substract q +rawAddLS_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +rawAddLS_done: + ret + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; sub +;;;;;;;;;;;;;;;;;;;;;; +; Substracts two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_sub: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is long first operand + jc sub_l1 + bt rcx, 63 ; Check if is long second operand + jc sub_s1l2 + +sub_s1s2: ; Both operands are short + + xor rdx, rdx + mov edx, eax + sub edx, ecx + jo sub_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +sub_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rsi, eax + movsx rdx, ecx + sub rsi, rdx + call rawCopyS2L + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +sub_l1: + bt rcx, 63 ; Check if is short second operand + jc sub_l1l2 + +;;;;;;;; +sub_l1s2: + bt rax, 62 ; check if montgomery first + jc sub_l1ms2 +sub_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rsi, 8 + movsx rdx, ecx + add rdi, 8 + cmp rdx, 0 + + jns tmp_3 + neg rdx + call rawAddLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +tmp_3: + call rawSubLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ms2: + bt rcx, 62 ; check if montgomery second + jc sub_l1ms2m +sub_l1ms2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + 
pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ms2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;; +sub_s1l2: + bt rcx, 62 ; check if montgomery first + jc sub_s1l2m +sub_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + cmp eax, 0 + + js tmp_4 + + ; First Operand is positive + push rsi + add rdi, 8 + movsx rsi, eax + add rdx, 8 + call rawSubSL + sub rdi, 8 + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_4: ; First operand is negative + push rsi + lea rsi, [rdx + 8] + movsx rdx, eax + add rdi, 8 + neg rdx + call rawNegLS + sub rdi, 8 + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_s1l2m: + bt rax, 62 ; check if montgomery second + jc sub_s1ml2m +sub_s1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_s1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +;;;; +sub_l1l2: + bt rax, 62 ; check if montgomery first + jc sub_l1ml2 +sub_l1nl2: + bt rcx, 62 ; check if montgomery second + jc sub_l1nl2m +sub_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call 
Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ml2: + bt rcx, 62 ; check if montgomery seconf + jc sub_l1ml2m +sub_l1ml2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawSubLS +;;;;;;;;;;;;;;;;;;;;;; +; Substracts a short element from the long element +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long data of element 1 where will be substracted +; rdx <= Value to be substracted +; [rdi] = [rsi] - rdx +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawSubLS: + ; Substract first digit + + mov rax, [rsi] + sub rax, rdx + mov [rdi] ,rax + mov rdx, 0 + + mov rax, [rsi + 8] + sbb rax, rdx + mov [rdi + 8], rax + + mov rax, [rsi + 16] + sbb rax, rdx + mov [rdi + 16], rax + + mov rax, [rsi + 24] + sbb rax, rdx + mov [rdi + 24], rax + + jnc rawSubLS_done ; if overflow, add q + + ; Add q +rawSubLS_aq: + + mov rax, [q + 0] + add [rdi + 0], rax + + mov rax, [q + 8] + adc [rdi + 8], rax + + mov rax, [q + 16] + adc [rdi + 16], rax + + mov rax, [q + 24] + adc [rdi + 24], rax + +rawSubLS_done: + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawSubSL +;;;;;;;;;;;;;;;;;;;;;; +; Substracts a long element from a short element +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Value from where will bo substracted +; rdx <= Pointer to long of the value to be substracted +; +; [rdi] = 
rsi - [rdx]
+; Modified Registers:
+; rax
+;;;;;;;;;;;;;;;;;;;;;;
+rawSubSL:
+ ; Subtract the low limb: rsi holds the short value, [rdx] the low limb
+ sub rsi, [rdx]
+ mov [rdi] ,rsi
+
+
+ ; Propagate the borrow through the three upper limbs (0 - limb - CF)
+ mov rax, 0
+ sbb rax, [rdx + 8]
+ mov [rdi + 8], rax
+
+ mov rax, 0
+ sbb rax, [rdx + 16]
+ mov [rdi + 16], rax
+
+ mov rax, 0
+ sbb rax, [rdx + 24]
+ mov [rdi + 24], rax
+
+ jnc rawSubSL_done ; no final borrow -> result already in range, done
+
+ ; A borrow remains: the raw 256-bit result went negative, so add q
+ ; (presumably the field modulus, defined in a data section not visible
+ ; here - TODO confirm) to wrap it back into the field
+rawSubSL_aq:
+
+ mov rax, [q + 0]
+ add [rdi + 0], rax
+
+ mov rax, [q + 8]
+ adc [rdi + 8], rax
+
+ mov rax, [q + 16]
+ adc [rdi + 16], rax
+
+ mov rax, [q + 24]
+ adc [rdi + 24], rax
+
+rawSubSL_done:
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawSubLL
+;;;;;;;;;;;;;;;;;;;;;;
+; Subtracts a long element from a long element (4x64-bit limbs).
+; (Original header said "from a short element" - copy/paste slip.)
+; Params:
+; rdi <= Pointer to the long data of result
+; rsi <= Pointer to long from where subtracted
+; rdx <= Pointer to long of the value to be subtracted
+;
+; [rdi] = [rsi] - [rdx]
+; Modified Registers:
+; rax
+;;;;;;;;;;;;;;;;;;;;;;
+rawSubLL:
+Fr_rawSub: ; second label: same entry point exported under the public name
+ ; Subtract limb by limb, least-significant first, chaining the borrow
+
+ mov rax, [rsi + 0]
+ sub rax, [rdx + 0]
+ mov [rdi + 0], rax
+
+ mov rax, [rsi + 8]
+ sbb rax, [rdx + 8]
+ mov [rdi + 8], rax
+
+ mov rax, [rsi + 16]
+ sbb rax, [rdx + 16]
+ mov [rdi + 16], rax
+
+ mov rax, [rsi + 24]
+ sbb rax, [rdx + 24]
+ mov [rdi + 24], rax
+
+ jnc rawSubLL_done ; no borrow out -> result already non-negative
+
+ ; Borrow out: raw result is negative, add q to bring it back into range
+rawSubLL_aq:
+
+ mov rax, [q + 0]
+ add [rdi + 0], rax
+
+ mov rax, [q + 8]
+ adc [rdi + 8], rax
+
+ mov rax, [q + 16]
+ adc [rdi + 16], rax
+
+ mov rax, [q + 24]
+ adc [rdi + 24], rax
+
+rawSubLL_done:
+ ret
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawNegLS
+;;;;;;;;;;;;;;;;;;;;;;
+; Subtracts a long element and a short element from 0
+; Params:
+; rdi <= Pointer to the long data of result
+; rsi <= Pointer to long from where subtracted
+; rdx <= short value to be subtracted too
+;
+; [rdi] = -[rsi] - rdx
+; Modified Registers:
+; rax
+;;;;;;;;;;;;;;;;;;;;;;
+rawNegLS:
+ ; First compute q - rdx into [rdi], propagating the borrow upward
+ mov rax, [q]
+ sub rax, rdx
+ mov [rdi], rax
+
+ mov rax, [q + 8 ]
+ sbb rax, 0
+ mov [rdi + 8], rax
+
+ mov rax, [q + 16 ]
+ sbb rax, 0
+ mov [rdi + 16], rax
+
+ mov rax, [q + 24 ]
+ sbb rax, 0
+ mov [rdi + 24], rax
+
+ setc dl ; dl = borrow out of the (q - rdx) step; remembered for later
+
+
+ ; Now subtract [rsi] from the partial result, limb by limb with borrow
+ mov rax, [rdi + 0 ]
+ sub rax, [rsi + 0]
+ mov [rdi + 0], rax
+
+ mov rax, [rdi + 8 ]
+ sbb rax, [rsi + 8]
+ mov [rdi + 8], rax
+
+ mov rax, [rdi + 16 ]
+ sbb rax, [rsi + 16]
+ mov [rdi + 16], rax
+
+ mov rax, [rdi + 24 ]
+ sbb rax, [rsi + 24]
+ mov [rdi + 24], rax
+
+
+ setc dh ; dh = borrow out of the second subtraction
+ or dl, dh ; either borrow set => the raw result went negative
+ jz rawNegSL_done ; NOTE(review): label says "SL", routine is "LS" - naming slip only
+
+ ; it is a negative value, so add q to bring it back into the field
+
+ mov rax, [q + 0]
+ add [rdi + 0], rax
+
+ mov rax, [q + 8]
+ adc [rdi + 8], rax
+
+ mov rax, [q + 16]
+ adc [rdi + 16], rax
+
+ mov rax, [q + 24]
+ adc [rdi + 24], rax
+
+
+rawNegSL_done:
+ ret
+
+
+
+
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; neg
+;;;;;;;;;;;;;;;;;;;;;;
+; Negates an element of any kind.
+; (Original header said "Adds two elements of any kind" - copy/paste slip.)
+; Params:
+; rsi <= Pointer to element to be negated
+; rdi <= Pointer to result
+; [rdi] = -[rsi]
+;;;;;;;;;;;;;;;;;;;;;;
+Fr_neg:
+ mov rax, [rsi]
+ bt rax, 63 ; bit 63 of the header word set => long format
+ jc neg_l
+
+neg_s: ; Operand is short (32-bit value in eax)
+
+ neg eax
+ jo neg_manageOverflow ; Check if overflow. (0x80000000 is the only case)
+
+ mov [rdi], rax ; not necessary to adjust so just save and return
+ ret
+
+neg_manageOverflow: ; Do the operation in 64 bits
+ push rsi
+ movsx rsi, eax
+ neg rsi
+ call rawCopyS2L ; store the 64-bit value as a long element
+ pop rsi
+ ret
+
+
+
+neg_l:
+ mov [rdi], rax ; Copy the type/header word unchanged
+
+ add rdi, 8 ; skip the 8-byte header, point at the limbs
+ add rsi, 8
+ call rawNegL
+ sub rdi, 8
+ sub rsi, 8
+ ret
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawNeg
+;;;;;;;;;;;;;;;;;;;;;;
+; Negates a value
+; Params:
+; rdi <= Pointer to the long data of result
+; rsi <= Pointer to the long data of element 1
+;
+; [rdi] = - [rsi]
+;;;;;;;;;;;;;;;;;;;;;;
+rawNegL:
+Fr_rawNeg: ; second label: same entry point exported under the public name
+ ; Special-case zero first: -0 must stay 0, not become q
+
+ xor rax, rax
+
+ cmp [rsi + 0], rax
+ jnz doNegate
+
+ cmp [rsi + 8], rax
+ jnz doNegate
+
+ cmp [rsi + 16], rax
+ jnz doNegate
+
+ cmp [rsi + 24], rax
+ jnz doNegate
+
+ ; it's zero so just set the result to zero
+
+ mov [rdi + 0], rax
+
+ mov [rdi + 8], rax
+
+ mov [rdi + 16], rax
+
+ mov [rdi + 24], rax
+
+ ret
+doNegate:
+ ; Non-zero: [rdi] = q - [rsi], limb by limb, chaining the borrow
+
+ mov rax, [q + 0]
+ sub rax, [rsi + 0]
+ mov [rdi + 0], rax
+
+ mov rax, [q + 8]
+ sbb rax, [rsi + 8]
+ mov [rdi + 8], rax
+
+ mov rax, [q + 16]
+ sbb rax, [rsi + 16]
+ mov [rdi + 16], rax
+
+ mov rax, [q + 24]
+ sbb rax, [rsi + 24]
+ mov [rdi + 24], rax
+
+ ret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; square
+;;;;;;;;;;;;;;;;;;;;;;
+; Squares a field element
+; Params:
+; rsi <= Pointer to element 1
+; rdi <= Pointer to result
+; [rdi] = [rsi] * [rsi]
+; Modified Registers:
+; r8, r9, r10, r11, rax, rcx
+;;;;;;;;;;;;;;;;;;;;;;
+Fr_square:
+ mov r8, [rsi]
+ bt r8, 63 ; bit 63 of the header word set => long format
+ jc square_l1
+
+square_s1: ; Operand is short
+
+ xor rax, rax
+ mov eax, r8d
+ imul eax
+ jo square_manageOverflow ; rsi already is the 64bits result
+
+ mov [rdi], rax ; not necessary to adjust so just save and return
+ ; NOTE(review): no `ret` here - the short path falls through into
+ ; square_manageOverflow, which recomputes the product in 64 bits and
+ ; overwrites [rdi] with the long-form result. Output is still correct,
+ ; but a `ret` would skip the redundant work - confirm against upstream
+ ; ffiasm before changing generated code.
+
+square_manageOverflow: ; Do the operation in 64 bits
+ push rsi
+ movsx rax, r8d
+ imul rax
+ mov rsi, rax
+ call rawCopyS2L
+ pop rsi
+
+ ret
+
+square_l1:
+ bt r8, 62 ; check if
montgomery first + jc square_l1m +square_l1n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + call Fr_rawMSquare + sub rdi, 8 + sub rsi, 8 + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + +square_l1m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + call Fr_rawMSquare + sub rdi, 8 + sub rsi, 8 + + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; mul +;;;;;;;;;;;;;;;;;;;;;; +; Multiplies two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; [rdi] = [rsi] * [rdi] +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_mul: + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc mul_l1 + bt r9, 63 ; Check if is short second operand + jc mul_s1l2 + +mul_s1s2: ; Both operands are short + + xor rax, rax + mov eax, r8d + imul r9d + jo mul_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rax ; not necessary to adjust so just save and return + +mul_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rax, r8d + movsx rcx, r9d + imul rcx + mov rsi, rax + call rawCopyS2L + pop rsi + + ret + +mul_l1: + bt r9, 63 ; Check if is short second operand + jc mul_l1l2 + +;;;;;;;; +mul_l1s2: + bt r8, 62 ; check if montgomery first + jc mul_l1ms2 +mul_l1ns2: + bt r9, 62 ; check if montgomery first + jc mul_l1ns2m +mul_l1ns2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + add rsi, 8 + movsx rdx, r9d + add rdi, 8 + cmp rdx, 0 + + jns tmp_5 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_6 +tmp_5: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_6: + + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + + +mul_l1ns2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add 
rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + + +mul_l1ms2: + bt r9, 62 ; check if montgomery second + jc mul_l1ms2m +mul_l1ms2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + add rsi, 8 + movsx rdx, r9d + add rdi, 8 + cmp rdx, 0 + + jns tmp_7 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_8 +tmp_7: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_8: + + + ret + +mul_l1ms2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + + +;;;;;;;; +mul_s1l2: + bt r8, 62 ; check if montgomery first + jc mul_s1ml2 +mul_s1nl2: + bt r9, 62 ; check if montgomery first + jc mul_s1nl2m +mul_s1nl2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + lea rsi, [rdx + 8] + movsx rdx, r8d + add rdi, 8 + cmp rdx, 0 + + jns tmp_9 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_10 +tmp_9: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_10: + + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + +mul_s1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + lea rsi, [rdx + 8] + movsx rdx, r8d + add rdi, 8 + cmp rdx, 0 + + jns tmp_11 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_12 +tmp_11: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_12: + + + ret + +mul_s1ml2: + bt r9, 62 ; check if montgomery first + jc mul_s1ml2m +mul_s1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_s1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +;;;; +mul_l1l2: + bt r8, 62 ; check if montgomery first + jc mul_l1ml2 +mul_l1nl2: + bt r9, 62 ; check if 
montgomery second + jc mul_l1nl2m +mul_l1nl2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + +mul_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_l1ml2: + bt r9, 62 ; check if montgomery seconf + jc mul_l1ml2m +mul_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_l1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + + + + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; band +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_band: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc and_l1 + bt rcx, 63 ; Check if is short second operand + jc and_s1l2 + +and_s1s2: + + cmp eax, 0 + + js tmp_13 + + cmp ecx, 0 + js tmp_13 + xor rdx, rdx ; both ops are positive so do the op and return + mov edx, eax + and edx, ecx + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_13: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + and 
rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + ; If equal substract q +tmp_14: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_15: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + + +and_l1: + bt rcx, 63 ; Check if is short second operand + jc and_l1l2 + + +and_l1s2: + bt rax, 62 ; check if montgomery first + jc and_l1ms2 +and_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_16 + movsx rax, ecx + and rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + and rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + and rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_18 ; q is bigget so done. 
+ jnz tmp_17 ; q is lower + + ; If equal substract q +tmp_17: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_18: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_16: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_20 ; q is bigget so done. 
+ jnz tmp_19 ; q is lower + + ; If equal substract q +tmp_19: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_20: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +and_l1ms2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_21 + movsx rax, ecx + and rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + and rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + and rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_23 ; q is bigget so done. 
+ jnz tmp_22 ; q is lower + + ; If equal substract q +tmp_22: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_23: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_21: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_25 ; q is bigget so done. 
+ jnz tmp_24 ; q is lower + + ; If equal substract q +tmp_24: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_25: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +and_s1l2: + bt rcx, 62 ; check if montgomery first + jc and_s1l2m +and_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov eax, [rsi] + cmp eax, 0 + + js tmp_26 + and rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_28 ; q is bigget so done. 
+ jnz tmp_27 ; q is lower + + ; If equal substract q +tmp_27: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_28: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_26: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_30 ; q is bigget so done. + jnz tmp_29 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_30 ; q is bigget so done. + jnz tmp_29 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_30 ; q is bigget so done. + jnz tmp_29 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_30 ; q is bigget so done. 
+ jnz tmp_29 ; q is lower + + ; If equal substract q +tmp_29: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_30: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +and_s1l2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov eax, [rsi] + cmp eax, 0 + + js tmp_31 + and rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_33 ; q is bigget so done. 
+ jnz tmp_32 ; q is lower + + ; If equal substract q +tmp_32: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_33: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_31: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_35 ; q is bigget so done. 
+ jnz tmp_34 ; q is lower + + ; If equal substract q +tmp_34: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_35: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +and_l1l2: + bt rax, 62 ; check if montgomery first + jc and_l1ml2 + bt rcx, 62 ; check if montgomery first + jc and_l1nl2m +and_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_37 ; q is bigget so done. 
+ jnz tmp_36 ; q is lower + + ; If equal substract q +tmp_36: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_37: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +and_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_39 ; q is bigget so done. 
+ jnz tmp_38 ; q is lower + + ; If equal substract q +tmp_38: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_39: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +and_l1ml2: + bt rcx, 62 ; check if montgomery first + jc and_l1ml2m +and_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_41 ; q is bigget so done. 
+ jnz tmp_40 ; q is lower + + ; If equal substract q +tmp_40: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_41: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +and_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_43 ; q is bigget so done. + jnz tmp_42 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_43 ; q is bigget so done. + jnz tmp_42 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_43 ; q is bigget so done. + jnz tmp_42 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_43 ; q is bigget so done. 
+ jnz tmp_42 ; q is lower + + ; If equal substract q +tmp_42: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_43: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; bor +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bor: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc or_l1 + bt rcx, 63 ; Check if is short second operand + jc or_s1l2 + +or_s1s2: + + cmp eax, 0 + + js tmp_44 + + cmp ecx, 0 + js tmp_44 + xor rdx, rdx ; both ops are positive so do the op and return + mov edx, eax + or edx, ecx + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_44: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_46 ; q is bigget so done. + jnz tmp_45 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_46 ; q is bigget so done. 
+ jnz tmp_45 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_46 ; q is bigget so done. + jnz tmp_45 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_46 ; q is bigget so done. + jnz tmp_45 ; q is lower + + ; If equal substract q +tmp_45: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_46: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + + +or_l1: + bt rcx, 63 ; Check if is short second operand + jc or_l1l2 + + +or_l1s2: + bt rax, 62 ; check if montgomery first + jc or_l1ms2 +or_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_47 + movsx rax, ecx + or rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + or rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + or rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_49 ; q is bigget so done. 
+ jnz tmp_48 ; q is lower + + ; If equal substract q +tmp_48: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_49: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_47: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_51 ; q is bigget so done. 
+ jnz tmp_50 ; q is lower + + ; If equal substract q +tmp_50: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_51: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +or_l1ms2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_52 + movsx rax, ecx + or rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + or rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + or rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_54 ; q is bigget so done. + jnz tmp_53 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_54 ; q is bigget so done. + jnz tmp_53 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_54 ; q is bigget so done. + jnz tmp_53 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_54 ; q is bigget so done. 
+ jnz tmp_53 ; q is lower + + ; If equal substract q +tmp_53: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_54: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_52: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_56 ; q is bigget so done. 
+ jnz tmp_55 ; q is lower + + ; If equal substract q +tmp_55: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_56: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +or_s1l2: + bt rcx, 62 ; check if montgomery first + jc or_s1l2m +or_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov eax, [rsi] + cmp eax, 0 + + js tmp_57 + or rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_59 ; q is bigget so done. 
+ jnz tmp_58 ; q is lower + + ; If equal substract q +tmp_58: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_59: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_57: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_61 ; q is bigget so done. 
+ jnz tmp_60 ; q is lower + + ; If equal substract q +tmp_60: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_61: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +or_s1l2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov eax, [rsi] + cmp eax, 0 + + js tmp_62 + or rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_64 ; q is bigget so done. 
+ jnz tmp_63 ; q is lower + + ; If equal substract q +tmp_63: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_64: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_62: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_66 ; q is bigget so done. 
+ jnz tmp_65 ; q is lower + + ; If equal substract q +tmp_65: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_66: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +or_l1l2: + bt rax, 62 ; check if montgomery first + jc or_l1ml2 + bt rcx, 62 ; check if montgomery first + jc or_l1nl2m +or_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_68 ; q is bigget so done. + jnz tmp_67 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_68 ; q is bigget so done. + jnz tmp_67 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_68 ; q is bigget so done. + jnz tmp_67 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_68 ; q is bigget so done. 
+ jnz tmp_67 ; q is lower + + ; If equal substract q +tmp_67: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_68: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +or_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_70 ; q is bigget so done. 
+ jnz tmp_69 ; q is lower + + ; If equal substract q +tmp_69: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_70: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +or_l1ml2: + bt rcx, 62 ; check if montgomery first + jc or_l1ml2m +or_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_72 ; q is bigget so done. 
+ jnz tmp_71 ; q is lower + + ; If equal substract q +tmp_71: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_72: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +or_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_74 ; q is bigget so done. 
+ jnz tmp_73 ; q is lower + + ; If equal substract q +tmp_73: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_74: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; bxor +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bxor: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc xor_l1 + bt rcx, 63 ; Check if is short second operand + jc xor_s1l2 + +xor_s1s2: + + cmp eax, 0 + + js tmp_75 + + cmp ecx, 0 + js tmp_75 + xor rdx, rdx ; both ops are positive so do the op and return + mov edx, eax + xor edx, ecx + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_75: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_77 ; q is bigget so done. + jnz tmp_76 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_77 ; q is bigget so done. 
+ jnz tmp_76 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_77 ; q is bigget so done. + jnz tmp_76 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_77 ; q is bigget so done. + jnz tmp_76 ; q is lower + + ; If equal substract q +tmp_76: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_77: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + + +xor_l1: + bt rcx, 63 ; Check if is short second operand + jc xor_l1l2 + + +xor_l1s2: + bt rax, 62 ; check if montgomery first + jc xor_l1ms2 +xor_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_78 + movsx rax, ecx + xor rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + xor rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + xor rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_80 ; q is bigget so done. 
+ jnz tmp_79 ; q is lower + + ; If equal substract q +tmp_79: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_80: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_78: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_82 ; q is bigget so done. 
+ jnz tmp_81 ; q is lower + + ; If equal substract q +tmp_81: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_82: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +xor_l1ms2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_83 + movsx rax, ecx + xor rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + xor rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + xor rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_85 ; q is bigget so done. 
+ jnz tmp_84 ; q is lower + + ; If equal substract q +tmp_84: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_85: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_83: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_87 ; q is bigget so done. 
+ jnz tmp_86 ; q is lower + + ; If equal substract q +tmp_86: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_87: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +xor_s1l2: + bt rcx, 62 ; check if montgomery first + jc xor_s1l2m +xor_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov eax, [rsi] + cmp eax, 0 + + js tmp_88 + xor rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_90 ; q is bigget so done. 
+ jnz tmp_89 ; q is lower + + ; If equal substract q +tmp_89: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_90: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_88: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_92 ; q is bigget so done. + jnz tmp_91 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_92 ; q is bigget so done. + jnz tmp_91 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_92 ; q is bigget so done. + jnz tmp_91 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_92 ; q is bigget so done. 
+ jnz tmp_91 ; q is lower + + ; If equal substract q +tmp_91: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_92: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +xor_s1l2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov eax, [rsi] + cmp eax, 0 + + js tmp_93 + xor rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_95 ; q is bigget so done. 
+ jnz tmp_94 ; q is lower + + ; If equal substract q +tmp_94: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_95: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_93: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_97 ; q is bigget so done. 
+ jnz tmp_96 ; q is lower + + ; If equal substract q +tmp_96: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_97: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +xor_l1l2: + bt rax, 62 ; check if montgomery first + jc xor_l1ml2 + bt rcx, 62 ; check if montgomery first + jc xor_l1nl2m +xor_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_99 ; q is bigget so done. 
+ jnz tmp_98 ; q is lower + + ; If equal substract q +tmp_98: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_99: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +xor_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_101 ; q is bigget so done. 
+ jnz tmp_100 ; q is lower + + ; If equal substract q +tmp_100: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_101: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +xor_l1ml2: + bt rcx, 62 ; check if montgomery first + jc xor_l1ml2m +xor_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_103 ; q is bigget so done. 
+ jnz tmp_102 ; q is lower + + ; If equal substract q +tmp_102: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_103: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +xor_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_105 ; q is bigget so done. + jnz tmp_104 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_105 ; q is bigget so done. + jnz tmp_104 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_105 ; q is bigget so done. + jnz tmp_104 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_105 ; q is bigget so done. 
+ jnz tmp_104 ; q is lower + + ; If equal substract q +tmp_104: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_105: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; bnot +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bnot: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rax, [rsi] + bt rax, 63 ; Check if is long operand + jc bnot_l1 +bnot_s: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp bnot_l1n + +bnot_l1: + bt rax, 62 ; check if montgomery first + jnc bnot_l1n + +bnot_l1m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + +bnot_l1n: + + mov rax, [rsi + 8] + not rax + + mov [rdi + 8], rax + + mov rax, [rsi + 16] + not rax + + mov [rdi + 16], rax + + mov rax, [rsi + 24] + not rax + + mov [rdi + 24], rax + + mov rax, [rsi + 32] + not rax + + and rax, [lboMask] + + mov [rdi + 32], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_107 ; q is bigget so done. + jnz tmp_106 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_107 ; q is bigget so done. + jnz tmp_106 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_107 ; q is bigget so done. + jnz tmp_106 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_107 ; q is bigget so done. 
+ jnz tmp_106 ; q is lower + + ; If equal substract q +tmp_106: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_107: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawShr +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= how much is shifted +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_rawShr: +rawShr: + cmp rdx, 0 + je Fr_rawCopy + + cmp rdx, 254 + jae Fr_rawZero + +rawShr_nz: + mov r8, rdx + shr r8,6 + mov rcx, rdx + and rcx, 0x3F + jz rawShr_aligned + mov ch, 64 + sub ch, cl + + mov r9, 1 + rol cx, 8 + shl r9, cl + rol cx, 8 + sub r9, 1 + mov r10, r9 + not r10 + + + cmp r8, 3 + jae rawShr_if2_0 + + mov rax, [rsi + r8*8 + 0 ] + shr rax, cl + and rax, r9 + mov r11, [rsi + r8*8 + 8 ] + rol cx, 8 + shl r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + mov [rdi + 0], rax + + jmp rawShr_endif_0 +rawShr_if2_0: + jne rawShr_else_0 + + mov rax, [rsi + r8*8 + 0 ] + shr rax, cl + and rax, r9 + mov [rdi + 0], rax + + jmp rawShr_endif_0 +rawShr_else_0: + xor rax, rax + mov [rdi + 0], rax +rawShr_endif_0: + + cmp r8, 2 + jae rawShr_if2_1 + + mov rax, [rsi + r8*8 + 8 ] + shr rax, cl + and rax, r9 + mov r11, [rsi + r8*8 + 16 ] + rol cx, 8 + shl r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + mov [rdi + 8], rax + + jmp rawShr_endif_1 +rawShr_if2_1: + jne rawShr_else_1 + + mov rax, [rsi + r8*8 + 8 ] + shr rax, cl + and rax, r9 + mov [rdi + 8], rax + + jmp rawShr_endif_1 +rawShr_else_1: + xor rax, rax + mov [rdi + 8], rax +rawShr_endif_1: + + cmp r8, 1 + jae rawShr_if2_2 + + mov rax, [rsi + r8*8 + 16 ] + shr rax, cl + and rax, r9 + mov r11, [rsi + r8*8 + 24 ] + rol cx, 8 + shl r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + mov [rdi + 16], rax + + jmp rawShr_endif_2 +rawShr_if2_2: + 
jne rawShr_else_2 + + mov rax, [rsi + r8*8 + 16 ] + shr rax, cl + and rax, r9 + mov [rdi + 16], rax + + jmp rawShr_endif_2 +rawShr_else_2: + xor rax, rax + mov [rdi + 16], rax +rawShr_endif_2: + + cmp r8, 0 + jae rawShr_if2_3 + + mov rax, [rsi + r8*8 + 24 ] + shr rax, cl + and rax, r9 + mov r11, [rsi + r8*8 + 32 ] + rol cx, 8 + shl r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + mov [rdi + 24], rax + + jmp rawShr_endif_3 +rawShr_if2_3: + jne rawShr_else_3 + + mov rax, [rsi + r8*8 + 24 ] + shr rax, cl + and rax, r9 + mov [rdi + 24], rax + + jmp rawShr_endif_3 +rawShr_else_3: + xor rax, rax + mov [rdi + 24], rax +rawShr_endif_3: + + + ret + +rawShr_aligned: + + cmp r8, 3 + ja rawShr_if3_0 + mov rax, [rsi + r8*8 + 0 ] + mov [rdi + 0], rax + jmp rawShr_endif3_0 +rawShr_if3_0: + xor rax, rax + mov [rdi + 0], rax +rawShr_endif3_0: + + cmp r8, 2 + ja rawShr_if3_1 + mov rax, [rsi + r8*8 + 8 ] + mov [rdi + 8], rax + jmp rawShr_endif3_1 +rawShr_if3_1: + xor rax, rax + mov [rdi + 8], rax +rawShr_endif3_1: + + cmp r8, 1 + ja rawShr_if3_2 + mov rax, [rsi + r8*8 + 16 ] + mov [rdi + 16], rax + jmp rawShr_endif3_2 +rawShr_if3_2: + xor rax, rax + mov [rdi + 16], rax +rawShr_endif3_2: + + cmp r8, 0 + ja rawShr_if3_3 + mov rax, [rsi + r8*8 + 24 ] + mov [rdi + 24], rax + jmp rawShr_endif3_3 +rawShr_if3_3: + xor rax, rax + mov [rdi + 24], rax +rawShr_endif3_3: + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawShl +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= how much is shifted +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_rawShl: +rawShl: + cmp rdx, 0 + je Fr_rawCopy + + cmp rdx, 254 + jae Fr_rawZero + + mov r8, rdx + shr r8,6 + mov rcx, rdx + and rcx, 0x3F + jz rawShl_aligned + mov ch, 64 + sub ch, cl + + + mov r10, 1 + shl r10, cl + sub r10, 1 + mov r9, r10 + not r9 + + mov rdx, rsi + mov rax, r8 + shl rax, 3 + sub rdx, rax + + + cmp r8, 3 + jae rawShl_if2_3 + + 
mov rax, [rdx + 24 ] + shl rax, cl + and rax, r9 + mov r11, [rdx + 16 ] + rol cx, 8 + shr r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + + and rax, [lboMask] + + + mov [rdi + 24], rax + + jmp rawShl_endif_3 +rawShl_if2_3: + jne rawShl_else_3 + + mov rax, [rdx + 24 ] + shl rax, cl + and rax, r9 + + and rax, [lboMask] + + + mov [rdi + 24], rax + + jmp rawShl_endif_3 +rawShl_else_3: + xor rax, rax + mov [rdi + 24], rax +rawShl_endif_3: + + cmp r8, 2 + jae rawShl_if2_2 + + mov rax, [rdx + 16 ] + shl rax, cl + and rax, r9 + mov r11, [rdx + 8 ] + rol cx, 8 + shr r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + + + mov [rdi + 16], rax + + jmp rawShl_endif_2 +rawShl_if2_2: + jne rawShl_else_2 + + mov rax, [rdx + 16 ] + shl rax, cl + and rax, r9 + + + mov [rdi + 16], rax + + jmp rawShl_endif_2 +rawShl_else_2: + xor rax, rax + mov [rdi + 16], rax +rawShl_endif_2: + + cmp r8, 1 + jae rawShl_if2_1 + + mov rax, [rdx + 8 ] + shl rax, cl + and rax, r9 + mov r11, [rdx + 0 ] + rol cx, 8 + shr r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + + + mov [rdi + 8], rax + + jmp rawShl_endif_1 +rawShl_if2_1: + jne rawShl_else_1 + + mov rax, [rdx + 8 ] + shl rax, cl + and rax, r9 + + + mov [rdi + 8], rax + + jmp rawShl_endif_1 +rawShl_else_1: + xor rax, rax + mov [rdi + 8], rax +rawShl_endif_1: + + cmp r8, 0 + jae rawShl_if2_0 + + mov rax, [rdx + 0 ] + shl rax, cl + and rax, r9 + mov r11, [rdx + -8 ] + rol cx, 8 + shr r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + + + mov [rdi + 0], rax + + jmp rawShl_endif_0 +rawShl_if2_0: + jne rawShl_else_0 + + mov rax, [rdx + 0 ] + shl rax, cl + and rax, r9 + + + mov [rdi + 0], rax + + jmp rawShl_endif_0 +rawShl_else_0: + xor rax, rax + mov [rdi + 0], rax +rawShl_endif_0: + + + + + + + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc tmp_109 ; q is bigget so done. + jnz tmp_108 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc tmp_109 ; q is bigget so done. 
+ jnz tmp_108 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc tmp_109 ; q is bigget so done. + jnz tmp_108 ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc tmp_109 ; q is bigget so done. + jnz tmp_108 ; q is lower + + ; If equal substract q +tmp_108: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +tmp_109: + + ret; + +rawShl_aligned: + mov rdx, rsi + mov rax, r8 + shl rax, 3 + sub rdx, rax + + + cmp r8, 3 + ja rawShl_if3_3 + mov rax, [rdx + 24 ] + + and rax, [lboMask] + + mov [rdi + 24], rax + jmp rawShl_endif3_3 +rawShl_if3_3: + xor rax, rax + mov [rdi + 24], rax +rawShl_endif3_3: + + cmp r8, 2 + ja rawShl_if3_2 + mov rax, [rdx + 16 ] + + mov [rdi + 16], rax + jmp rawShl_endif3_2 +rawShl_if3_2: + xor rax, rax + mov [rdi + 16], rax +rawShl_endif3_2: + + cmp r8, 1 + ja rawShl_if3_1 + mov rax, [rdx + 8 ] + + mov [rdi + 8], rax + jmp rawShl_endif3_1 +rawShl_if3_1: + xor rax, rax + mov [rdi + 8], rax +rawShl_endif3_1: + + cmp r8, 0 + ja rawShl_if3_0 + mov rax, [rdx + 0 ] + + mov [rdi + 0], rax + jmp rawShl_endif3_0 +rawShl_if3_0: + xor rax, rax + mov [rdi + 0], rax +rawShl_endif3_0: + + + + + + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc tmp_111 ; q is bigget so done. + jnz tmp_110 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc tmp_111 ; q is bigget so done. + jnz tmp_110 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc tmp_111 ; q is bigget so done. + jnz tmp_110 ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc tmp_111 ; q is bigget so done. 
+ jnz tmp_110 ; q is lower + + ; If equal substract q +tmp_110: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +tmp_111: + + ret + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; shr +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_shr: + push rbp + push rsi + push rdi + push rdx + mov rbp, rsp + + + + + + + mov rcx, [rdx] + bt rcx, 63 ; Check if is short second operand + jnc tmp_112 + + ; long 2 + bt rcx, 62 ; Check if is montgomery second operand + jnc tmp_113 + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + +tmp_113: + mov rcx, [rdx + 8] + cmp rcx, 254 + jae tmp_114 + xor rax, rax + + cmp [rdx + 16], rax + jnz tmp_114 + + cmp [rdx + 24], rax + jnz tmp_114 + + cmp [rdx + 32], rax + jnz tmp_114 + + mov rdx, rcx + jmp do_shr + +tmp_114: + mov rcx, [q] + sub rcx, [rdx+8] + cmp rcx, 254 + jae setzero + mov rax, [q] + sub rax, [rdx+8] + + mov rax, [q+ 8] + sbb rax, [rdx + 16] + jnz setzero + + mov rax, [q+ 16] + sbb rax, [rdx + 24] + jnz setzero + + mov rax, [q+ 24] + sbb rax, [rdx + 32] + jnz setzero + + mov rdx, rcx + jmp do_shl + +tmp_112: + cmp ecx, 0 + jl tmp_115 + cmp ecx, 254 + jae setzero + movsx rdx, ecx + jmp do_shr +tmp_115: + neg ecx + cmp ecx, 254 + jae setzero + movsx rdx, ecx + jmp do_shl + + + + +;;;;;;;;;;;;;;;;;;;;;; +; shl +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_shl: + push rbp + push rsi + push rdi + push rdx + mov rbp, rsp + + + + + + mov rcx, [rdx] + bt rcx, 63 ; 
Check if is short second operand + jnc tmp_116 + + ; long 2 + bt rcx, 62 ; Check if is montgomery second operand + jnc tmp_117 + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + +tmp_117: + mov rcx, [rdx + 8] + cmp rcx, 254 + jae tmp_118 + xor rax, rax + + cmp [rdx + 16], rax + jnz tmp_118 + + cmp [rdx + 24], rax + jnz tmp_118 + + cmp [rdx + 32], rax + jnz tmp_118 + + mov rdx, rcx + jmp do_shl + +tmp_118: + mov rcx, [q] + sub rcx, [rdx+8] + cmp rcx, 254 + jae setzero + mov rax, [q] + sub rax, [rdx+8] + + mov rax, [q+ 8] + sbb rax, [rdx + 16] + jnz setzero + + mov rax, [q+ 16] + sbb rax, [rdx + 24] + jnz setzero + + mov rax, [q+ 24] + sbb rax, [rdx + 32] + jnz setzero + + mov rdx, rcx + jmp do_shr + +tmp_116: + cmp ecx, 0 + jl tmp_119 + cmp ecx, 254 + jae setzero + movsx rdx, ecx + jmp do_shl +tmp_119: + neg ecx + cmp ecx, 254 + jae setzero + movsx rdx, ecx + jmp do_shr + + + +;;;;;;;;;; +;;; doShl +;;;;;;;;;; +do_shl: + mov rcx, [rsi] + bt rcx, 63 ; Check if is short second operand + jc do_shll +do_shls: + + movsx rax, ecx + cmp rax, 0 + jz setzero; + jl do_shlcl + + cmp rdx, 31 + jae do_shlcl + + mov cl, dl + shl rax, cl + mov rcx, rax + shr rcx, 31 + jnz do_shlcl + mov [rdi], rax + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + +do_shlcl: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp do_shlln + +do_shll: + bt rcx, 62 ; Check if is short second operand + jnc do_shlln + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + +do_shlln: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + add rdi, 8 + add rsi, 8 + call rawShl + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + + +;;;;;;;;;; +;;; doShr +;;;;;;;;;; +do_shr: + mov rcx, [rsi] + bt rcx, 63 ; Check if is short second operand + jc do_shrl +do_shrs: 
+ movsx rax, ecx + cmp rax, 0 + jz setzero; + jl do_shrcl + + cmp rdx, 31 + jae setzero + + mov cl, dl + shr rax, cl + mov [rdi], rax + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + +do_shrcl: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + +do_shrl: + bt rcx, 62 ; Check if is short second operand + jnc do_shrln + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + +do_shrln: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + add rdi, 8 + add rsi, 8 + call rawShr + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + +setzero: + xor rax, rax + mov [rdi], rax + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rgt - Raw Greater Than +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi > *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rgt: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc rgt_l1 + bt r9, 63 ; Check if is short second operand + jc rgt_s1l2 + +rgt_s1s2: ; Both operands are short + cmp r8d, r9d + jg rgt_ret1 + jmp rgt_ret0 + + +rgt_l1: + bt r9, 63 ; Check if is short second operand + jc rgt_l1l2 + +;;;;;;;; +rgt_l1s2: + bt r8, 62 ; check if montgomery first + jc rgt_l1ms2 +rgt_l1ns2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + +rgt_l1ms2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rgtL1L2 + + +;;;;;;;; 
+rgt_s1l2: + bt r9, 62 ; check if montgomery second + jc rgt_s1l2m +rgt_s1l2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rgtL1L2 + +rgt_s1l2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + +;;;; +rgt_l1l2: + bt r8, 62 ; check if montgomery first + jc rgt_l1ml2 +rgt_l1nl2: + bt r9, 62 ; check if montgomery second + jc rgt_l1nl2m +rgt_l1nl2n: + jmp rgtL1L2 + +rgt_l1nl2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + +rgt_l1ml2: + bt r9, 62 ; check if montgomery second + jc rgt_l1ml2m +rgt_l1ml2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rgtL1L2 + +rgt_l1ml2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + + +;;;;;; +; rgtL1L2 +;;;;;; + +rgtL1L2: + + + mov rax, [rsi + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc 
rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jmp rgtl1l2_p1 + + + +rgtl1l2_p1: + + + mov rax, [rdx + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jmp rgtRawL1L2 + + + + +rgtl1l2_n1: + + + mov rax, [rdx + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jmp rgt_ret0 + + + + + +rgtRawL1L2: + + mov rax, [rsi + 32] + cmp [rdx + 32], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + jnz rgt_ret0 + + + mov rax, [rsi + 24] + cmp [rdx + 24], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + jnz rgt_ret0 + + + mov rax, [rsi + 16] + cmp [rdx + 16], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + jnz rgt_ret0 + + + mov rax, [rsi + 8] + cmp [rdx + 8], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + + +rgt_ret0: + xor rax, rax + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +rgt_ret1: + mov rax, 1 + mov rsp, 
rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rlt - Raw Less Than +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi > *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rlt: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc rlt_l1 + bt r9, 63 ; Check if is short second operand + jc rlt_s1l2 + +rlt_s1s2: ; Both operands are short + cmp r8d, r9d + jl rlt_ret1 + jmp rlt_ret0 + + +rlt_l1: + bt r9, 63 ; Check if is short second operand + jc rlt_l1l2 + +;;;;;;;; +rlt_l1s2: + bt r8, 62 ; check if montgomery first + jc rlt_l1ms2 +rlt_l1ns2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + +rlt_l1ms2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rltL1L2 + + +;;;;;;;; +rlt_s1l2: + bt r9, 62 ; check if montgomery second + jc rlt_s1l2m +rlt_s1l2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rltL1L2 + +rlt_s1l2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + +;;;; +rlt_l1l2: + bt r8, 62 ; check if montgomery first + jc rlt_l1ml2 +rlt_l1nl2: + bt r9, 62 ; check if montgomery second + jc rlt_l1nl2m +rlt_l1nl2n: + jmp rltL1L2 + +rlt_l1nl2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push 
r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + +rlt_l1ml2: + bt r9, 62 ; check if montgomery second + jc rlt_l1ml2m +rlt_l1ml2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rltL1L2 + +rlt_l1ml2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + + +;;;;;; +; rltL1L2 +;;;;;; + +rltL1L2: + + + mov rax, [rsi + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rltl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rltl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rltl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jmp rltl1l2_p1 + + + +rltl1l2_p1: + + + mov rax, [rdx + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jnz rltRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jnz rltRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jnz rltRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jmp rltRawL1L2 + + + + +rltl1l2_n1: + + + mov rax, [rdx 
+ 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rlt_ret1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rlt_ret1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rlt_ret1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jmp rlt_ret1 + + + + + +rltRawL1L2: + + mov rax, [rsi + 32] + cmp [rdx + 32], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + mov rax, [rsi + 24] + cmp [rdx + 24], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + mov rax, [rsi + 16] + cmp [rdx + 16], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + mov rax, [rsi + 8] + cmp [rdx + 8], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + +rlt_ret0: + xor rax, rax + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +rlt_ret1: + mov rax, 1 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; req - Raw Eq +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi == *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_req: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc req_l1 + bt r9, 63 ; Check if is short second operand + jc req_s1l2 + +req_s1s2: ; Both operands are short + cmp r8d, r9d + je req_ret1 + jmp req_ret0 + + +req_l1: + bt r9, 63 ; Check if is short second operand + jc req_l1l2 + +;;;;;;;; +req_l1s2: + bt r8, 62 ; check if montgomery first + jc req_l1ms2 +req_l1ns2: + + mov r8, rdi + 
sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp reqL1L2 + +req_l1ms2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + jmp reqL1L2 + + +;;;;;;;; +req_s1l2: + bt r9, 62 ; check if montgomery second + jc req_s1l2m +req_s1l2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp reqL1L2 + +req_s1l2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + jmp reqL1L2 + +;;;; +req_l1l2: + bt r8, 62 ; check if montgomery first + jc req_l1ml2 +req_l1nl2: + bt r9, 62 ; check if montgomery second + jc req_l1nl2m +req_l1nl2n: + jmp reqL1L2 + +req_l1nl2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + jmp reqL1L2 + +req_l1ml2: + bt r9, 62 ; check if montgomery second + jc req_l1ml2m +req_l1ml2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + jmp reqL1L2 + +req_l1ml2m: + jmp reqL1L2 + + +;;;;;; +; eqL1L2 +;;;;;; + +reqL1L2: + + mov rax, [rsi + 8] + cmp [rdx + 8], rax + jne req_ret0 ; rsi 1st > 2nd + + mov rax, [rsi + 16] + cmp [rdx + 16], rax + jne req_ret0 ; rsi 1st > 2nd + + mov rax, [rsi + 24] + cmp [rdx + 24], rax + jne req_ret0 ; rsi 1st > 2nd + + mov rax, [rsi + 32] + cmp [rdx + 32], rax + jne req_ret0 ; rsi 1st > 2nd + + +req_ret1: + mov rax, 1 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +req_ret0: + xor rax, rax + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; gt +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero 
or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_gt: + call Fr_rgt + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; lt +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_lt: + call Fr_rlt + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; eq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_eq: + call Fr_req + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; neq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_neq: + call Fr_req + xor rax, 1 + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; geq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_geq: + call Fr_rlt + xor rax, 1 + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; leq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. 
+; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_leq: + call Fr_rgt + xor rax, 1 + mov [rdi], rax + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawIsEq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rdi <= Pointer to element 1 +; rsi <= Pointer to element 2 +; Returns +; ax <= 1 if are equal 0, otherwise +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rawIsEq: + + mov rax, [rsi + 0] + cmp [rdi + 0], rax + jne rawIsEq_ret0 + + mov rax, [rsi + 8] + cmp [rdi + 8], rax + jne rawIsEq_ret0 + + mov rax, [rsi + 16] + cmp [rdi + 16], rax + jne rawIsEq_ret0 + + mov rax, [rsi + 24] + cmp [rdi + 24], rax + jne rawIsEq_ret0 + +rawIsEq_ret1: + mov rax, 1 + ret + +rawIsEq_ret0: + xor rax, rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawIsZero +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rdi <= Pointer to element 1 +; Returns +; ax <= 1 if is 0, otherwise +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rawIsZero: + + cmp qword [rdi + 0], $0 + jne rawIsZero_ret0 + + cmp qword [rdi + 8], $0 + jne rawIsZero_ret0 + + cmp qword [rdi + 16], $0 + jne rawIsZero_ret0 + + cmp qword [rdi + 24], $0 + jne rawIsZero_ret0 + + +rawIsZero_ret1: + mov rax, 1 + ret + +rawIsZero_ret0: + xor rax, rax + ret + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; land +;;;;;;;;;;;;;;;;;;;;;; +; Logical and between two elements +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result zero or one +; Modified Registers: +; rax, rcx, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_land: + + + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_120 + + test eax, eax + jz retZero_122 + jmp retOne_121 + +tmp_120: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_121 + + mov rax, [rsi + 16] + test rax, rax + jnz retOne_121 + + mov rax, [rsi + 24] + test rax, rax + jnz retOne_121 + + mov rax, [rsi + 32] + test rax, rax + jnz retOne_121 + + +retZero_122: + mov qword r8, 0 + jmp done_123 + +retOne_121: + mov qword r8, 1 + 
+done_123: + + + + + + + + mov rax, [rdx] + bt rax, 63 + jc tmp_124 + + test eax, eax + jz retZero_126 + jmp retOne_125 + +tmp_124: + + mov rax, [rdx + 8] + test rax, rax + jnz retOne_125 + + mov rax, [rdx + 16] + test rax, rax + jnz retOne_125 + + mov rax, [rdx + 24] + test rax, rax + jnz retOne_125 + + mov rax, [rdx + 32] + test rax, rax + jnz retOne_125 + + +retZero_126: + mov qword rcx, 0 + jmp done_127 + +retOne_125: + mov qword rcx, 1 + +done_127: + + and rcx, r8 + mov [rdi], rcx + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; lor +;;;;;;;;;;;;;;;;;;;;;; +; Logical or between two elements +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result zero or one +; Modified Registers: +; rax, rcx, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_lor: + + + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_128 + + test eax, eax + jz retZero_130 + jmp retOne_129 + +tmp_128: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_129 + + mov rax, [rsi + 16] + test rax, rax + jnz retOne_129 + + mov rax, [rsi + 24] + test rax, rax + jnz retOne_129 + + mov rax, [rsi + 32] + test rax, rax + jnz retOne_129 + + +retZero_130: + mov qword r8, 0 + jmp done_131 + +retOne_129: + mov qword r8, 1 + +done_131: + + + + + + + + mov rax, [rdx] + bt rax, 63 + jc tmp_132 + + test eax, eax + jz retZero_134 + jmp retOne_133 + +tmp_132: + + mov rax, [rdx + 8] + test rax, rax + jnz retOne_133 + + mov rax, [rdx + 16] + test rax, rax + jnz retOne_133 + + mov rax, [rdx + 24] + test rax, rax + jnz retOne_133 + + mov rax, [rdx + 32] + test rax, rax + jnz retOne_133 + + +retZero_134: + mov qword rcx, 0 + jmp done_135 + +retOne_133: + mov qword rcx, 1 + +done_135: + + or rcx, r8 + mov [rdi], rcx + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; lnot +;;;;;;;;;;;;;;;;;;;;;; +; Do the logical not of an element +; Params: +; rsi <= Pointer to element to be tested +; rdi <= Pointer to result one if element1 is zero and zero otherwise +; Modified Registers: +; rax, rax, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_lnot: + 
+ + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_136 + + test eax, eax + jz retZero_138 + jmp retOne_137 + +tmp_136: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_137 + + mov rax, [rsi + 16] + test rax, rax + jnz retOne_137 + + mov rax, [rsi + 24] + test rax, rax + jnz retOne_137 + + mov rax, [rsi + 32] + test rax, rax + jnz retOne_137 + + +retZero_138: + mov qword rcx, 0 + jmp done_139 + +retOne_137: + mov qword rcx, 1 + +done_139: + + test rcx, rcx + + jz lnot_retOne +lnot_retZero: + mov qword [rdi], 0 + ret +lnot_retOne: + mov qword [rdi], 1 + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; isTrue +;;;;;;;;;;;;;;;;;;;;;; +; Convert a 64 bit integer to a long format field element +; Params: +; rsi <= Pointer to the element +; Returs: +; rax <= 1 if true 0 if false +;;;;;;;;;;;;;;;;;;;;;;; +Fr_isTrue: + + + + + + + mov rax, [rdi] + bt rax, 63 + jc tmp_140 + + test eax, eax + jz retZero_142 + jmp retOne_141 + +tmp_140: + + mov rax, [rdi + 8] + test rax, rax + jnz retOne_141 + + mov rax, [rdi + 16] + test rax, rax + jnz retOne_141 + + mov rax, [rdi + 24] + test rax, rax + jnz retOne_141 + + mov rax, [rdi + 32] + test rax, rax + jnz retOne_141 + + +retZero_142: + mov qword rax, 0 + jmp done_143 + +retOne_141: + mov qword rax, 1 + +done_143: + + ret + + + + + + section .data +Fr_q: + dd 0 + dd 0x80000000 +Fr_rawq: +q dq 0x43e1f593f0000001,0x2833e84879b97091,0xb85045b68181585d,0x30644e72e131a029 +half dq 0xa1f0fac9f8000000,0x9419f4243cdcb848,0xdc2822db40c0ac2e,0x183227397098d014 +R2 dq 0x1bb8e645ae216da7,0x53fe3ab1e35c59e3,0x8c49833d53bb8085,0x0216d0b17f4e44a5 +Fr_R3: + dd 0 + dd 0x80000000 +Fr_rawR3: +R3 dq 0x5e94d8e1b4bf0040,0x2a489cbe1cfbb6b8,0x893cc664a19fcfed,0x0cf8594b7fcc657c +lboMask dq 0x3fffffffffffffff +np dq 0xc2e1f593efffffff + diff --git a/example/support/fr.cpp b/example/support/fr.cpp new file mode 100644 index 0000000..4658ad7 --- /dev/null +++ b/example/support/fr.cpp @@ -0,0 +1,322 @@ +#include "fr.hpp" +#include +#include +#include +#include + + +static 
mpz_t q; +static mpz_t zero; +static mpz_t one; +static mpz_t mask; +static size_t nBits; +static bool initialized = false; + +void Fr_toMpz(mpz_t r, PFrElement pE) { + FrElement tmp; + Fr_toNormal(&tmp, pE); + if (!(tmp.type & Fr_LONG)) { + mpz_set_si(r, tmp.shortVal); + if (tmp.shortVal<0) { + mpz_add(r, r, q); + } + } else { + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)tmp.longVal); + } +} + +void Fr_fromMpz(PFrElement pE, mpz_t v) { + if (mpz_fits_sint_p(v)) { + pE->type = Fr_SHORT; + pE->shortVal = mpz_get_si(v); + } else { + pE->type = Fr_LONG; + for (int i=0; ilongVal[i] = 0; + mpz_export((void *)(pE->longVal), NULL, -1, 8, -1, 0, v); + } +} + + +bool Fr_init() { + if (initialized) return false; + initialized = true; + mpz_init(q); + mpz_import(q, Fr_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal); + mpz_init_set_ui(zero, 0); + mpz_init_set_ui(one, 1); + nBits = mpz_sizeinbase (q, 2); + mpz_init(mask); + mpz_mul_2exp(mask, one, nBits); + mpz_sub(mask, mask, one); + return true; +} + +void Fr_str2element(PFrElement pE, char const *s, uint base) { + mpz_t mr; + mpz_init_set_str(mr, s, base); + mpz_fdiv_r(mr, mr, q); + Fr_fromMpz(pE, mr); + mpz_clear(mr); +} + +char *Fr_element2str(PFrElement pE) { + FrElement tmp; + mpz_t r; + if (!(pE->type & Fr_LONG)) { + if (pE->shortVal>=0) { + char *r = new char[32]; + sprintf(r, "%d", pE->shortVal); + return r; + } else { + mpz_init_set_si(r, pE->shortVal); + mpz_add(r, r, q); + } + } else { + Fr_toNormal(&tmp, pE); + mpz_init(r); + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)tmp.longVal); + } + char *res = mpz_get_str (0, 10, r); + mpz_clear(r); + return res; +} + +void Fr_idiv(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + // char *s1 = mpz_get_str (0, 10, ma); + // printf("s1 %s\n", s1); + Fr_toMpz(mb, b); + // char *s2 = mpz_get_str (0, 10, mb); + // printf("s2 %s\n", s2); + mpz_fdiv_q(mr, ma, mb); + // 
char *sr = mpz_get_str (0, 10, mr); + // printf("r %s\n", sr); + Fr_fromMpz(r, mr); + + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); +} + +void Fr_mod(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + Fr_toMpz(mb, b); + mpz_fdiv_r(mr, ma, mb); + Fr_fromMpz(r, mr); + + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); +} + +void Fr_pow(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + Fr_toMpz(mb, b); + mpz_powm(mr, ma, mb, q); + Fr_fromMpz(r, mr); + + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); +} + +void Fr_inv(PFrElement r, PFrElement a) { + mpz_t ma; + mpz_t mr; + mpz_init(ma); + mpz_init(mr); + + Fr_toMpz(ma, a); + mpz_invert(mr, ma, q); + Fr_fromMpz(r, mr); + mpz_clear(ma); + mpz_clear(mr); +} + +void Fr_div(PFrElement r, PFrElement a, PFrElement b) { + FrElement tmp; + Fr_inv(&tmp, b); + Fr_mul(r, a, &tmp); +} + +void Fr_fail() { + assert(false); +} + +void Fr_longErr() +{ + Fr_fail(); +} + +RawFr::RawFr() { + Fr_init(); + set(fZero, 0); + set(fOne, 1); + neg(fNegOne, fOne); +} + +RawFr::~RawFr() { +} + +void RawFr::fromString(Element &r, const std::string &s, uint32_t radix) { + mpz_t mr; + mpz_init_set_str(mr, s.c_str(), radix); + mpz_fdiv_r(mr, mr, q); + for (int i=0; i>3] & (1 << (p & 0x7))) +void RawFr::exp(Element &r, const Element &base, uint8_t* scalar, unsigned int scalarSize) { + bool oneFound = false; + Element copyBase; + copy(copyBase, base); + for (int i=scalarSize*8-1; i>=0; i--) { + if (!oneFound) { + if ( !BIT_IS_SET(scalar, i) ) continue; + copy(r, copyBase); + oneFound = true; + continue; + } + square(r, r); + if ( BIT_IS_SET(scalar, i) ) { + mul(r, r, copyBase); + } + } + if (!oneFound) { + copy(r, fOne); + } +} + +void RawFr::toMpz(mpz_t r, const Element &a) { + Element tmp; + Fr_rawFromMontgomery(tmp.v, a.v); + mpz_import(r, 
Fr_N64, -1, 8, -1, 0, (const void *)tmp.v); +} + +void RawFr::fromMpz(Element &r, const mpz_t a) { + for (int i=0; i +#include +#include + +#ifdef __APPLE__ +#include // typedef unsigned int uint; +#endif // __APPLE__ + +extern FrElement Fr_q; +extern FrElement Fr_R2; +extern FrElement Fr_R3; +extern FrRawElement Fr_rawq; +extern FrRawElement Fr_rawR3; + +#ifdef USE_ASM + +#if defined(ARCH_X86_64) + +extern "C" void Fr_copy(PFrElement r, PFrElement a); +extern "C" void Fr_copyn(PFrElement r, PFrElement a, int n); +extern "C" void Fr_add(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_sub(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_neg(PFrElement r, PFrElement a); +extern "C" void Fr_mul(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_square(PFrElement r, PFrElement a); +extern "C" void Fr_band(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bxor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bnot(PFrElement r, PFrElement a); +extern "C" void Fr_shl(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_shr(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_eq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_neq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lt(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_gt(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_leq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_geq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_land(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lnot(PFrElement r, PFrElement a); +extern "C" void Fr_toNormal(PFrElement r, PFrElement a); +extern "C" void Fr_toLongNormal(PFrElement r, PFrElement a); +extern "C" void Fr_toMontgomery(PFrElement r, PFrElement a); + +extern 
"C" int Fr_isTrue(PFrElement pE); +extern "C" int Fr_toInt(PFrElement pE); + +extern "C" void Fr_rawCopy(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawSwap(FrRawElement pRawResult, FrRawElement pRawA); +extern "C" void Fr_rawAdd(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawSub(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawNeg(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawMMul(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawMSquare(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawMMul1(FrRawElement pRawResult, const FrRawElement pRawA, uint64_t pRawB); +extern "C" void Fr_rawToMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA); +extern "C" void Fr_rawFromMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA); +extern "C" int Fr_rawIsEq(const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" int Fr_rawIsZero(const FrRawElement pRawB); +extern "C" void Fr_rawShl(FrRawElement r, FrRawElement a, uint64_t b); +extern "C" void Fr_rawShr(FrRawElement r, FrRawElement a, uint64_t b); + +extern "C" void Fr_fail(); + +#elif defined(ARCH_ARM64) + + void Fr_copy(PFrElement r, PFrElement a); + void Fr_mul(PFrElement r, PFrElement a, PFrElement b); + void Fr_toNormal(PFrElement r, PFrElement a); + + void Fr_toLongNormal(PFrElement r, PFrElement a); + int Fr_isTrue(PFrElement pE); + void Fr_copyn(PFrElement r, PFrElement a, int n); + void Fr_lt(PFrElement r, PFrElement a, PFrElement b); + int Fr_toInt(PFrElement pE); + void Fr_shr(PFrElement r, PFrElement a, PFrElement b); + void Fr_shl(PFrElement r, PFrElement a, PFrElement b); + void Fr_band(PFrElement r, PFrElement a, PFrElement b); + void Fr_bor(PFrElement r, PFrElement a, PFrElement b); + void Fr_bxor(PFrElement r, PFrElement a, PFrElement b); + void 
Fr_bnot(PFrElement r, PFrElement a); + void Fr_sub(PFrElement r, PFrElement a, PFrElement b); + void Fr_eq(PFrElement r, PFrElement a, PFrElement b); + void Fr_neq(PFrElement r, PFrElement a, PFrElement b); + void Fr_add(PFrElement r, PFrElement a, PFrElement b); + void Fr_gt(PFrElement r, PFrElement a, PFrElement b); + void Fr_leq(PFrElement r, PFrElement a, PFrElement b); + void Fr_geq(PFrElement r, PFrElement a, PFrElement b); + void Fr_lor(PFrElement r, PFrElement a, PFrElement b); + void Fr_lnot(PFrElement r, PFrElement a); + void Fr_land(PFrElement r, PFrElement a, PFrElement b); + void Fr_neg(PFrElement r, PFrElement a); + void Fr_toMontgomery(PFrElement r, PFrElement a); + void Fr_square(PFrElement r, PFrElement a); + +extern "C" void Fr_rawCopy(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawSwap(FrRawElement pRawResult, FrRawElement pRawA); +extern "C" void Fr_rawAdd(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawSub(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawNeg(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawMMul(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); + void Fr_rawMSquare(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawMMul1(FrRawElement pRawResult, const FrRawElement pRawA, uint64_t pRawB); + void Fr_rawToMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA); +extern "C" void Fr_rawFromMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA); +extern "C" int Fr_rawIsEq(const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" int Fr_rawIsZero(const FrRawElement pRawB); + void Fr_rawZero(FrRawElement pRawResult); +extern "C" void Fr_rawCopyS2L(FrRawElement pRawResult, int64_t val); +extern "C" void Fr_rawAddLS(FrRawElement pRawResult, FrRawElement pRawA, uint64_t rawB); +extern "C" void 
Fr_rawSubSL(FrRawElement pRawResult, uint64_t rawA, FrRawElement pRawB); +extern "C" void Fr_rawSubLS(FrRawElement pRawResult, FrRawElement pRawA, uint64_t rawB); +extern "C" void Fr_rawNegLS(FrRawElement pRawResult, FrRawElement pRawA, uint64_t rawB); +extern "C" int Fr_rawCmp(FrRawElement pRawA, FrRawElement pRawB); +extern "C" void Fr_rawAnd(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB); +extern "C" void Fr_rawOr(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB); +extern "C" void Fr_rawXor(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB); +extern "C" void Fr_rawShl(FrRawElement r, FrRawElement a, uint64_t b); +extern "C" void Fr_rawShr(FrRawElement r, FrRawElement a, uint64_t b); +extern "C" void Fr_rawNot(FrRawElement pRawResult, FrRawElement pRawA); +extern "C" void Fr_rawSubRegular(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB); + + void Fr_fail(); + void Fr_longErr(); + +#endif + +#else + + +void Fr_copy(PFrElement r, PFrElement a); +void Fr_mul(PFrElement r, PFrElement a, PFrElement b); +void Fr_toNormal(PFrElement r, PFrElement a); + +void Fr_toLongNormal(PFrElement r, PFrElement a); +int Fr_isTrue(PFrElement pE); +void Fr_copyn(PFrElement r, PFrElement a, int n); +void Fr_lt(PFrElement r, PFrElement a, PFrElement b); +int Fr_toInt(PFrElement pE); +void Fr_shl(PFrElement r, PFrElement a, PFrElement b); +void Fr_shr(PFrElement r, PFrElement a, PFrElement b); +void Fr_band(PFrElement r, PFrElement a, PFrElement b); +void Fr_bor(PFrElement r, PFrElement a, PFrElement b); +void Fr_bxor(PFrElement r, PFrElement a, PFrElement b); +void Fr_bnot(PFrElement r, PFrElement a); +void Fr_sub(PFrElement r, PFrElement a, PFrElement b); +void Fr_eq(PFrElement r, PFrElement a, PFrElement b); +void Fr_neq(PFrElement r, PFrElement a, PFrElement b); +void Fr_add(PFrElement r, PFrElement a, PFrElement b); +void Fr_gt(PFrElement r, PFrElement a, PFrElement b); +void Fr_leq(PFrElement r, PFrElement a, PFrElement 
b); +void Fr_geq(PFrElement r, PFrElement a, PFrElement b); +void Fr_lor(PFrElement r, PFrElement a, PFrElement b); +void Fr_lnot(PFrElement r, PFrElement a); +void Fr_land(PFrElement r, PFrElement a, PFrElement b); +void Fr_neg(PFrElement r, PFrElement a); +void Fr_toMontgomery(PFrElement r, PFrElement a); +void Fr_square(PFrElement r, PFrElement a); + +void Fr_rawCopy(FrRawElement pRawResult, const FrRawElement pRawA); +void Fr_rawSwap(FrRawElement pRawResult, FrRawElement pRawA); +void Fr_rawAdd(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +void Fr_rawSub(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +void Fr_rawNeg(FrRawElement pRawResult, const FrRawElement pRawA); +void Fr_rawMMul(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +void Fr_rawMSquare(FrRawElement pRawResult, const FrRawElement pRawA); +void Fr_rawMMul1(FrRawElement pRawResult, const FrRawElement pRawA, uint64_t pRawB); +void Fr_rawToMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA); +void Fr_rawFromMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA); +int Fr_rawIsEq(const FrRawElement pRawA, const FrRawElement pRawB); +int Fr_rawIsZero(const FrRawElement pRawB); +void Fr_rawZero(FrRawElement pRawResult); +void Fr_rawCopyS2L(FrRawElement pRawResult, int64_t val); +void Fr_rawAddLS(FrRawElement pRawResult, FrRawElement pRawA, uint64_t rawB); +void Fr_rawSubSL(FrRawElement pRawResult, uint64_t rawA, FrRawElement pRawB); +void Fr_rawSubLS(FrRawElement pRawResult, FrRawElement pRawA, uint64_t rawB); +void Fr_rawNegLS(FrRawElement pRawResult, FrRawElement pRawA, uint64_t rawB); +int Fr_rawCmp(FrRawElement pRawA, FrRawElement pRawB); +void Fr_rawAnd(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB); +void Fr_rawOr(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB); +void Fr_rawXor(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB); +void 
Fr_rawShl(FrRawElement r, FrRawElement a, uint64_t b); +void Fr_rawShr(FrRawElement r, FrRawElement a, uint64_t b); +void Fr_rawNot(FrRawElement pRawResult, FrRawElement pRawA); +void Fr_rawSubRegular(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB); + +void Fr_fail(); +void Fr_longErr(); + +#endif + +// Pending functions to convert + +void Fr_str2element(PFrElement pE, char const*s, uint base); +char *Fr_element2str(PFrElement pE); +void Fr_idiv(PFrElement r, PFrElement a, PFrElement b); +void Fr_mod(PFrElement r, PFrElement a, PFrElement b); +void Fr_inv(PFrElement r, PFrElement a); +void Fr_div(PFrElement r, PFrElement a, PFrElement b); +void Fr_pow(PFrElement r, PFrElement a, PFrElement b); + +class RawFr { + +public: + const static int N64 = Fr_N64; + const static int MaxBits = 254; + + + struct Element { + FrRawElement v; + }; + +private: + Element fZero; + Element fOne; + Element fNegOne; + +public: + + RawFr(); + ~RawFr(); + + const Element &zero() { return fZero; }; + const Element &one() { return fOne; }; + const Element &negOne() { return fNegOne; }; + Element set(int value); + void set(Element &r, int value); + + void fromString(Element &r, const std::string &n, uint32_t radix = 10); + std::string toString(const Element &a, uint32_t radix = 10); + + void inline copy(Element &r, const Element &a) { Fr_rawCopy(r.v, a.v); }; + void inline swap(Element &a, Element &b) { Fr_rawSwap(a.v, b.v); }; + void inline add(Element &r, const Element &a, const Element &b) { Fr_rawAdd(r.v, a.v, b.v); }; + void inline sub(Element &r, const Element &a, const Element &b) { Fr_rawSub(r.v, a.v, b.v); }; + void inline mul(Element &r, const Element &a, const Element &b) { Fr_rawMMul(r.v, a.v, b.v); }; + + Element inline add(const Element &a, const Element &b) { Element r; Fr_rawAdd(r.v, a.v, b.v); return r;}; + Element inline sub(const Element &a, const Element &b) { Element r; Fr_rawSub(r.v, a.v, b.v); return r;}; + Element inline mul(const Element &a, const 
Element &b) { Element r; Fr_rawMMul(r.v, a.v, b.v); return r;}; + + Element inline neg(const Element &a) { Element r; Fr_rawNeg(r.v, a.v); return r; }; + Element inline square(const Element &a) { Element r; Fr_rawMSquare(r.v, a.v); return r; }; + + Element inline add(int a, const Element &b) { return add(set(a), b);}; + Element inline sub(int a, const Element &b) { return sub(set(a), b);}; + Element inline mul(int a, const Element &b) { return mul(set(a), b);}; + + Element inline add(const Element &a, int b) { return add(a, set(b));}; + Element inline sub(const Element &a, int b) { return sub(a, set(b));}; + Element inline mul(const Element &a, int b) { return mul(a, set(b));}; + + void inline mul1(Element &r, const Element &a, uint64_t b) { Fr_rawMMul1(r.v, a.v, b); }; + void inline neg(Element &r, const Element &a) { Fr_rawNeg(r.v, a.v); }; + void inline square(Element &r, const Element &a) { Fr_rawMSquare(r.v, a.v); }; + void inv(Element &r, const Element &a); + void div(Element &r, const Element &a, const Element &b); + void exp(Element &r, const Element &base, uint8_t* scalar, unsigned int scalarSize); + + void inline toMontgomery(Element &r, const Element &a) { Fr_rawToMontgomery(r.v, a.v); }; + void inline fromMontgomery(Element &r, const Element &a) { Fr_rawFromMontgomery(r.v, a.v); }; + int inline eq(const Element &a, const Element &b) { return Fr_rawIsEq(a.v, b.v); }; + int inline isZero(const Element &a) { return Fr_rawIsZero(a.v); }; + + void toMpz(mpz_t r, const Element &a); + void fromMpz(Element &a, const mpz_t r); + + int toRprBE(const Element &element, uint8_t *data, int bytes); + int fromRprBE(Element &element, const uint8_t *data, int bytes); + + int bytes ( void ) { return Fr_N64 * 8; }; + + void fromUI(Element &r, unsigned long int v); + + static RawFr field; + +}; + + +#endif // __FR_H + + + diff --git a/example/support/fr_element.hpp b/example/support/fr_element.hpp new file mode 100644 index 0000000..e2bb52c --- /dev/null +++ 
b/example/support/fr_element.hpp @@ -0,0 +1,23 @@ +#ifndef FR_ELEMENT_HPP +#define FR_ELEMENT_HPP + +#include + +#define Fr_N64 4 +#define Fr_SHORT 0x00000000 +#define Fr_MONTGOMERY 0x40000000 +#define Fr_SHORTMONTGOMERY 0x40000000 +#define Fr_LONG 0x80000000 +#define Fr_LONGMONTGOMERY 0xC0000000 + +typedef uint64_t FrRawElement[Fr_N64]; + +typedef struct __attribute__((__packed__)) { + int32_t shortVal; + uint32_t type; + FrRawElement longVal; +} FrElement; + +typedef FrElement *PFrElement; + +#endif // FR_ELEMENT_HPP diff --git a/example/support/fr_generic.cpp b/example/support/fr_generic.cpp new file mode 100755 index 0000000..3d8ee16 --- /dev/null +++ b/example/support/fr_generic.cpp @@ -0,0 +1,2389 @@ +#include "fr.hpp" +#include +#include +#include + +FrElement Fr_q = {0, 0x80000000, {0x43e1f593f0000001,0x2833e84879b97091,0xb85045b68181585d,0x30644e72e131a029}}; +FrElement Fr_R2 = {0, 0x80000000, {0x1bb8e645ae216da7,0x53fe3ab1e35c59e3,0x8c49833d53bb8085,0x0216d0b17f4e44a5}}; +FrElement Fr_R3 = {0, 0x80000000, {0x5e94d8e1b4bf0040,0x2a489cbe1cfbb6b8,0x893cc664a19fcfed,0x0cf8594b7fcc657c}}; + +static FrRawElement half = {0xa1f0fac9f8000000,0x9419f4243cdcb848,0xdc2822db40c0ac2e,0x183227397098d014}; + + +void Fr_copy(PFrElement r, const PFrElement a) +{ + *r = *a; +} + +void Fr_toNormal(PFrElement r, PFrElement a) +{ + if (a->type == Fr_LONGMONTGOMERY) + { + r->type = Fr_LONG; + Fr_rawFromMontgomery(r->longVal, a->longVal); + } + else + { + Fr_copy(r, a); + } +} + +static inline int has_mul32_overflow(int64_t val) +{ + int64_t sign = val >> 31; + + if (sign) + { + sign = ~sign; + } + + return sign ? 
1 : 0; +} + +static inline int Fr_rawSMul(int64_t *r, int32_t a, int32_t b) +{ + *r = (int64_t)a * b; + + return has_mul32_overflow(*r); +} + +static inline void mul_s1s2(PFrElement r, PFrElement a, PFrElement b) +{ + int64_t result; + + int overflow = Fr_rawSMul(&result, a->shortVal, b->shortVal); + + if (overflow) + { + Fr_rawCopyS2L(r->longVal, result); + r->type = Fr_LONG; + r->shortVal = 0; + } + else + { + // done the same way as in intel asm implementation + r->shortVal = (int32_t)result; + r->type = Fr_SHORT; + // + + Fr_rawCopyS2L(r->longVal, result); + r->type = Fr_LONG; + r->shortVal = 0; + } +} + +static inline void mul_l1nl2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + Fr_rawMMul(r->longVal, a->longVal, b->longVal); + Fr_rawMMul(r->longVal, r->longVal, Fr_R3.longVal); +} + +static inline void mul_l1nl2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + Fr_rawMMul(r->longVal, a->longVal, b->longVal); +} + +static inline void mul_l1ml2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + Fr_rawMMul(r->longVal, a->longVal, b->longVal); +} + +static inline void mul_l1ml2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + Fr_rawMMul(r->longVal, a->longVal, b->longVal); +} + +static inline void mul_l1ns2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + if (b->shortVal < 0) + { + int64_t b_shortVal = b->shortVal; + Fr_rawMMul1(r->longVal, a->longVal, -b_shortVal); + Fr_rawNeg(r->longVal, r->longVal); + } + else + { + Fr_rawMMul1(r->longVal, a->longVal, b->shortVal); + } + + Fr_rawMMul(r->longVal, r->longVal, Fr_R3.longVal); +} + +static inline void mul_s1nl2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + if (a->shortVal < 0) + { + int64_t a_shortVal = a->shortVal; + Fr_rawMMul1(r->longVal, b->longVal, -a_shortVal); + Fr_rawNeg(r->longVal, r->longVal); + } + else + { + Fr_rawMMul1(r->longVal, 
b->longVal, a->shortVal); + } + + Fr_rawMMul(r->longVal, r->longVal, Fr_R3.longVal); +} + +static inline void mul_l1ms2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + if (b->shortVal < 0) + { + int64_t b_shortVal = b->shortVal; + Fr_rawMMul1(r->longVal, a->longVal, -b_shortVal); + Fr_rawNeg(r->longVal, r->longVal); + } + else + { + Fr_rawMMul1(r->longVal, a->longVal, b->shortVal); + } +} + +static inline void mul_s1nl2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + if (a->shortVal < 0) + { + int64_t a_shortVal = a->shortVal; + Fr_rawMMul1(r->longVal, b->longVal, -a_shortVal); + Fr_rawNeg(r->longVal, r->longVal); + } + else + { + Fr_rawMMul1(r->longVal, b->longVal, a->shortVal); + } +} + +static inline void mul_l1ns2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + Fr_rawMMul(r->longVal, a->longVal, b->longVal); +} + +static inline void mul_l1ms2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + Fr_rawMMul(r->longVal, a->longVal, b->longVal); +} + +static inline void mul_s1ml2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + Fr_rawMMul(r->longVal, a->longVal, b->longVal); +} + +static inline void mul_s1ml2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + Fr_rawMMul(r->longVal, a->longVal, b->longVal); +} + +void Fr_mul(PFrElement r, PFrElement a, PFrElement b) +{ + if (a->type & Fr_LONG) + { + if (b->type & Fr_LONG) + { + if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + mul_l1ml2m(r, a, b); + } + else + { + mul_l1ml2n(r, a, b); + } + } + else + { + if (b->type & Fr_MONTGOMERY) + { + mul_l1nl2m(r, a, b); + } + else + { + mul_l1nl2n(r, a, b); + } + } + } + else if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + mul_l1ms2m(r, a, b); + } + else + { + mul_l1ms2n(r, a, b); + } + } + else + { + if (b->type & Fr_MONTGOMERY) + { + mul_l1ns2m(r, a, b); + } + else + { + mul_l1ns2n(r, a, b); + 
} + } + } + else if (b->type & Fr_LONG) + { + if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + mul_s1ml2m(r, a, b); + } + else + { + mul_s1ml2n(r,a, b); + } + } + else if (b->type & Fr_MONTGOMERY) + { + mul_s1nl2m(r, a, b); + } + else + { + mul_s1nl2n(r, a, b); + } + } + else + { + mul_s1s2(r, a, b); + } +} + +void Fr_toLongNormal(PFrElement r, PFrElement a) +{ + if (a->type & Fr_LONG) + { + if (a->type & Fr_MONTGOMERY) + { + Fr_rawFromMontgomery(r->longVal, a->longVal); + r->type = Fr_LONG; + } + else + { + Fr_copy(r, a); + } + } + else + { + Fr_rawCopyS2L(r->longVal, a->shortVal); + r->type = Fr_LONG; + r->shortVal = 0; + } +} + +void Fr_toMontgomery(PFrElement r, PFrElement a) +{ + if (a->type & Fr_MONTGOMERY) + { + Fr_copy(r, a); + } + else if (a->type & Fr_LONG) + { + r->shortVal = a->shortVal; + + Fr_rawMMul(r->longVal, a->longVal, Fr_R2.longVal); + + r->type = Fr_LONGMONTGOMERY; + } + else if (a->shortVal < 0) + { + int64_t a_shortVal = a->shortVal; + Fr_rawMMul1(r->longVal, Fr_R2.longVal, -a_shortVal); + Fr_rawNeg(r->longVal, r->longVal); + + r->type = Fr_SHORTMONTGOMERY; + } + else + { + Fr_rawMMul1(r->longVal, Fr_R2.longVal, a->shortVal); + + r->type = Fr_SHORTMONTGOMERY; + } +} + +void Fr_copyn(PFrElement r, PFrElement a, int n) +{ + std::memcpy(r, a, n * sizeof(FrElement)); +} + +static inline int has_add32_overflow(int64_t val) +{ + int64_t signs = (val >> 31) & 0x3; + + return signs == 1 || signs == 2; +} + +static inline int Fr_rawSSub(int64_t *r, int32_t a, int32_t b) +{ + *r = (int64_t)a - b; + + return has_add32_overflow(*r); +} + +static inline void sub_s1s2(PFrElement r, PFrElement a, PFrElement b) +{ + int64_t diff; + + int overflow = Fr_rawSSub(&diff, a->shortVal, b->shortVal); + + if (overflow) + { + Fr_rawCopyS2L(r->longVal, diff); + r->type = Fr_LONG; + r->shortVal = 0; + } + else + { + r->type = Fr_SHORT; + r->shortVal = (int32_t)diff; + } +} + +static inline void sub_l1nl2n(PFrElement r, PFrElement a, PFrElement b) +{ 
+ r->type = Fr_LONG; + + Fr_rawSub(r->longVal, a->longVal, b->longVal); +} + +static inline void sub_l1nl2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + FrElement a_m; + Fr_toMontgomery(&a_m, a); + + Fr_rawSub(r->longVal, a_m.longVal, b->longVal); +} + +static inline void sub_l1ml2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + Fr_rawSub(r->longVal, a->longVal, b->longVal); +} + +static inline void sub_l1ml2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + FrElement b_m; + Fr_toMontgomery(&b_m, b); + + Fr_rawSub(r->longVal, a->longVal, b_m.longVal); +} + +static inline void sub_s1l2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + if (a->shortVal >= 0) + { + Fr_rawSubSL(r->longVal, a->shortVal, b->longVal); + } + else + { + int64_t a_shortVal = a->shortVal; + Fr_rawNegLS(r->longVal, b->longVal, -a_shortVal); + } +} + +static inline void sub_l1ms2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + FrElement b_m; + Fr_toMontgomery(&b_m, b); + + Fr_rawSub(r->longVal, a->longVal, b_m.longVal); +} + +static inline void sub_s1nl2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + FrElement a_m; + Fr_toMontgomery(&a_m, a); + + Fr_rawSub(r->longVal, a_m.longVal, b->longVal); +} + +static inline void sub_l1ns2(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + if (b->shortVal < 0) + { + int64_t b_shortVal = b->shortVal; + Fr_rawAddLS(r->longVal, a->longVal, -b_shortVal); + } + else + { + Fr_rawSubLS(r->longVal, a->longVal, b->shortVal); + } +} + +static inline void sub_l1ms2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + Fr_rawSub(r->longVal, a->longVal, b->longVal); +} + +static inline void sub_s1ml2m(PFrElement r,PFrElement a,PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + Fr_rawSub(r->longVal, a->longVal, b->longVal); +} + +void 
Fr_sub(PFrElement r, PFrElement a, PFrElement b) +{ + if (a->type & Fr_LONG) + { + if (b->type & Fr_LONG) + { + if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + sub_l1ml2m(r, a, b); + } + else + { + sub_l1ml2n(r, a, b); + } + } + else if (b->type & Fr_MONTGOMERY) + { + sub_l1nl2m(r, a, b); + } + else + { + sub_l1nl2n(r, a, b); + } + } + else if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + sub_l1ms2m(r, a, b); + } + else + { + sub_l1ms2n(r, a, b); + } + } + else + { + sub_l1ns2(r, a, b); + } + } + else if (b->type & Fr_LONG) + { + if (b->type & Fr_MONTGOMERY) + { + if (a->type & Fr_MONTGOMERY) + { + sub_s1ml2m(r,a,b); + } + else + { + sub_s1nl2m(r,a,b); + } + } + else + { + sub_s1l2n(r,a,b); + } + } + else + { + sub_s1s2(r, a, b); + } +} + +static inline int Fr_rawSAdd(int64_t *r, int32_t a, int32_t b) +{ + *r = (int64_t)a + b; + + return has_add32_overflow(*r); +} + +static inline void add_s1s2(PFrElement r, PFrElement a, PFrElement b) +{ + int64_t sum; + + int overflow = Fr_rawSAdd(&sum, a->shortVal, b->shortVal); + + if (overflow) + { + Fr_rawCopyS2L(r->longVal, sum); + r->type = Fr_LONG; + r->shortVal = 0; + } + else + { + r->type = Fr_SHORT; + r->shortVal = (int32_t)sum; + } +} + +static inline void add_l1nl2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + Fr_rawAdd(r->longVal, a->longVal, b->longVal); +} + +static inline void add_l1nl2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + FrElement a_m; + Fr_toMontgomery(&a_m, a); + + Fr_rawAdd(r->longVal, a_m.longVal, b->longVal); +} + +static inline void add_l1ml2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + Fr_rawAdd(r->longVal, a->longVal, b->longVal); +} + +static inline void add_l1ml2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + FrElement b_m; + Fr_toMontgomery(&b_m, b); + + Fr_rawAdd(r->longVal, a->longVal, b_m.longVal); +} + +static inline 
void add_s1l2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + if (a->shortVal >= 0) + { + Fr_rawAddLS(r->longVal, b->longVal, a->shortVal); + } + else + { + int64_t a_shortVal = a->shortVal; + Fr_rawSubLS(r->longVal, b->longVal, -a_shortVal); + } +} + +static inline void add_l1ms2n(PFrElement r, PFrElement a, PFrElement b) +{ + FrElement b_m; + + r->type = Fr_LONGMONTGOMERY; + + Fr_toMontgomery(&b_m, b); + + Fr_rawAdd(r->longVal, a->longVal, b_m.longVal); +} + +static inline void add_s1nl2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + FrElement m_a; + Fr_toMontgomery(&m_a, a); + + Fr_rawAdd(r->longVal, m_a.longVal, b->longVal); +} + +static inline void add_l1ns2(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + if (b->shortVal >= 0) + { + Fr_rawAddLS(r->longVal, a->longVal, b->shortVal); + } + else + { + int64_t b_shortVal = b->shortVal; + Fr_rawSubLS(r->longVal, a->longVal, -b_shortVal); + } +} + +static inline void add_l1ms2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + Fr_rawAdd(r->longVal, a->longVal, b->longVal); +} + +static inline void add_s1ml2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONGMONTGOMERY; + + Fr_rawAdd(r->longVal, a->longVal, b->longVal); +} + +void Fr_add(PFrElement r, PFrElement a, PFrElement b) +{ + if (a->type & Fr_LONG) + { + if (b->type & Fr_LONG) + { + if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + add_l1ml2m(r, a, b); + } + else + { + add_l1ml2n(r, a, b); + } + } + else + { + if (b->type & Fr_MONTGOMERY) + { + add_l1nl2m(r, a, b); + } + else + { + add_l1nl2n(r, a, b); + } + } + } + else if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + add_l1ms2m(r, a, b); + } + else + { + add_l1ms2n(r, a, b); + } + } + else + { + add_l1ns2(r, a, b); + } + } + else if (b->type & Fr_LONG) + { + if (b->type & Fr_MONTGOMERY) + { + if (a->type & Fr_MONTGOMERY) + { + add_s1ml2m(r, a, 
b); + } + else + { + add_s1nl2m(r, a, b); + } + } + else + { + add_s1l2n(r, a, b); + } + } + else + { + add_s1s2(r, a, b); + } +} + +int Fr_isTrue(PFrElement pE) +{ + int result; + + if (pE->type & Fr_LONG) + { + result = !Fr_rawIsZero(pE->longVal); + } + else + { + result = pE->shortVal != 0; + } + + return result; +} + +int Fr_longNeg(PFrElement pE) +{ + if(Fr_rawCmp(pE->longVal, Fr_q.longVal) >= 0) + { + Fr_longErr(); + return 0; + } + + int64_t result = pE->longVal[0] - Fr_q.longVal[0]; + + int64_t is_long = (result >> 31) + 1; + + if(is_long) + { + Fr_longErr(); + return 0; + } + + return result; +} + +int Fr_longNormal(PFrElement pE) +{ + uint64_t is_long = 0; + uint64_t result; + + result = pE->longVal[0]; + + is_long = result >> 31; + + if (is_long) + { + return Fr_longNeg(pE); + } + + if (pE->longVal[1] || pE->longVal[2] || pE->longVal[3]) + { + return Fr_longNeg(pE); + } + + return result; +} + +// Convert a 64 bit integer to a long format field element +int Fr_toInt(PFrElement pE) +{ + int result; + + if (pE->type & Fr_LONG) + { + if (pE->type & Fr_MONTGOMERY) + { + FrElement e_n; + Fr_toNormal(&e_n, pE); + + result = Fr_longNormal(&e_n); + } + else + { + result = Fr_longNormal(pE); + } + } + else + { + result = pE->shortVal; + } + + return result; +} + +static inline int rlt_s1s2(PFrElement a, PFrElement b) +{ + return (a->shortVal < b->shortVal) ? 1 : 0; +} + +static inline int rltRawL1L2(FrRawElement pRawA, FrRawElement pRawB) +{ + int result = Fr_rawCmp(pRawB, pRawA); + + return result > 0 ? 
1 : 0; +} + +static inline int rltl1l2_n1(FrRawElement pRawA, FrRawElement pRawB) +{ + int result = Fr_rawCmp(half, pRawB); + + if (result < 0) + { + return rltRawL1L2(pRawA, pRawB); + } + + return 1; +} + +static inline int rltl1l2_p1(FrRawElement pRawA, FrRawElement pRawB) +{ + int result = Fr_rawCmp(half, pRawB); + + if (result < 0) + { + return 0; + } + + return rltRawL1L2(pRawA, pRawB); +} + +static inline int rltL1L2(FrRawElement pRawA, FrRawElement pRawB) +{ + int result = Fr_rawCmp(half, pRawA); + + if (result < 0) + { + return rltl1l2_n1(pRawA, pRawB); + } + + return rltl1l2_p1(pRawA, pRawB); +} + +static inline int rlt_l1nl2n(PFrElement a, PFrElement b) +{ + return rltL1L2(a->longVal, b->longVal); +} + +static inline int rlt_l1nl2m(PFrElement a, PFrElement b) +{ + FrElement b_n; + + Fr_toNormal(&b_n, b); + + return rltL1L2(a->longVal, b_n.longVal); +} + +static inline int rlt_l1ml2m(PFrElement a, PFrElement b) +{ + FrElement a_n; + FrElement b_n; + + Fr_toNormal(&a_n, a); + Fr_toNormal(&b_n, b); + + return rltL1L2(a_n.longVal, b_n.longVal); +} + +static inline int rlt_l1ml2n(PFrElement a, PFrElement b) +{ + FrElement a_n; + + Fr_toNormal(&a_n, a); + + return rltL1L2(a_n.longVal, b->longVal); +} + +static inline int rlt_s1l2n(PFrElement a,PFrElement b) +{ + FrElement a_n; + + Fr_toLongNormal(&a_n,a); + + return rltL1L2(a_n.longVal, b->longVal); +} + +static inline int rlt_l1ms2(PFrElement a, PFrElement b) +{ + FrElement a_n; + FrElement b_ln; + + Fr_toLongNormal(&b_ln ,b); + Fr_toNormal(&a_n, a); + + return rltL1L2(a_n.longVal, b_ln.longVal); +} + +static inline int rlt_s1l2m(PFrElement a, PFrElement b) +{ + FrElement a_n; + FrElement b_n; + + Fr_toLongNormal(&a_n, a); + Fr_toNormal(&b_n, b); + + return rltL1L2(a_n.longVal, b_n.longVal); +} + +static inline int rlt_l1ns2(PFrElement a, PFrElement b) +{ + FrElement b_n; + + Fr_toLongNormal(&b_n, b); + + return rltL1L2(a->longVal, b_n.longVal); +} + +int32_t Fr_rlt(PFrElement a, PFrElement b) +{ + int32_t 
result; + + if (a->type & Fr_LONG) + { + if (b->type & Fr_LONG) + { + if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + result = rlt_l1ml2m(a, b); + } + else + { + result = rlt_l1ml2n(a, b); + } + } + else if (b->type & Fr_MONTGOMERY) + { + result = rlt_l1nl2m(a, b); + } + else + { + result = rlt_l1nl2n(a, b); + } + } + else if (a->type & Fr_MONTGOMERY) + { + result = rlt_l1ms2(a, b); + } + else + { + result = rlt_l1ns2(a, b); + } + } + else if (b->type & Fr_LONG) + { + if (b->type & Fr_MONTGOMERY) + { + result = rlt_s1l2m(a,b); + } + else + { + result = rlt_s1l2n(a,b); + } + } + else + { + result = rlt_s1s2(a, b); + } + + return result; +} + +void Fr_lt(PFrElement r, PFrElement a, PFrElement b) +{ + r->shortVal = Fr_rlt(a, b); + r->type = Fr_SHORT; +} + +void Fr_geq(PFrElement r, PFrElement a, PFrElement b) +{ + int32_t result = Fr_rlt(a, b); + result ^= 0x1; + + r->shortVal = result; + r->type = Fr_SHORT; +} + +static inline int Fr_rawSNeg(int64_t *r, int32_t a) +{ + *r = -(int64_t)a; + + return has_add32_overflow(*r); +} + +void Fr_neg(PFrElement r, PFrElement a) +{ + if (a->type & Fr_LONG) + { + r->type = a->type; + r->shortVal = a->shortVal; + Fr_rawNeg(r->longVal, a->longVal); + } + else + { + int64_t a_shortVal; + + int overflow = Fr_rawSNeg(&a_shortVal, a->shortVal); + + if (overflow) + { + Fr_rawCopyS2L(r->longVal, a_shortVal); + r->type = Fr_LONG; + r->shortVal = 0; + } + else + { + r->type = Fr_SHORT; + r->shortVal = (int32_t)a_shortVal; + } + } +} + +static inline int reqL1L2(FrRawElement pRawA, FrRawElement pRawB) +{ + return Fr_rawCmp(pRawB, pRawA) == 0; +} + +static inline int req_s1s2(PFrElement r, PFrElement a, PFrElement b) +{ + return (a->shortVal == b->shortVal) ? 
1 : 0; +} + +static inline int req_l1nl2n(PFrElement r, PFrElement a, PFrElement b) +{ + return reqL1L2(a->longVal, b->longVal); +} + +static inline int req_l1nl2m(PFrElement r, PFrElement a, PFrElement b) +{ + FrElement a_m; + Fr_toMontgomery(&a_m, a); + + return reqL1L2(a_m.longVal, b->longVal); +} + +static inline int req_l1ml2m(PFrElement r, PFrElement a, PFrElement b) +{ + return reqL1L2(a->longVal, b->longVal); +} + +static inline int req_l1ml2n(PFrElement r, PFrElement a, PFrElement b) +{ + FrElement b_m; + Fr_toMontgomery(&b_m, b); + + return reqL1L2(a->longVal, b_m.longVal); +} + +static inline int req_s1l2n(PFrElement r, PFrElement a, PFrElement b) +{ + FrElement a_n; + Fr_toLongNormal(&a_n, a); + + return reqL1L2(a_n.longVal, b->longVal); +} + +static inline int req_l1ms2(PFrElement r, PFrElement a, PFrElement b) +{ + FrElement b_m; + Fr_toMontgomery(&b_m, b); + + return reqL1L2(a->longVal, b_m.longVal); +} + +static inline int req_s1l2m(PFrElement r, PFrElement a, PFrElement b) +{ + FrElement a_m; + Fr_toMontgomery(&a_m, a); + + return reqL1L2(a_m.longVal, b->longVal); +} + +static inline int req_l1ns2(PFrElement r, PFrElement a, PFrElement b) +{ + FrElement b_n; + Fr_toLongNormal(&b_n, b); + + return reqL1L2(a->longVal, b_n.longVal); +} + +// Compares two elements of any kind +int Fr_req(PFrElement r, PFrElement a, PFrElement b) +{ + int result; + + if (a->type & Fr_LONG) + { + if (b->type & Fr_LONG) + { + if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + result = req_l1ml2m(r, a, b); + } + else + { + result = req_l1ml2n(r, a, b); + } + } + else if (b->type & Fr_MONTGOMERY) + { + result = req_l1nl2m(r, a, b); + } + else + { + result = req_l1nl2n(r, a, b); + } + } + else if (a->type & Fr_MONTGOMERY) + { + result = req_l1ms2(r, a, b); + } + else + { + result = req_l1ns2(r, a, b); + } + } + else if (b->type & Fr_LONG) + { + if (b->type & Fr_MONTGOMERY) + { + result = req_s1l2m(r, a, b); + } + else + { + result = req_s1l2n(r, a, b); + } 
+ } + else + { + result = req_s1s2(r, a, b); + } + + return result; +} + +void Fr_eq(PFrElement r, PFrElement a, PFrElement b) +{ + r->shortVal = Fr_req(r, a, b); + r->type = Fr_SHORT; +} + +void Fr_neq(PFrElement r, PFrElement a, PFrElement b) +{ + int result = Fr_req(r, a, b); + + r->shortVal = result ^ 0x1; + r->type = Fr_SHORT; +} + +// Logical or between two elements +void Fr_lor(PFrElement r, PFrElement a, PFrElement b) +{ + int32_t is_true_a; + + if (a->type & Fr_LONG) + { + is_true_a = !Fr_rawIsZero(a->longVal); + } + else + { + is_true_a = a->shortVal ? 1 : 0; + } + + int32_t is_true_b; + + if (b->type & Fr_LONG) + { + is_true_b = !Fr_rawIsZero(b->longVal); + } + else + { + is_true_b = b->shortVal ? 1 : 0; + } + + r->shortVal = is_true_a | is_true_b; + r->type = Fr_SHORT; +} + +void Fr_lnot(PFrElement r, PFrElement a) +{ + if (a->type & Fr_LONG) + { + r->shortVal = Fr_rawIsZero(a->longVal); + } + else + { + r->shortVal = a->shortVal ? 0 : 1; + } + + r->type = Fr_SHORT; +} + + +static inline int rgt_s1s2(PFrElement a, PFrElement b) +{ + return (a->shortVal > b->shortVal) ? 1 : 0; +} + +static inline int rgtRawL1L2(FrRawElement pRawA, FrRawElement pRawB) +{ + int result = Fr_rawCmp(pRawB, pRawA); + + return (result < 0) ? 
1 : 0; +} + +static inline int rgtl1l2_n1(FrRawElement pRawA, FrRawElement pRawB) +{ + int result = Fr_rawCmp(half, pRawB); + + if (result < 0) + { + return rgtRawL1L2(pRawA, pRawB); + } + return 0; +} + +static inline int rgtl1l2_p1(FrRawElement pRawA, FrRawElement pRawB) +{ + int result = Fr_rawCmp(half, pRawB); + + if (result < 0) + { + return 1; + } + return rgtRawL1L2(pRawA, pRawB); +} + +static inline int rgtL1L2(FrRawElement pRawA, FrRawElement pRawB) +{ + int result = Fr_rawCmp(half, pRawA); + + if (result < 0) + { + return rgtl1l2_n1(pRawA, pRawB); + } + + return rgtl1l2_p1(pRawA, pRawB); +} + +static inline int rgt_l1nl2n(PFrElement a, PFrElement b) +{ + return rgtL1L2(a->longVal, b->longVal); +} + +static inline int rgt_l1nl2m(PFrElement a, PFrElement b) +{ + FrElement b_n; + Fr_toNormal(&b_n, b); + + return rgtL1L2(a->longVal, b_n.longVal); +} + +static inline int rgt_l1ml2m(PFrElement a, PFrElement b) +{ + FrElement a_n; + FrElement b_n; + + Fr_toNormal(&a_n, a); + Fr_toNormal(&b_n, b); + + return rgtL1L2(a_n.longVal, b_n.longVal); +} + +static inline int rgt_l1ml2n(PFrElement a, PFrElement b) +{ + FrElement a_n; + Fr_toNormal(&a_n, a); + + return rgtL1L2(a_n.longVal, b->longVal); +} + +static inline int rgt_s1l2n(PFrElement a, PFrElement b) +{ + FrElement a_n; + Fr_toLongNormal(&a_n, a); + + return rgtL1L2(a_n.longVal, b->longVal); +} + +static inline int rgt_l1ms2(PFrElement a, PFrElement b) +{ + FrElement a_n; + FrElement b_n; + + Fr_toNormal(&a_n, a); + Fr_toLongNormal(&b_n, b); + + return rgtL1L2(a_n.longVal, b_n.longVal); +} + +static inline int rgt_s1l2m(PFrElement a, PFrElement b) +{ + FrElement a_n; + FrElement b_n; + + Fr_toLongNormal(&a_n, a); + Fr_toNormal(&b_n, b); + + return rgtL1L2(a_n.longVal, b_n.longVal); +} + +static inline int rgt_l1ns2(PFrElement a, PFrElement b) +{ + FrElement b_n; + Fr_toLongNormal(&b_n, b); + + return rgtL1L2(a->longVal, b_n.longVal); +} + +int Fr_rgt(PFrElement r, PFrElement a, PFrElement b) +{ + int result = 
0; + + if (a->type & Fr_LONG) + { + if (b->type & Fr_LONG) + { + if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + result = rgt_l1ml2m(a, b); + } + else + { + result = rgt_l1ml2n(a, b); + } + } + else if (b->type & Fr_MONTGOMERY) + { + result = rgt_l1nl2m(a, b); + } + else + { + result = rgt_l1nl2n(a, b); + } + } + else if (a->type & Fr_MONTGOMERY) + { + result = rgt_l1ms2(a, b); + } + else + { + result = rgt_l1ns2(a, b); + } + } + else if (b->type & Fr_LONG) + { + if (b->type & Fr_MONTGOMERY) + { + result = rgt_s1l2m(a, b); + } + else + { + result = rgt_s1l2n(a,b); + } + } + else + { + result = rgt_s1s2(a, b); + } + + return result; +} + +void Fr_gt(PFrElement r, PFrElement a, PFrElement b) +{ + r->shortVal = Fr_rgt(r, a, b); + r->type = Fr_SHORT; +} + +void Fr_leq(PFrElement r, PFrElement a, PFrElement b) +{ + int32_t result = Fr_rgt(r, a, b); + result ^= 0x1; + + r->shortVal = result; + r->type = Fr_SHORT; +} + +// Logical and between two elements +void Fr_land(PFrElement r, PFrElement a, PFrElement b) +{ + int32_t is_true_a; + + if (a->type & Fr_LONG) + { + is_true_a = !Fr_rawIsZero(a->longVal); + } + else + { + is_true_a = a->shortVal ? 1 : 0; + } + + int32_t is_true_b; + + if (b->type & Fr_LONG) + { + is_true_b = !Fr_rawIsZero(b->longVal); + } + else + { + is_true_b = b->shortVal ? 
1 : 0; + } + + r->shortVal = is_true_a & is_true_b; + r->type = Fr_SHORT; +} + +static inline void and_s1s2(PFrElement r, PFrElement a, PFrElement b) +{ + if (a->shortVal >= 0 && b->shortVal >= 0) + { + int32_t result = a->shortVal & b->shortVal; + r->shortVal = result; + r->type = Fr_SHORT; + return; + } + + r->type = Fr_LONG; + + FrElement a_n; + FrElement b_n; + + Fr_toLongNormal(&a_n, a); + Fr_toLongNormal(&b_n, b); + + Fr_rawAnd(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void and_l1nl2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + Fr_rawAnd(r->longVal, a->longVal, b->longVal); +} + +static inline void and_l1nl2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement b_n; + Fr_toNormal(&b_n, b); + + Fr_rawAnd(r->longVal, a->longVal, b_n.longVal); +} + +static inline void and_l1ml2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + FrElement b_n; + + Fr_toNormal(&a_n, a); + Fr_toNormal(&b_n, b); + + Fr_rawAnd(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void and_l1ml2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + Fr_toNormal(&a_n, a); + + Fr_rawAnd(r->longVal, a_n.longVal, b->longVal); +} + +static inline void and_s1l2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + + if (a->shortVal >= 0) + { + a_n = {0, 0, {(uint64_t)a->shortVal, 0, 0, 0}}; + } + else + { + Fr_toLongNormal(&a_n, a); + } + + Fr_rawAnd(r->longVal, a_n.longVal, b->longVal); +} + +static inline void and_l1ms2(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + FrElement b_n; + + Fr_toNormal(&a_n, a); + + if (b->shortVal >= 0) + { + b_n = {0, 0, {(uint64_t)b->shortVal, 0, 0, 0}}; + } + else + { + Fr_toLongNormal(&b_n, b); + } + + Fr_rawAnd(r->longVal, b_n.longVal, a_n.longVal); +} + +static inline void and_s1l2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = 
Fr_LONG; + + FrElement a_n; + FrElement b_n; + + Fr_toNormal(&b_n, b); + + if (a->shortVal >= 0) + { + a_n = {0, 0, {(uint64_t)a->shortVal, 0, 0, 0}}; + } + else + { + Fr_toLongNormal(&a_n, a); + } + + Fr_rawAnd(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void and_l1ns2(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement b_n; + + if (b->shortVal >= 0) + { + b_n = {0, 0, {(uint64_t)b->shortVal, 0, 0, 0}}; + } + else + { + Fr_toLongNormal(&b_n, b); + } + + Fr_rawAnd(r->longVal, a->longVal, b_n.longVal); +} + +// Ands two elements of any kind +void Fr_band(PFrElement r, PFrElement a, PFrElement b) +{ + if (a->type & Fr_LONG) + { + if (b->type & Fr_LONG) + { + if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + and_l1ml2m(r, a, b); + } + else + { + and_l1ml2n(r, a, b); + } + } + else if (b->type & Fr_MONTGOMERY) + { + and_l1nl2m(r, a, b); + } + else + { + and_l1nl2n(r, a, b); + } + } + else if (a->type & Fr_MONTGOMERY) + { + and_l1ms2(r, a, b); + } + else + { + and_l1ns2(r, a, b); + } + } + else if (b->type & Fr_LONG) + { + if (b->type & Fr_MONTGOMERY) + { + and_s1l2m(r, a, b); + } + else + { + and_s1l2n(r, a, b); + } + } + else + { + and_s1s2(r, a, b); + } +} + +void Fr_rawZero(FrRawElement pRawResult) +{ + std::memset(pRawResult, 0, sizeof(FrRawElement)); +} + +static inline void rawShl(FrRawElement r, FrRawElement a, uint64_t b) +{ + if (b == 0) + { + Fr_rawCopy(r, a); + return; + } + + if (b >= 254) + { + Fr_rawZero(r); + return; + } + + Fr_rawShl(r, a, b); +} + +static inline void rawShr(FrRawElement r, FrRawElement a, uint64_t b) +{ + if (b == 0) + { + Fr_rawCopy(r, a); + return; + } + + if (b >= 254) + { + Fr_rawZero(r); + return; + } + + Fr_rawShr(r,a, b); +} + +static inline void Fr_setzero(PFrElement r) +{ + r->type = 0; + r->shortVal = 0; +} + +static inline void do_shlcl(PFrElement r, PFrElement a, uint64_t b) +{ + FrElement a_long; + Fr_toLongNormal(&a_long, a); + + r->type = Fr_LONG; + 
rawShl(r->longVal, a_long.longVal, b); +} + +static inline void do_shlln(PFrElement r, PFrElement a, uint64_t b) +{ + r->type = Fr_LONG; + rawShl(r->longVal, a->longVal, b); +} + +static inline void do_shl(PFrElement r, PFrElement a, uint64_t b) +{ + if (a->type & Fr_LONG) + { + if (a->type == Fr_LONGMONTGOMERY) + { + FrElement a_long; + Fr_toNormal(&a_long, a); + + do_shlln(r, &a_long, b); + } + else + { + do_shlln(r, a, b); + } + } + else + { + int64_t a_shortVal = a->shortVal; + + if (a_shortVal == 0) + { + Fr_setzero(r); + } + else if (a_shortVal < 0) + { + do_shlcl(r, a, b); + } + else if(b >= 31) + { + do_shlcl(r, a, b); + } + else + { + a_shortVal <<= b; + + const uint64_t a_is_over_short = a_shortVal >> 31; + + if (a_is_over_short) + { + do_shlcl(r, a, b); + } + else + { + r->type = Fr_SHORT; + r->shortVal = a_shortVal; + } + } + } +} + +static inline void do_shrln(PFrElement r, PFrElement a, uint64_t b) +{ + r->type = Fr_LONG; + rawShr(r->longVal, a->longVal, b); +} + +static inline void do_shrl(PFrElement r, PFrElement a, uint64_t b) +{ + if (a->type == Fr_LONGMONTGOMERY) + { + FrElement a_long; + Fr_toNormal(&a_long, a); + + do_shrln(r, &a_long, b); + } + else + { + do_shrln(r, a, b); + } +} + +static inline void do_shr(PFrElement r, PFrElement a, uint64_t b) +{ + if (a->type & Fr_LONG) + { + do_shrl(r, a, b); + } + else + { + int64_t a_shortVal = a->shortVal; + + if (a_shortVal == 0) + { + Fr_setzero(r); + } + else if (a_shortVal < 0) + { + FrElement a_long; + Fr_toLongNormal(&a_long, a); + + do_shrl(r, &a_long, b); + } + else if(b >= 31) + { + Fr_setzero(r); + } + else + { + a_shortVal >>= b; + + r->shortVal = a_shortVal; + r->type = Fr_SHORT; + } + } +} + +static inline void Fr_shr_big_shift(PFrElement r, PFrElement a, PFrElement b) +{ + static FrRawElement max_shift = {254, 0, 0, 0}; + + FrRawElement shift; + + Fr_rawSubRegular(shift, Fr_q.longVal, b->longVal); + + if (Fr_rawCmp(shift, max_shift) >= 0) + { + Fr_setzero(r); + } + else + { + do_shl(r, 
a, shift[0]); + } +} + +static inline void Fr_shr_long(PFrElement r, PFrElement a, PFrElement b) +{ + static FrRawElement max_shift = {254, 0, 0, 0}; + + if (Fr_rawCmp(b->longVal, max_shift) >= 0) + { + Fr_shr_big_shift(r, a, b); + } + else + { + do_shr(r, a, b->longVal[0]); + } +} + +void Fr_shr(PFrElement r, PFrElement a, PFrElement b) +{ + if (b->type & Fr_LONG) + { + if (b->type == Fr_LONGMONTGOMERY) + { + FrElement b_long; + Fr_toNormal(&b_long, b); + + Fr_shr_long(r, a, &b_long); + } + else + { + Fr_shr_long(r, a, b); + } + } + else + { + int64_t b_shortVal = b->shortVal; + + if (b_shortVal < 0) + { + b_shortVal = -b_shortVal; + + if (b_shortVal >= 254) + { + Fr_setzero(r); + } + else + { + do_shl(r, a, b_shortVal); + } + } + else if (b_shortVal >= 254) + { + Fr_setzero(r); + } + else + { + do_shr(r, a, b_shortVal); + } + } +} + +static inline void Fr_shl_big_shift(PFrElement r, PFrElement a, PFrElement b) +{ + static FrRawElement max_shift = {254, 0, 0, 0}; + + FrRawElement shift; + + Fr_rawSubRegular(shift, Fr_q.longVal, b->longVal); + + if (Fr_rawCmp(shift, max_shift) >= 0) + { + Fr_setzero(r); + } + else + { + do_shr(r, a, shift[0]); + } +} + +static inline void Fr_shl_long(PFrElement r, PFrElement a, PFrElement b) +{ + static FrRawElement max_shift = {254, 0, 0, 0}; + + if (Fr_rawCmp(b->longVal, max_shift) >= 0) + { + Fr_shl_big_shift(r, a, b); + } + else + { + do_shl(r, a, b->longVal[0]); + } +} + +void Fr_shl(PFrElement r, PFrElement a, PFrElement b) +{ + if (b->type & Fr_LONG) + { + if (b->type == Fr_LONGMONTGOMERY) + { + FrElement b_long; + Fr_toNormal(&b_long, b); + + Fr_shl_long(r, a, &b_long); + } + else + { + Fr_shl_long(r, a, b); + } + } + else + { + int64_t b_shortVal = b->shortVal; + + if (b_shortVal < 0) + { + b_shortVal = -b_shortVal; + + if (b_shortVal >= 254) + { + Fr_setzero(r); + } + else + { + do_shr(r, a, b_shortVal); + } + } + else if (b_shortVal >= 254) + { + Fr_setzero(r); + } + else + { + do_shl(r, a, b_shortVal); + } + } +} + 
+void Fr_square(PFrElement r, PFrElement a) +{ + if (a->type & Fr_LONG) + { + if (a->type == Fr_LONGMONTGOMERY) + { + r->type = Fr_LONGMONTGOMERY; + Fr_rawMSquare(r->longVal, a->longVal); + } + else + { + r->type = Fr_LONGMONTGOMERY; + Fr_rawMSquare(r->longVal, a->longVal); + Fr_rawMMul(r->longVal, r->longVal, Fr_R3.longVal); + } + } + else + { + int64_t result; + + int overflow = Fr_rawSMul(&result, a->shortVal, a->shortVal); + + if (overflow) + { + Fr_rawCopyS2L(r->longVal, result); + r->type = Fr_LONG; + r->shortVal = 0; + } + else + { + // done the same way as in intel asm implementation + r->shortVal = (int32_t)result; + r->type = Fr_SHORT; + // + + Fr_rawCopyS2L(r->longVal, result); + r->type = Fr_LONG; + r->shortVal = 0; + } + } +} + +static inline void or_s1s2(PFrElement r, PFrElement a, PFrElement b) +{ + if (a->shortVal >= 0 && b->shortVal >= 0) + { + r->shortVal = a->shortVal | b->shortVal; + r->type = Fr_SHORT; + return; + } + + r->type = Fr_LONG; + + FrElement a_n; + FrElement b_n; + + Fr_toLongNormal(&a_n, a); + Fr_toLongNormal(&b_n, b); + + Fr_rawOr(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void or_s1l2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + FrElement b_n; + + Fr_toNormal(&b_n, b); + + if (a->shortVal >= 0) + { + a_n = {0, 0, {(uint64_t)a->shortVal, 0, 0, 0}}; + } + else + { + Fr_toLongNormal(&a_n, a); + } + + Fr_rawOr(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void or_s1l2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + + if (a->shortVal >= 0) + { + a_n = {0, 0, {(uint64_t)a->shortVal, 0, 0, 0}}; + } + else + { + Fr_toLongNormal(&a_n, a); + } + + Fr_rawOr(r->longVal, a_n.longVal, b->longVal); +} + +static inline void or_l1ns2(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement b_n; + + if (b->shortVal >= 0) + { + b_n = {0, 0, {(uint64_t)b->shortVal, 0, 0, 0}}; + } + else + { + Fr_toLongNormal(&b_n, b); 
+ } + + Fr_rawOr(r->longVal, a->longVal, b_n.longVal); +} + +static inline void or_l1ms2(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + FrElement b_n; + + Fr_toNormal(&a_n, a); + + if (b->shortVal >= 0) + { + b_n = {0, 0, {(uint64_t)b->shortVal, 0, 0, 0}}; + } + else + { + Fr_toLongNormal(&b_n, b); + } + + Fr_rawOr(r->longVal, b_n.longVal, a_n.longVal); +} + +static inline void or_l1nl2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + Fr_rawOr(r->longVal, a->longVal, b->longVal); +} + +static inline void or_l1nl2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement b_n; + Fr_toNormal(&b_n, b); + + Fr_rawOr(r->longVal, a->longVal, b_n.longVal); +} + +static inline void or_l1ml2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + Fr_toNormal(&a_n, a); + + Fr_rawOr(r->longVal, a_n.longVal, b->longVal); +} + +static inline void or_l1ml2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + FrElement b_n; + + Fr_toNormal(&a_n, a); + Fr_toNormal(&b_n, b); + + Fr_rawOr(r->longVal, a_n.longVal, b_n.longVal); +} + + +void Fr_bor(PFrElement r, PFrElement a, PFrElement b) +{ + if (a->type & Fr_LONG) + { + if (b->type & Fr_LONG) + { + if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + or_l1ml2m(r, a, b); + } + else + { + or_l1ml2n(r, a, b); + } + } + else if (b->type & Fr_MONTGOMERY) + { + or_l1nl2m(r, a, b); + } + else + { + or_l1nl2n(r, a, b); + } + } + else if (a->type & Fr_MONTGOMERY) + { + or_l1ms2(r, a, b); + } + else + { + or_l1ns2(r, a, b); + } + } + else if (b->type & Fr_LONG) + { + if (b->type & Fr_MONTGOMERY) + { + or_s1l2m(r, a, b); + } + else + { + or_s1l2n(r, a, b); + } + } + else + { + or_s1s2(r, a, b); + } +} + +static inline void xor_s1s2(PFrElement r, PFrElement a, PFrElement b) +{ + if (a->shortVal >= 0 && b->shortVal >= 0) + { + r->shortVal = a->shortVal ^ b->shortVal; + r->type = 
Fr_SHORT; + return; + } + + r->type = Fr_LONG; + + FrElement a_n; + FrElement b_n; + + Fr_toLongNormal(&a_n, a); + Fr_toLongNormal(&b_n, b); + + Fr_rawXor(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void xor_s1l2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + + if (a->shortVal >= 0) + { + a_n = {0, 0, {(uint64_t)a->shortVal, 0, 0, 0}}; + } + else + { + Fr_toLongNormal(&a_n, a); + } + + Fr_rawXor(r->longVal, a_n.longVal, b->longVal); +} + +static inline void xor_s1l2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + FrElement b_n; + + Fr_toNormal(&b_n, b); + + if (a->shortVal >= 0) + { + a_n = {0, 0, {(uint64_t)a->shortVal, 0, 0, 0}}; + } + else + { + Fr_toLongNormal(&a_n, a); + } + + Fr_rawXor(r->longVal, a_n.longVal, b_n.longVal); +} + +static inline void xor_l1ns2(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement b_n; + + if (b->shortVal >= 0) + { + b_n = {0, 0, {(uint64_t)b->shortVal, 0, 0, 0}}; + } + else + { + Fr_toLongNormal(&b_n, b); + } + + Fr_rawXor(r->longVal, a->longVal, b_n.longVal); +} + +static inline void xor_l1ms2(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + FrElement b_n; + + Fr_toNormal(&a_n, a); + + if (b->shortVal >= 0) + { + b_n = {0, 0, {(uint64_t)b->shortVal, 0, 0, 0}}; + } + else + { + Fr_toLongNormal(&b_n, b); + } + + Fr_rawXor(r->longVal, b_n.longVal, a_n.longVal); +} + +static inline void xor_l1nl2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + Fr_rawXor(r->longVal, a->longVal, b->longVal); +} + +static inline void xor_l1nl2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement b_n; + Fr_toNormal(&b_n, b); + + Fr_rawXor(r->longVal, a->longVal, b_n.longVal); +} + +static inline void xor_l1ml2n(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + Fr_toNormal(&a_n, a); + + Fr_rawXor(r->longVal, 
a_n.longVal, b->longVal); +} + +static inline void xor_l1ml2m(PFrElement r, PFrElement a, PFrElement b) +{ + r->type = Fr_LONG; + + FrElement a_n; + FrElement b_n; + + Fr_toNormal(&a_n, a); + Fr_toNormal(&b_n, b); + + Fr_rawXor(r->longVal, a_n.longVal, b_n.longVal); +} + +void Fr_bxor(PFrElement r, PFrElement a, PFrElement b) +{ + if (a->type & Fr_LONG) + { + if (b->type & Fr_LONG) + { + if (a->type & Fr_MONTGOMERY) + { + if (b->type & Fr_MONTGOMERY) + { + xor_l1ml2m(r, a, b); + } + else + { + xor_l1ml2n(r, a, b); + } + } + else if (b->type & Fr_MONTGOMERY) + { + xor_l1nl2m(r, a, b); + } + else + { + xor_l1nl2n(r, a, b); + } + } + else if (a->type & Fr_MONTGOMERY) + { + xor_l1ms2(r, a, b); + } + else + { + xor_l1ns2(r, a, b); + } + } + else if (b->type & Fr_LONG) + { + if (b->type & Fr_MONTGOMERY) + { + xor_s1l2m(r, a, b); + } + else + { + xor_s1l2n(r, a, b); + } + } + else + { + xor_s1s2(r, a, b); + } +} + +void Fr_bnot(PFrElement r, PFrElement a) +{ + r->type = Fr_LONG; + + if (a->type == Fr_LONG) + { + if (a->type & Fr_MONTGOMERY) + { + FrElement a_n; + Fr_toNormal(&a_n, a); + + Fr_rawNot(r->longVal, a_n.longVal); + } + else + { + Fr_rawNot(r->longVal, a->longVal); + } + } + else + { + FrElement a_n; + Fr_toLongNormal(&a_n, a); + + Fr_rawNot(r->longVal, a_n.longVal); + } +} diff --git a/example/support/fr_raw_arm64.o b/example/support/fr_raw_arm64.o new file mode 100644 index 0000000..4069502 Binary files /dev/null and b/example/support/fr_raw_arm64.o differ diff --git a/example/support/fr_raw_arm64.s b/example/support/fr_raw_arm64.s new file mode 100755 index 0000000..244e2bd --- /dev/null +++ b/example/support/fr_raw_arm64.s @@ -0,0 +1,1199 @@ + .global Fr_rawAdd + .global Fr_rawAddLS + .global Fr_rawSub + .global Fr_rawSubRegular + .global Fr_rawNeg + .global Fr_rawNegLS + .global Fr_rawSubSL + .global Fr_rawSubLS + .global Fr_rawMMul + .global Fr_rawMMul1 + .global Fr_rawFromMontgomery + .global Fr_rawCopy + .global Fr_rawSwap + .global Fr_rawIsEq + .global 
Fr_rawIsZero + .global Fr_rawCopyS2L + .global Fr_rawCmp + .global Fr_rawAnd + .global Fr_rawOr + .global Fr_rawXor + .global Fr_rawShr + .global Fr_rawShl + .global Fr_rawNot + + .global _Fr_rawAdd + .global _Fr_rawAddLS + .global _Fr_rawSub + .global _Fr_rawSubRegular + .global _Fr_rawNeg + .global _Fr_rawNegLS + .global _Fr_rawSubSL + .global _Fr_rawSubLS + .global _Fr_rawMMul + .global _Fr_rawMMul1 + .global _Fr_rawFromMontgomery + .global _Fr_rawCopy + .global _Fr_rawSwap + .global _Fr_rawIsEq + .global _Fr_rawIsZero + .global _Fr_rawCopyS2L + .global _Fr_rawCmp + .global _Fr_rawAnd + .global _Fr_rawOr + .global _Fr_rawXor + .global _Fr_rawShr + .global _Fr_rawShl + .global _Fr_rawNot + + .text + .align 4 + +// void Fr_rawAdd(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB) +Fr_rawAdd: +_Fr_rawAdd: + ldp x3, x4, [x1] + ldp x7, x8, [x2] + adds x3, x3, x7 + adcs x4, x4, x8 + + ldp x5, x6, [x1, 16] + ldp x9, x10, [x2, 16] + adcs x5, x5, x9 + adcs x6, x6, x10 + + cset x16, cs + + adr x11, Fr_rawq + ldp x12, x13, [x11] + ldp x14, x15, [x11, 16] + + subs x7, x3, x12 + sbcs x8, x4, x13 + sbcs x9, x5, x14 + sbcs x10, x6, x15 + + cbnz x16, Fr_rawAdd_done_s + b.hs Fr_rawAdd_done_s + + stp x3, x4, [x0] + stp x5, x6, [x0, 16] + ret + +Fr_rawAdd_done_s: + stp x7, x8, [x0] + stp x9, x10, [x0, 16] + ret + + +//void Fr_rawAddLS(FrRawElement pRawResult, FrRawElement pRawA, uint64_t rawB) +Fr_rawAddLS: +_Fr_rawAddLS: + ldp x3, x4, [x1] + adds x3, x3, x2 + adcs x4, x4, xzr + + ldp x5, x6, [x1, 16] + adcs x5, x5, xzr + adcs x6, x6, xzr + + cset x16, cs + + adr x11, Fr_rawq + ldp x12, x13, [x11] + ldp x14, x15, [x11, 16] + + subs x7, x3, x12 + sbcs x8, x4, x13 + sbcs x9, x5, x14 + sbcs x10, x6, x15 + + cbnz x16, Fr_rawAddLS_done_s + b.hs Fr_rawAddLS_done_s + + stp x3, x4, [x0] + stp x5, x6, [x0, 16] + ret + +Fr_rawAddLS_done_s: + stp x7, x8, [x0] + stp x9, x10, [x0, 16] + ret + + +// void Fr_rawSub(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB) 
+Fr_rawSub: +_Fr_rawSub: + ldp x3, x4, [x1] + ldp x7, x8, [x2] + subs x3, x3, x7 + sbcs x4, x4, x8 + + ldp x5, x6, [x1, 16] + ldp x9, x10, [x2, 16] + sbcs x5, x5, x9 + sbcs x6, x6, x10 + + b.cs Fr_rawSub_done + + adr x11, Fr_rawq + ldp x12, x13, [x11] + ldp x14, x15, [x11, 16] + + adds x3, x3, x12 + adcs x4, x4, x13 + adcs x5, x5, x14 + adc x6, x6, x15 + +Fr_rawSub_done: + stp x3, x4, [x0] + stp x5, x6, [x0, 16] + ret + + +//void Fr_rawSubRegular(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB) +Fr_rawSubRegular: +_Fr_rawSubRegular: + ldp x3, x4, [x1] + ldp x7, x8, [x2] + subs x3, x3, x7 + sbcs x4, x4, x8 + + ldp x5, x6, [x1, 16] + ldp x9, x10, [x2, 16] + sbcs x5, x5, x9 + sbc x6, x6, x10 + + stp x3, x4, [x0] + stp x5, x6, [x0, 16] + ret + +//void Fr_rawSubSL(FrRawElement pRawResult, uint64_t rawA, FrRawElement pRawB) +Fr_rawSubSL: +_Fr_rawSubSL: + ldp x7, x8, [x2] + subs x3, x1, x7 + sbcs x4, xzr, x8 + + ldp x9, x10, [x2, 16] + sbcs x5, xzr, x9 + sbcs x6, xzr, x10 + + b.cs Fr_rawSubSL_done + + adr x11, Fr_rawq + ldp x12, x13, [x11] + ldp x14, x15, [x11, 16] + + adds x3, x3, x12 + adcs x4, x4, x13 + adcs x5, x5, x14 + adc x6, x6, x15 + +Fr_rawSubSL_done: + stp x3, x4, [x0] + stp x5, x6, [x0, 16] + ret + + +//void Fr_rawSubLS(FrRawElement pRawResult, FrRawElement pRawA, uint64_t rawB) +Fr_rawSubLS: +_Fr_rawSubLS: + ldp x3, x4, [x1] + subs x3, x3, x2 + sbcs x4, x4, xzr + + ldp x5, x6, [x1, 16] + sbcs x5, x5, xzr + sbcs x6, x6, xzr + + b.cs Fr_rawSubLS_done + + adr x11, Fr_rawq + ldp x12, x13, [x11] + ldp x14, x15, [x11, 16] + + adds x3, x3, x12 + adcs x4, x4, x13 + adcs x5, x5, x14 + adc x6, x6, x15 + +Fr_rawSubLS_done: + stp x3, x4, [x0] + stp x5, x6, [x0, 16] + ret + + +// void Fr_rawNeg(FrRawElement pRawResult, FrRawElement pRawA) +Fr_rawNeg: +_Fr_rawNeg: + ldp x2, x3, [x1] + orr x6, x2, x3 + + ldp x4, x5, [x1, 16] + orr x7, x4, x5 + orr x8, x6, x7 + + cbz x8, Fr_rawNeg_done_zero + + adr x10, Fr_rawq + ldp x11, x12, [x10] + ldp x13, x14, [x10, 16] 
+ + subs x2, x11, x2 + sbcs x3, x12, x3 + sbcs x4, x13, x4 + sbc x5, x14, x5 + + stp x2, x3, [x0] + stp x4, x5, [x0, 16] + ret + +Fr_rawNeg_done_zero: + stp xzr, xzr, [x0] + stp xzr, xzr, [x0, 16] + ret + + +//void Fr_rawNegLS(FrRawElement pRawResult, FrRawElement pRawA, uint64_t rawB) +Fr_rawNegLS: +_Fr_rawNegLS: + ldp x3, x4, [x1] + ldp x5, x6, [x1, 16] + + adr x11, Fr_rawq + ldp x12, x13, [x11] + ldp x14, x15, [x11, 16] + + subs x7, x12, x2 + sbcs x8, x13, xzr + sbcs x9, x14, xzr + sbcs x10, x15, xzr + + cset x16, cs + + subs x7, x7, x3 + sbcs x8, x8, x4 + sbcs x9, x9, x5 + sbcs x10, x10, x6 + + cset x17, cs + orr x17, x17, x16 + + cbz x17, Fr_rawNegLS_done + + adds x7, x7, x12 + adcs x8, x8, x13 + adcs x9, x9, x14 + adc x10, x10, x15 + + +Fr_rawNegLS_done: + stp x7, x8, [x0] + stp x9, x10, [x0, 16] + ret + + +// void Fr_rawMMul(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB) +Fr_rawMMul: +_Fr_rawMMul: + ldr x3, [x1] //pRawA[0] + ldp x5, x6, [x2] //pRawB + ldp x7, x8, [x2, 16] + + adr x4, Fr_np + ldr x4, [x4] + + str x28, [sp, #-16]! 
+ + adr x2, Fr_rawq + ldp x15, x16, [x2] + ldp x17, x28, [x2, 16] + + // product0 = pRawB * pRawA[0] + mul x10, x5, x3 + umulh x11, x5, x3 + mul x2, x6, x3 + adds x11, x11, x2 + umulh x12, x6, x3 + mul x2, x7, x3 + adcs x12, x12, x2 + umulh x13, x7, x3 + mul x2, x8, x3 + adcs x13, x13, x2 + umulh x14, x8, x3 + adc x14, x14, xzr + + // np0 = Fr_np * product0[0]; + mul x9, x4, x10 + + // product0 = product0 + Fr_rawq * np0 + mul x2, x15, x9 + adds x10, x10, x2 + mul x3, x16, x9 + adcs x11, x11, x3 + mul x2, x17, x9 + adcs x12, x12, x2 + mul x3, x28, x9 + adcs x13, x13, x3 + adc x14, x14, xzr + + umulh x2, x15, x9 + adds x11, x11, x2 + umulh x3, x16, x9 + adcs x12, x12, x3 + umulh x2, x17, x9 + adcs x13, x13, x2 + umulh x3, x28, x9 + adcs x14, x14, x3 + adc x9, xzr, xzr + + // product1 = product0 + pRawB * pRawA[1] + ldr x3, [x1, 8] //pRawA[1] + mul x10, x5, x3 + adds x10, x10, x11 + mul x11, x6, x3 + adcs x11, x11, x12 + mul x12, x7, x3 + adcs x12, x12, x13 + mul x13, x8, x3 + adcs x13, x13, x14 + adc x14, xzr, xzr + + adds x11, x11, x9 + umulh x2, x5, x3 + adcs x11, x11, x2 + umulh x9, x6, x3 + adcs x12, x12, x9 + umulh x2, x7, x3 + adcs x13, x13, x2 + umulh x9, x8, x3 + adc x14, x14, x9 + + // np0 = Fr_np * product1[0]; + mul x9, x4, x10 + + // product1 = product1 + Fr_rawq * np0 + mul x2, x15, x9 + adds x10, x10, x2 + mul x3, x16, x9 + adcs x11, x11, x3 + mul x2, x17, x9 + adcs x12, x12, x2 + mul x3, x28, x9 + adcs x13, x13, x3 + adc x14, x14, xzr + + umulh x2, x15, x9 + adds x11, x11, x2 + umulh x3, x16, x9 + adcs x12, x12, x3 + umulh x2, x17, x9 + adcs x13, x13, x2 + umulh x3, x28, x9 + adcs x14, x14, x3 + adc x9, xzr, xzr + + + // product2 = product1 + pRawB * pRawA[2] + ldr x3, [x1, 16] //pRawA[2] + mul x10, x5, x3 + adds x10, x10, x11 + mul x11, x6, x3 + adcs x11, x11, x12 + mul x12, x7, x3 + adcs x12, x12, x13 + mul x13, x8, x3 + adcs x13, x13, x14 + adc x14, xzr, xzr + + adds x11, x11, x9 + umulh x2, x5, x3 + adcs x11, x11, x2 + umulh x9, x6, x3 + adcs x12, 
x12, x9 + umulh x2, x7, x3 + adcs x13, x13, x2 + umulh x9, x8, x3 + adc x14, x14, x9 + + // np0 = Fr_np * product2[0]; + mul x9, x4, x10 + + // product2 = product2 + Fr_rawq * np0 + mul x2, x15, x9 + adds x10, x10, x2 + mul x3, x16, x9 + adcs x11, x11, x3 + mul x2, x17, x9 + adcs x12, x12, x2 + mul x3, x28, x9 + adcs x13, x13, x3 + adc x14, x14, xzr + + umulh x2, x15, x9 + adds x11, x11, x2 + umulh x3, x16, x9 + adcs x12, x12, x3 + umulh x2, x17, x9 + adcs x13, x13, x2 + umulh x3, x28, x9 + adcs x14, x14, x3 + adc x9, xzr, xzr + + // product3 = product2 + pRawB * pRawA[3] + ldr x3, [x1, 24] //pRawA[3] + mul x10, x5, x3 + adds x10, x10, x11 + mul x11, x6, x3 + adcs x11, x11, x12 + mul x12, x7, x3 + adcs x12, x12, x13 + mul x13, x8, x3 + adcs x13, x13, x14 + adc x14, xzr, xzr + + adds x11, x11, x9 + umulh x2, x5, x3 + adcs x11, x11, x2 + umulh x9, x6, x3 + adcs x12, x12, x9 + umulh x2, x7, x3 + adcs x13, x13, x2 + umulh x9, x8, x3 + adc x14, x14, x9 + + // np0 = Fr_np * product3[0]; + mul x9, x4, x10 + + // product3 = product3 + Fr_rawq * np0 + mul x2, x15, x9 + adds x10, x10, x2 + mul x3, x16, x9 + adcs x11, x11, x3 + mul x2, x17, x9 + adcs x12, x12, x2 + mul x3, x28, x9 + adcs x13, x13, x3 + adc x14, x14, xzr + + umulh x2, x15, x9 + adds x11, x11, x2 + umulh x3, x16, x9 + adcs x12, x12, x3 + umulh x2, x17, x9 + adcs x13, x13, x2 + umulh x3, x28, x9 + adcs x14, x14, x3 + + // result >= Fr_rawq + subs x5, x11, x15 + sbcs x6, x12, x16 + sbcs x7, x13, x17 + sbcs x8, x14, x28 + + ldr x28, [sp], #16 + + b.hs Fr_rawMul_done_s + + stp x11, x12, [x0] + stp x13, x14, [x0, 16] + ret + +Fr_rawMul_done_s: + stp x5, x6, [x0] + stp x7, x8, [x0, 16] + ret + + +// void Fr_rawMMul1(FrRawElement pRawResult, FrRawElement pRawA, uint64_t pRawB) +Fr_rawMMul1: +_Fr_rawMMul1: + ldp x5, x6, [x1] //pRawA + ldp x7, x8, [x1, 16] + + adr x4, Fr_np + ldr x4, [x4] + + // product0 = pRawA * pRawB + mul x10, x5, x2 + umulh x11, x5, x2 + mul x3, x6, x2 + adds x11, x11, x3 + umulh x12, x6, x2 + mul 
x3, x7, x2 + adcs x12, x12, x3 + umulh x13, x7, x2 + mul x3, x8, x2 + adcs x13, x13, x3 + umulh x14, x8, x2 + adc x14, x14, xzr + + adr x3, Fr_rawq + ldp x15, x16, [x3] + ldp x17, x8, [x3, 16] + + // np0 = Fr_np * product0[0]; + mul x9, x4, x10 + + // product0 = product0 + Fr_rawq * np0 + mul x2, x15, x9 + adds x10, x10, x2 + mul x3, x16, x9 + adcs x11, x11, x3 + mul x2, x17, x9 + adcs x12, x12, x2 + mul x3, x8, x9 + adcs x13, x13, x3 + adc x14, x14, xzr + + umulh x2, x15, x9 + adds x11, x11, x2 + umulh x3, x16, x9 + adcs x12, x12, x3 + umulh x2, x17, x9 + adcs x13, x13, x2 + umulh x3, x8, x9 + adcs x14, x14, x3 + adc x7, xzr, xzr + + // np0 = Fr_np * product1[0]; + mul x9, x4, x11 + + // product1 = product1 + Fr_rawq * np0 + mul x2, x15, x9 + adds x10, x11, x2 + mul x3, x16, x9 + adcs x11, x12, x3 + mul x2, x17, x9 + adcs x12, x13, x2 + mul x3, x8, x9 + adcs x13, x14, x3 + adc x14, xzr, xzr + + adds x11, x11, x7 + umulh x2, x15, x9 + adcs x11, x11, x2 + umulh x3, x16, x9 + adcs x12, x12, x3 + umulh x2, x17, x9 + adcs x13, x13, x2 + umulh x3, x8, x9 + adcs x14, x14, x3 + adc x7, xzr, xzr + + // np0 = Fr_np * product2[0]; + mul x9, x4, x11 + + // product2 = product2 + Fr_rawq * np0 + mul x2, x15, x9 + adds x10, x11, x2 + mul x3, x16, x9 + adcs x11, x12, x3 + mul x2, x17, x9 + adcs x12, x13, x2 + mul x3, x8, x9 + adcs x13, x14, x3 + adc x14, xzr, xzr + + adds x11, x11, x7 + umulh x2, x15, x9 + adds x11, x11, x2 + umulh x3, x16, x9 + adcs x12, x12, x3 + umulh x2, x17, x9 + adcs x13, x13, x2 + umulh x3, x8, x9 + adcs x14, x14, x3 + adc x7, xzr, xzr + + // np0 = Fr_np * product3[0]; + mul x9, x4, x11 + + // product3 = product3 + Fr_rawq * np0 + mul x2, x15, x9 + adds x10, x11, x2 + mul x3, x16, x9 + adcs x11, x12, x3 + mul x2, x17, x9 + adcs x12, x13, x2 + mul x3, x8, x9 + adcs x13, x14, x3 + adc x14, xzr, xzr + + adds x11, x11, x7 + umulh x2, x15, x9 + adds x11, x11, x2 + umulh x3, x16, x9 + adcs x12, x12, x3 + umulh x2, x17, x9 + adcs x13, x13, x2 + umulh x3, x8, x9 + 
adcs x14, x14, x3 + + // result >= Fr_rawq + subs x5, x11, x15 + sbcs x6, x12, x16 + sbcs x7, x13, x17 + sbcs x8, x14, x8 + + b.hs Fr_rawMul1_done_s + + stp x11, x12, [x0] + stp x13, x14, [x0, 16] + ret + + Fr_rawMul1_done_s: + stp x5, x6, [x0] + stp x7, x8, [x0, 16] + ret + + +// void Fr_rawFromMontgomery(FrRawElement pRawResult, FrRawElement pRawA) +Fr_rawFromMontgomery: +_Fr_rawFromMontgomery: + ldp x10, x11, [x1] //pRawA + ldp x12, x13, [x1, 16] + mov x14, xzr + + adr x4, Fr_np + ldr x4, [x4] + + adr x3, Fr_rawq + ldp x15, x16, [x3] + ldp x17, x8, [x3, 16] + + // np0 = Fr_np * product0[0]; + mul x9, x4, x10 + + // product0 = product0 + Fr_rawq * np0 + mul x2, x15, x9 + adds x10, x10, x2 + mul x3, x16, x9 + adcs x11, x11, x3 + mul x2, x17, x9 + adcs x12, x12, x2 + mul x3, x8, x9 + adcs x13, x13, x3 + adc x14, x14, xzr + + umulh x2, x15, x9 + adds x11, x11, x2 + umulh x3, x16, x9 + adcs x12, x12, x3 + umulh x2, x17, x9 + adcs x13, x13, x2 + umulh x3, x8, x9 + adcs x14, x14, x3 + adc x7, xzr, xzr + + // np0 = Fr_np * product1[0]; + mul x9, x4, x11 + + // product1 = product1 + Fr_rawq * np0 + mul x2, x15, x9 + adds x10, x11, x2 + mul x3, x16, x9 + adcs x11, x12, x3 + mul x2, x17, x9 + adcs x12, x13, x2 + mul x3, x8 , x9 + adcs x13, x14, x3 + adc x14, xzr, xzr + + adds x11, x11, x7 + umulh x2, x15, x9 + adcs x11, x11, x2 + umulh x3, x16, x9 + adcs x12, x12, x3 + umulh x2, x17, x9 + adcs x13, x13, x2 + umulh x3, x8 , x9 + adcs x14, x14, x3 + adc x7, xzr, xzr + + // np0 = Fr_np * product2[0]; + mul x9, x4, x11 + + // product2 = product2 + Fr_rawq * np0 + mul x2, x15, x9 + adds x10, x11, x2 + mul x3, x16, x9 + adcs x11, x12, x3 + mul x2, x17, x9 + adcs x12, x13, x2 + mul x3, x8, x9 + adcs x13, x14, x3 + adc x14, xzr, xzr + + adds x11, x11, x7 + umulh x2, x15, x9 + adds x11, x11, x2 + umulh x3, x16, x9 + adcs x12, x12, x3 + umulh x2, x17, x9 + adcs x13, x13, x2 + umulh x3, x8, x9 + adcs x14, x14, x3 + adc x7, xzr, xzr + + // np0 = Fr_np * product3[0]; + mul x9, x4, x11 
+ + // product3 = product3 + Fr_rawq * np0 + mul x2, x15, x9 + adds x10, x11, x2 + mul x3, x16, x9 + adcs x11, x12, x3 + mul x2, x17, x9 + adcs x12, x13, x2 + mul x3, x8, x9 + adcs x13, x14, x3 + adc x14, xzr, xzr + + adds x11, x11, x7 + umulh x2, x15, x9 + adds x11, x11, x2 + umulh x3, x16, x9 + adcs x12, x12, x3 + umulh x2, x17, x9 + adcs x13, x13, x2 + umulh x3, x8, x9 + adcs x14, x14, x3 + + // result >= Fr_rawq + subs x5, x11, x15 + sbcs x6, x12, x16 + sbcs x7, x13, x17 + sbcs x8, x14, x8 + + b.hs Fr_rawFromMontgomery_s + + stp x11, x12, [x0] + stp x13, x14, [x0, 16] + ret + +Fr_rawFromMontgomery_s: + stp x5, x6, [x0] + stp x7, x8, [x0, 16] + ret + + + +// void Fr_rawCopy(FrRawElement pRawResult, FrRawElement pRawA) +Fr_rawCopy: +_Fr_rawCopy: + ldp x2, x3, [x1] + stp x2, x3, [x0] + + ldp x4, x5, [x1, 16] + stp x4, x5, [x0, 16] + ret + + +// void Fr_rawSwap(FrRawElement pRawResult, FrRawElement pRawA) +Fr_rawSwap: +_Fr_rawSwap: + ldp x3, x4, [x0] + ldp x7, x8, [x1] + + stp x3, x4, [x1] + stp x7, x8, [x0] + + ldp x5, x6, [x0, 16] + ldp x9, x10, [x1, 16] + + stp x5, x6, [x1, 16] + stp x9, x10, [x0, 16] + ret + + +// int Fr_rawIsEq(FrRawElement pRawA, FrRawElement pRawB) +Fr_rawIsEq: +_Fr_rawIsEq: + ldp x3, x4, [x0] + ldp x7, x8, [x1] + eor x11, x3, x7 + eor x12, x4, x8 + + ldp x5, x6, [x0, 16] + ldp x9, x10, [x1, 16] + eor x13, x5, x9 + eor x14, x6, x10 + + orr x15, x11, x12 + orr x16, x13, x14 + + orr x0, x15, x16 + cmp x0, xzr + cset x0, eq + ret + + +// int Fr_rawIsZero(FrRawElement rawA) +Fr_rawIsZero: +_Fr_rawIsZero: + ldp x1, x2, [x0] + orr x5, x1, x2 + + ldp x3, x4, [x0, 16] + orr x6, x3, x4 + + orr x0, x5, x6 + cmp x0, xzr + cset x0, eq + ret + + +// void Fr_rawCopyS2L(FrRawElement pRawResult, int64_t val) +Fr_rawCopyS2L: +_Fr_rawCopyS2L: + cmp x1, xzr + b.lt Fr_rawCopyS2L_adjust_neg + + stp x1, xzr, [x0] + stp xzr, xzr, [x0, 16] + ret + +Fr_rawCopyS2L_adjust_neg: + adr x3, Fr_rawq + ldp x5, x6, [x3] + ldp x7, x8, [x3, 16] + + mov x9, -1 + + adds x1, x1, 
x5 + adcs x2, x9, x6 + adcs x3, x9, x7 + adc x4, x9, x8 + + stp x1, x2, [x0] + stp x3, x4, [x0, 16] + ret + + +//int Fr_rawCmp(FrRawElement pRawA, FrRawElement pRawB) +Fr_rawCmp: +_Fr_rawCmp: + ldp x3, x4, [x0] + ldp x5, x6, [x0, 16] + ldp x7, x8, [x1] + ldp x9, x10, [x1, 16] + + subs x3, x3, x7 + cset x0, ne + + sbcs x4, x4, x8 + cinc x0, x0, ne + + sbcs x5, x5, x9 + cinc x0, x0, ne + + sbcs x6, x6, x10 + cinc x0, x0, ne + + cneg x0, x0, lo + ret + +//void Fr_rawAnd(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB) +Fr_rawAnd: +_Fr_rawAnd: + ldp x3, x4, [x1] + ldp x7, x8, [x2] + and x3, x3, x7 + and x4, x4, x8 + + ldp x5, x6, [x1, 16] + ldp x9, x10, [x2, 16] + and x5, x5, x9 + and x6, x6, x10 + + and x6, x6, 0x3fffffffffffffff // lboMask + + adr x11, Fr_rawq + ldp x12, x13, [x11] + ldp x14, x15, [x11, 16] + + subs x7, x3, x12 + sbcs x8, x4, x13 + sbcs x9, x5, x14 + sbcs x10, x6, x15 + + csel x3, x7, x3, hs + csel x4, x8, x4, hs + csel x5, x9, x5, hs + csel x6, x10, x6, hs + + stp x3, x4, [x0] + stp x5, x6, [x0, 16] + ret + +//void Fr_rawOr(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB) +Fr_rawOr: +_Fr_rawOr: + ldp x3, x4, [x1] + ldp x7, x8, [x2] + orr x3, x3, x7 + orr x4, x4, x8 + + ldp x5, x6, [x1, 16] + ldp x9, x10, [x2, 16] + orr x5, x5, x9 + orr x6, x6, x10 + + and x6, x6, 0x3fffffffffffffff // lboMask + + adr x11, Fr_rawq + ldp x12, x13, [x11] + ldp x14, x15, [x11, 16] + + subs x7, x3, x12 + sbcs x8, x4, x13 + sbcs x9, x5, x14 + sbcs x10, x6, x15 + + csel x3, x7, x3, hs + csel x4, x8, x4, hs + csel x5, x9, x5, hs + csel x6, x10, x6, hs + + stp x3, x4, [x0] + stp x5, x6, [x0, 16] + ret + +//void Fr_rawXor(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB) +Fr_rawXor: +_Fr_rawXor: + ldp x3, x4, [x1] + ldp x7, x8, [x2] + eor x3, x3, x7 + eor x4, x4, x8 + + ldp x5, x6, [x1, 16] + ldp x9, x10, [x2, 16] + eor x5, x5, x9 + eor x6, x6, x10 + + and x6, x6, 0x3fffffffffffffff // lboMask + + adr x11, Fr_rawq + ldp x12, x13, 
[x11] + ldp x14, x15, [x11, 16] + + subs x7, x3, x12 + sbcs x8, x4, x13 + sbcs x9, x5, x14 + sbcs x10, x6, x15 + + csel x3, x7, x3, hs + csel x4, x8, x4, hs + csel x5, x9, x5, hs + csel x6, x10, x6, hs + + stp x3, x4, [x0] + stp x5, x6, [x0, 16] + ret + +//void Fr_rawShl(FrRawElement r, FrRawElement a, uint64_t b) +Fr_rawShl: +_Fr_rawShl: + ldp x3, x4, [x1] + ldp x5, x6, [x1, 16] + + ands x7, x2, 0x3f // bit_shift = b % 64 + mov x8, 0x3f + mov x9, 0x1 + sub x8, x8, x7 // bit_shift augmenter to 64 + + tbnz x2, 7, Fr_rawShl_word_shift_2 + tbnz x2, 6, Fr_rawShl_word_shift_1 + +Fr_rawShl_word_shift_0: + lsl x13, x6, x7 + lsr x15, x5, x8 + lsr x15, x15, x9 + orr x13, x13, x15 + + lsl x12, x5, x7 + lsr x16, x4, x8 + lsr x16, x16, x9 + orr x12, x12, x16 + + lsl x11, x4, x7 + lsr x17, x3, x8 + lsr x17, x17, x9 + orr x11, x11, x17 + + lsl x10, x3, x7 + + b Fr_rawShl_sub + +Fr_rawShl_word_shift_1: + lsl x13, x5, x7 + lsr x15, x4, x8 + lsr x15, x15, x9 + orr x13, x13, x15 + + lsl x12, x4, x7 + lsr x16, x3, x8 + lsr x16, x16, x9 + orr x12, x12, x16 + + lsl x11, x3, x7 + mov x10, xzr + + b Fr_rawShl_sub + +Fr_rawShl_word_shift_2: + tbnz x2, 6, Fr_rawShl_word_shift_3 + + lsl x13, x4, x7 + lsr x15, x3, x8 + lsr x15, x15, x9 + orr x13, x13, x15 + + lsl x12, x3, x7 + mov x11, xzr + mov x10, xzr + + b Fr_rawShl_sub + +Fr_rawShl_word_shift_3: + lsl x13, x3, x7 + mov x12, xzr + mov x11, xzr + mov x10, xzr + +Fr_rawShl_sub: + and x13, x13, 0x3fffffffffffffff // lboMask + + adr x9, Fr_rawq + ldp x14, x15, [x9] + ldp x16, x17, [x9, 16] + + subs x3, x10, x14 + sbcs x4, x11, x15 + sbcs x5, x12, x16 + sbcs x6, x13, x17 + + csel x10, x3, x10, hs + csel x11, x4, x11, hs + csel x12, x5, x12, hs + csel x13, x6, x13, hs + + stp x10, x11, [x0] + stp x12, x13, [x0, 16] + ret + + +//void Fr_rawShr(FrRawElement r, FrRawElement a, uint64_t b) +Fr_rawShr: +_Fr_rawShr: + ldp x3, x4, [x1] + ldp x5, x6, [x1, 16] + + and x7, x2, 0x3f // bit_shift = b % 64 + mov x8, 0x40 + sub x8, x8, x7 // bit_shift 
augmenter to 64 + + tbnz x2, 7, Fr_rawShr_word_shift_2 + tbnz x2, 6, Fr_rawShr_word_shift_1 + +Fr_rawShr_word_shift_0: + cbz x7, Fr_rawShr_word_shift_0_end + + lsr x3, x3, x7 + lsl x15, x4, x8 + orr x3, x3, x15 + + lsr x4, x4, x7 + lsl x16, x5, x8 + orr x4, x4, x16 + + lsr x5, x5, x7 + lsl x17, x6, x8 + orr x5, x5, x17 + + lsr x6, x6, x7 + +Fr_rawShr_word_shift_0_end: + stp x3, x4, [x0] + stp x5, x6, [x0, 16] + ret + +Fr_rawShr_word_shift_1: + cbz x7, Fr_rawShr_word_shift_1_end + + lsr x4, x4, x7 + lsl x15, x5, x8 + orr x4, x4, x15 + + lsr x5, x5, x7 + lsl x16, x6, x8 + orr x5, x5, x16 + + lsr x6, x6, x7 + +Fr_rawShr_word_shift_1_end: + stp x4, x5, [x0] + stp x6, xzr, [x0, 16] + ret + +Fr_rawShr_word_shift_2: + tbnz x2, 6, Fr_rawShr_word_shift_3 + + cbz x7, Fr_rawShr_word_shift_2_end + + lsr x5, x5, x7 + lsl x15, x6, x8 + orr x5, x5, x15 + + lsr x6, x6, x7 + +Fr_rawShr_word_shift_2_end: + stp x5, x6, [x0] + stp xzr, xzr, [x0, 16] + ret + +Fr_rawShr_word_shift_3: + lsr x6, x6, x7 + + stp x6, xzr, [x0] + stp xzr, xzr, [x0, 16] + ret + +//void Fr_rawNot(FrRawElement pRawResult, FrRawElement pRawA) +Fr_rawNot: +_Fr_rawNot: + ldp x3, x4, [x1] + mvn x3, x3 + mvn x4, x4 + + ldp x5, x6, [x1, 16] + mvn x5, x5 + mvn x6, x6 + + and x6, x6, 0x3fffffffffffffff // lboMask + + adr x11, Fr_rawq + ldp x12, x13, [x11] + ldp x14, x15, [x11, 16] + + subs x7, x3, x12 + sbcs x8, x4, x13 + sbcs x9, x5, x14 + sbcs x10, x6, x15 + + csel x3, x7, x3, hs + csel x4, x8, x4, hs + csel x5, x9, x5, hs + csel x6, x10, x6, hs + + stp x3, x4, [x0] + stp x5, x6, [x0, 16] + ret + + + .align 8 +Fr_rawq: .quad 0x43e1f593f0000001,0x2833e84879b97091,0xb85045b68181585d,0x30644e72e131a029 +Fr_np: .quad 0xc2e1f593efffffff diff --git a/example/support/fr_raw_generic.cpp b/example/support/fr_raw_generic.cpp new file mode 100644 index 0000000..8378a4e --- /dev/null +++ b/example/support/fr_raw_generic.cpp @@ -0,0 +1,364 @@ +#include "fr_element.hpp" +#include +#include + +static uint64_t Fr_rawq[] = 
{0x43e1f593f0000001,0x2833e84879b97091,0xb85045b68181585d,0x30644e72e131a029, 0}; +static FrRawElement Fr_rawR2 = {0x1bb8e645ae216da7,0x53fe3ab1e35c59e3,0x8c49833d53bb8085,0x0216d0b17f4e44a5}; +static uint64_t Fr_np = {0xc2e1f593efffffff}; +static uint64_t lboMask = 0x3fffffffffffffff; + + +void Fr_rawAdd(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB) +{ + uint64_t carry = mpn_add_n(pRawResult, pRawA, pRawB, Fr_N64); + + if(carry || mpn_cmp(pRawResult, Fr_rawq, Fr_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, Fr_rawq, Fr_N64); + } +} + +void Fr_rawAddLS(FrRawElement pRawResult, FrRawElement pRawA, uint64_t rawB) +{ + uint64_t carry = mpn_add_1(pRawResult, pRawA, Fr_N64, rawB); + + if(carry || mpn_cmp(pRawResult, Fr_rawq, Fr_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, Fr_rawq, Fr_N64); + } +} + +void Fr_rawSub(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB) +{ + uint64_t carry = mpn_sub_n(pRawResult, pRawA, pRawB, Fr_N64); + + if(carry) + { + mpn_add_n(pRawResult, pRawResult, Fr_rawq, Fr_N64); + } +} + +void Fr_rawSubRegular(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB) +{ + mpn_sub_n(pRawResult, pRawA, pRawB, Fr_N64); +} + +void Fr_rawSubSL(FrRawElement pRawResult, uint64_t rawA, FrRawElement pRawB) +{ + FrRawElement pRawA = {rawA, 0, 0, 0}; + + uint64_t carry = mpn_sub_n(pRawResult, pRawA, pRawB, Fr_N64); + + if(carry) + { + mpn_add_n(pRawResult, pRawResult, Fr_rawq, Fr_N64); + } +} + +void Fr_rawSubLS(FrRawElement pRawResult, FrRawElement pRawA, uint64_t rawB) +{ + uint64_t carry = mpn_sub_1(pRawResult, pRawA, Fr_N64, rawB); + + if(carry) + { + mpn_add_n(pRawResult, pRawResult, Fr_rawq, Fr_N64); + } +} + +void Fr_rawNeg(FrRawElement pRawResult, const FrRawElement pRawA) +{ + const uint64_t zero[Fr_N64] = {0, 0, 0, 0}; + + if (mpn_cmp(pRawA, zero, Fr_N64) != 0) + { + mpn_sub_n(pRawResult, Fr_rawq, pRawA, Fr_N64); + } + else + { + mpn_copyi(pRawResult, zero, Fr_N64); + } +} + 
+// Substracts a long element and a short element form 0 +void Fr_rawNegLS(FrRawElement pRawResult, FrRawElement pRawA, uint64_t rawB) +{ + uint64_t carry1 = mpn_sub_1(pRawResult, Fr_rawq, Fr_N64, rawB); + uint64_t carry2 = mpn_sub_n(pRawResult, pRawResult, pRawA, Fr_N64); + + if (carry1 || carry2) + { + mpn_add_n(pRawResult, pRawResult, Fr_rawq, Fr_N64); + } +} + +void Fr_rawCopy(FrRawElement pRawResult, const FrRawElement pRawA) +{ + pRawResult[0] = pRawA[0]; + pRawResult[1] = pRawA[1]; + pRawResult[2] = pRawA[2]; + pRawResult[3] = pRawA[3]; +} + +int Fr_rawIsEq(const FrRawElement pRawA, const FrRawElement pRawB) +{ + return mpn_cmp(pRawA, pRawB, Fr_N64) == 0; +} + +void Fr_rawMMul(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB) +{ + const mp_size_t N = Fr_N64+1; + const uint64_t *mq = Fr_rawq; + + uint64_t np0; + + uint64_t product0[N] = {0}; + uint64_t product1[N] = {0}; + uint64_t product2[N] = {0}; + uint64_t product3[N] = {0}; + + product0[4] = mpn_mul_1(product0, pRawB, Fr_N64, pRawA[0]); + + np0 = Fr_np * product0[0]; + product1[1] = mpn_addmul_1(product0, mq, N, np0); + + product1[4] = mpn_addmul_1(product1, pRawB, Fr_N64, pRawA[1]); + mpn_add(product1, product1, N, product0+1, N-1); + + np0 = Fr_np * product1[0]; + product2[1] = mpn_addmul_1(product1, mq, N, np0); + + product2[4] = mpn_addmul_1(product2, pRawB, Fr_N64, pRawA[2]); + mpn_add(product2, product2, N, product1+1, N-1); + + np0 = Fr_np * product2[0]; + product3[1] = mpn_addmul_1(product2, mq, N, np0); + + product3[4] = mpn_addmul_1(product3, pRawB, Fr_N64, pRawA[3]); + mpn_add(product3, product3, N, product2+1, N-1); + + np0 = Fr_np * product3[0]; + mpn_addmul_1(product3, mq, N, np0); + + mpn_copyi(pRawResult, product3+1, Fr_N64); + + if (mpn_cmp(pRawResult, mq, Fr_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, mq, Fr_N64); + } +} + +void Fr_rawMSquare(FrRawElement pRawResult, const FrRawElement pRawA) +{ + Fr_rawMMul(pRawResult, pRawA, pRawA); +} + +void 
Fr_rawMMul1(FrRawElement pRawResult, const FrRawElement pRawA, uint64_t pRawB) +{ + const mp_size_t N = Fr_N64+1; + const uint64_t *mq = Fr_rawq; + + uint64_t np0; + + uint64_t product0[N] = {0}; + uint64_t product1[N] = {0}; + uint64_t product2[N] = {0}; + uint64_t product3[N] = {0}; + + product0[4] = mpn_mul_1(product0, pRawA, Fr_N64, pRawB); + + np0 = Fr_np * product0[0]; + product1[1] = mpn_addmul_1(product0, mq, N, np0); + mpn_add(product1, product1, N, product0+1, N-1); + + np0 = Fr_np * product1[0]; + product2[1] = mpn_addmul_1(product1, mq, N, np0); + mpn_add(product2, product2, N, product1+1, N-1); + + np0 = Fr_np * product2[0]; + product3[1] = mpn_addmul_1(product2, mq, N, np0); + mpn_add(product3, product3, N, product2+1, N-1); + + np0 = Fr_np * product3[0]; + mpn_addmul_1(product3, mq, N, np0); + + mpn_copyi(pRawResult, product3+1, Fr_N64); + + if (mpn_cmp(pRawResult, mq, Fr_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, mq, Fr_N64); + } +} + +void Fr_rawToMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA) +{ + Fr_rawMMul(pRawResult, pRawA, Fr_rawR2); +} + +void Fr_rawFromMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA) +{ + const mp_size_t N = Fr_N64+1; + const uint64_t *mq = Fr_rawq; + + uint64_t np0; + + uint64_t product0[N]; + uint64_t product1[N] = {0}; + uint64_t product2[N] = {0}; + uint64_t product3[N] = {0}; + + mpn_copyi(product0, pRawA, Fr_N64); product0[4] = 0; + + np0 = Fr_np * product0[0]; + product1[1] = mpn_addmul_1(product0, mq, N, np0); + mpn_add(product1, product1, N, product0+1, N-1); + + np0 = Fr_np * product1[0]; + product2[1] = mpn_addmul_1(product1, mq, N, np0); + mpn_add(product2, product2, N, product1+1, N-1); + + np0 = Fr_np * product2[0]; + product3[1] = mpn_addmul_1(product2, mq, N, np0); + mpn_add(product3, product3, N, product2+1, N-1); + + np0 = Fr_np * product3[0]; + mpn_addmul_1(product3, mq, N, np0); + + mpn_copyi(pRawResult, product3+1, Fr_N64); + + if (mpn_cmp(pRawResult, mq, Fr_N64) >= 
0) + { + mpn_sub_n(pRawResult, pRawResult, mq, Fr_N64); + } +} + +int Fr_rawIsZero(const FrRawElement rawA) +{ + return mpn_zero_p(rawA, Fr_N64) ? 1 : 0; +} + +int Fr_rawCmp(FrRawElement pRawA, FrRawElement pRawB) +{ + return mpn_cmp(pRawA, pRawB, Fr_N64); +} + +void Fr_rawSwap(FrRawElement pRawResult, FrRawElement pRawA) +{ + FrRawElement temp; + + temp[0] = pRawResult[0]; + temp[1] = pRawResult[1]; + temp[2] = pRawResult[2]; + temp[3] = pRawResult[3]; + + pRawResult[0] = pRawA[0]; + pRawResult[1] = pRawA[1]; + pRawResult[2] = pRawA[2]; + pRawResult[3] = pRawA[3]; + + pRawA[0] = temp[0]; + pRawA[1] = temp[1]; + pRawA[2] = temp[2]; + pRawA[3] = temp[3]; +} + +void Fr_rawCopyS2L(FrRawElement pRawResult, int64_t val) +{ + pRawResult[0] = val; + pRawResult[1] = 0; + pRawResult[2] = 0; + pRawResult[3] = 0; + + if (val < 0) + { + pRawResult[1] = -1; + pRawResult[2] = -1; + pRawResult[3] = -1; + + mpn_add_n(pRawResult, pRawResult, Fr_rawq, Fr_N64); + } +} + +void Fr_rawAnd(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB) +{ + mpn_and_n(pRawResult, pRawA, pRawB, Fr_N64); + + pRawResult[3] &= lboMask; + + if (mpn_cmp(pRawResult, Fr_rawq, Fr_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, Fr_rawq, Fr_N64); + } +} + +void Fr_rawOr(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB) +{ + mpn_ior_n(pRawResult, pRawA, pRawB, Fr_N64); + + pRawResult[3] &= lboMask; + + if (mpn_cmp(pRawResult, Fr_rawq, Fr_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, Fr_rawq, Fr_N64); + } +} + +void Fr_rawXor(FrRawElement pRawResult, FrRawElement pRawA, FrRawElement pRawB) +{ + mpn_xor_n(pRawResult, pRawA, pRawB, Fr_N64); + + pRawResult[3] &= lboMask; + + if (mpn_cmp(pRawResult, Fr_rawq, Fr_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, Fr_rawq, Fr_N64); + } +} + +void Fr_rawShl(FrRawElement r, FrRawElement a, uint64_t b) +{ + uint64_t bit_shift = b % 64; + uint64_t word_shift = b / 64; + uint64_t word_count = Fr_N64 - word_shift; + + mpn_copyi(r + 
word_shift, a, word_count); + std::memset(r, 0, word_shift * sizeof(uint64_t)); + + if (bit_shift) + { + mpn_lshift(r, r, Fr_N64, bit_shift); + } + + r[3] &= lboMask; + + if (mpn_cmp(r, Fr_rawq, Fr_N64) >= 0) + { + mpn_sub_n(r, r, Fr_rawq, Fr_N64); + } +} + +void Fr_rawShr(FrRawElement r, FrRawElement a, uint64_t b) +{ + const uint64_t bit_shift = b % 64; + const uint64_t word_shift = b / 64; + const uint64_t word_count = Fr_N64 - word_shift; + + mpn_copyi(r, a + word_shift, word_count); + std::memset(r + word_count, 0, word_shift * sizeof(uint64_t)); + + if (bit_shift) + { + mpn_rshift(r, r, Fr_N64, bit_shift); + } +} + +void Fr_rawNot(FrRawElement pRawResult, FrRawElement pRawA) +{ + mpn_com(pRawResult, pRawA, Fr_N64); + + pRawResult[3] &= lboMask; + + if (mpn_cmp(pRawResult, Fr_rawq, Fr_N64) >= 0) + { + mpn_sub_n(pRawResult, pRawResult, Fr_rawq, Fr_N64); + } +} diff --git a/example/support/witnesscalc.cpp b/example/support/witnesscalc.cpp new file mode 100644 index 0000000..4d6d810 --- /dev/null +++ b/example/support/witnesscalc.cpp @@ -0,0 +1,309 @@ +#include "witnesscalc.h" +#include "calcwit.hpp" +#include "circom.hpp" +#include "fr.hpp" +#include +#include +#include + +namespace CIRCUIT_NAME { + +using json = nlohmann::json; + +Circom_Circuit* loadCircuit(const void *buffer, unsigned long buffer_size) { + if (buffer_size % sizeof(u32) != 0) { + throw std::runtime_error("Invalid circuit file: wrong buffer_size"); + } + + Circom_Circuit *circuit = new Circom_Circuit; + + u8* bdata = (u8*)buffer; + + circuit->InputHashMap = new HashSignalInfo[get_size_of_input_hashmap()]; + uint dsize = get_size_of_input_hashmap()*sizeof(HashSignalInfo); + memcpy((void *)(circuit->InputHashMap), (void *)bdata, dsize); + + circuit->witness2SignalList = new u64[get_size_of_witness()]; + uint inisize = dsize; + dsize = get_size_of_witness()*sizeof(u64); + memcpy((void *)(circuit->witness2SignalList), (void *)(bdata+inisize), dsize); + + circuit->circuitConstants = new 
FrElement[get_size_of_constants()]; + if (get_size_of_constants()>0) { + inisize += dsize; + dsize = get_size_of_constants()*sizeof(FrElement); + memcpy((void *)(circuit->circuitConstants), (void *)(bdata+inisize), dsize); + } + + std::map templateInsId2IOSignalInfo1; + if (get_size_of_io_map()>0) { + u32 index[get_size_of_io_map()]; + inisize += dsize; + dsize = get_size_of_io_map()*sizeof(u32); + memcpy((void *)index, (void *)(bdata+inisize), dsize); + inisize += dsize; + if (inisize % sizeof(u32) != 0) { + throw std::runtime_error("Invalid circuit file: wrong inisize"); + } + u32 dataiomap[(buffer_size-inisize)/sizeof(u32)]; + memcpy((void *)dataiomap, (void *)(bdata+inisize), buffer_size-inisize); + u32* pu32 = dataiomap; + + for (int i = 0; i < get_size_of_io_map(); i++) { + u32 n = *pu32; + IODefPair p; + p.len = n; + IODef defs[n]; + pu32 += 1; + for (u32 j = 0; j templateInsId2IOSignalInfo = move(templateInsId2IOSignalInfo1); + + return circuit; +} + +bool check_valid_number(std::string & s, uint base){ + bool is_valid = true; + if (base == 16){ + for (uint i = 0; i < s.size(); i++){ + is_valid &= ( + ('0' <= s[i] && s[i] <= '9') || + ('a' <= s[i] && s[i] <= 'f') || + ('A' <= s[i] && s[i] <= 'F') + ); + } + } else{ + for (uint i = 0; i < s.size(); i++){ + is_valid &= ('0' <= s[i] && s[i] < char(int('0') + base)); + } + } + return is_valid; +} + +void json2FrElements (json val, std::vector & vval){ + if (!val.is_array()) { + FrElement v; + std::string s_aux, s; + uint base; + if (val.is_string()) { + s_aux = val.get(); + std::string possible_prefix = s_aux.substr(0, 2); + if (possible_prefix == "0b" || possible_prefix == "0B"){ + s = s_aux.substr(2, s_aux.size() - 2); + base = 2; + } else if (possible_prefix == "0o" || possible_prefix == "0O"){ + s = s_aux.substr(2, s_aux.size() - 2); + base = 8; + } else if (possible_prefix == "0x" || possible_prefix == "0X"){ + s = s_aux.substr(2, s_aux.size() - 2); + base = 16; + } else{ + s = s_aux; + base = 10; + } + if 
(!check_valid_number(s, base)){ + std::ostringstream errStrStream; + errStrStream << "Invalid number in JSON input: " << s_aux << "\n"; + throw std::runtime_error(errStrStream.str() ); + } + } else if (val.is_number()) { + double vd = val.get(); + std::stringstream stream; + stream << std::fixed << std::setprecision(0) << vd; + s = stream.str(); + base = 10; + } else { + throw std::runtime_error("Invalid JSON type"); + } + Fr_str2element (&v, s.c_str(), base); + vval.push_back(v); + } else { + for (uint i = 0; i < val.size(); i++) { + json2FrElements (val[i], vval); + } + } +} + +void loadJson(Circom_CalcWit *ctx, const char *json_buffer, unsigned long buffer_size) { + + json j = json::parse(json_buffer, json_buffer + buffer_size); + + u64 nItems = j.size(); + // printf("Items : %llu\n",nItems); + if (nItems == 0){ + ctx->tryRunCircuit(); + } + for (json::iterator it = j.begin(); it != j.end(); ++it) { + // std::cout << it.key() << " => " << it.value() << '\n'; + u64 h = fnv1a(it.key()); + std::vector v; + json2FrElements(it.value(),v); + uint signalSize = ctx->getInputSignalSize(h); + if (v.size() < signalSize) { + std::ostringstream errStrStream; + errStrStream << "Error loading signal " << it.key() << ": Not enough values\n"; + throw std::runtime_error(errStrStream.str() ); + } + if (v.size() > signalSize) { + std::ostringstream errStrStream; + errStrStream << "Error loading signal " << it.key() << ": Too many values\n"; + throw std::runtime_error(errStrStream.str() ); + } + for (uint i = 0; i " << Fr_element2str(&(v[i])) << '\n'; + ctx->setInputSignal(h,i,v[i]); + } catch (std::runtime_error e) { + std::ostringstream errStrStream; + errStrStream << "Error setting signal: " << it.key() << "\n" << e.what(); + throw std::runtime_error(errStrStream.str() ); + } + } + } +} + +unsigned long getBinWitnessSize() { + + uint Nwtns = get_size_of_witness(); + + return 44 + Fr_N64*8 * (Nwtns + 1); +} + +char *appendBuffer(char *buffer, const void *src, unsigned long 
src_size) { + + memcpy(buffer, src, src_size); + return buffer + src_size; +} + +char *appendBuffer(char *buffer, const u32 src) { + + return appendBuffer(buffer, &src, 4); +} + +char *appendBuffer(char *buffer, const u64 src) { + + return appendBuffer(buffer, &src, 8); +} + +char *appendBuffer(char *buffer, const FrRawElement src) { + + return appendBuffer(buffer, src, Fr_N64*8); +} + +void storeBinWitness(Circom_CalcWit *ctx, char *buffer) { + + buffer = appendBuffer(buffer, "wtns", 4); + + u32 version = 2; + buffer = appendBuffer(buffer, version); + + u32 nSections = 2; + buffer = appendBuffer(buffer, nSections); + + // Header + u32 idSection1 = 1; + buffer = appendBuffer(buffer, idSection1); + + u32 n8 = Fr_N64*8; + + u64 idSection1length = 8 + n8; + buffer = appendBuffer(buffer, idSection1length); + + buffer = appendBuffer(buffer, n8); + + buffer = appendBuffer(buffer, Fr_q.longVal); + + uint Nwtns = get_size_of_witness(); + + u32 nVars = (u32)Nwtns; + buffer = appendBuffer(buffer, nVars); + + // Data + u32 idSection2 = 2; + buffer = appendBuffer(buffer, idSection2); + + u64 idSection2length = (u64)n8*(u64)Nwtns; + buffer = appendBuffer(buffer, idSection2length); + + FrElement v; + + for (int i=0;igetWitness(i, &v); + Fr_toLongNormal(&v, &v); + buffer = appendBuffer(buffer, v.longVal); + } +} + +int witnesscalc( + const char *circuit_buffer, unsigned long circuit_size, + const char *json_buffer, unsigned long json_size, + char *wtns_buffer, unsigned long *wtns_size, + char *error_msg, unsigned long error_msg_maxsize) +{ + unsigned long witnessSize = getBinWitnessSize(); + + if (*wtns_size < witnessSize) { + *wtns_size = witnessSize; + return WITNESSCALC_ERROR_SHORT_BUFFER; + } + + try { + + std::unique_ptr circuit(loadCircuit(circuit_buffer, circuit_size)); + + std::unique_ptr ctx(new Circom_CalcWit(circuit.get())); + + loadJson(ctx.get(), json_buffer, json_size); + + if (ctx.get()->getRemaingInputsToBeSet() != 0) { + std::stringstream stream; + stream << "Not 
all inputs have been set. Only " + << get_main_input_signal_no()-ctx.get()->getRemaingInputsToBeSet() + << " out of " << get_main_input_signal_no(); + + strncpy(error_msg, stream.str().c_str(), error_msg_maxsize); + return WITNESSCALC_ERROR; + } + + storeBinWitness(ctx.get(), wtns_buffer); + *wtns_size = witnessSize; + + } catch (std::exception& e) { + + if (error_msg) { + strncpy(error_msg, e.what(), error_msg_maxsize); + } + return WITNESSCALC_ERROR; + + } catch (std::exception *e) { + + if (error_msg) { + strncpy(error_msg, e->what(), error_msg_maxsize); + } + delete e; + return WITNESSCALC_ERROR; + + } catch (...) { + if (error_msg) { + strncpy(error_msg, "unknown error", error_msg_maxsize); + } + return WITNESSCALC_ERROR; + } + + return WITNESSCALC_OK; +} + +} // namespace diff --git a/example/support/witnesscalc.h b/example/support/witnesscalc.h new file mode 100644 index 0000000..86c1124 --- /dev/null +++ b/example/support/witnesscalc.h @@ -0,0 +1,33 @@ +#ifndef WITNESSCALC_H +#define WITNESSCALC_H + +namespace CIRCUIT_NAME { + +#define WITNESSCALC_OK 0x0 +#define WITNESSCALC_ERROR 0x1 +#define WITNESSCALC_ERROR_SHORT_BUFFER 0x2 + +/** + * + * @return error code: + * WITNESSCALC_OK - in case of success. + * WITNESSCALC_ERROR - in case of an error. + * + * On success wtns_buffer is filled with witness data and + * wtns_size contains the number bytes copied to wtns_buffer. + * + * If wtns_buffer is too small then the function returns WITNESSCALC_ERROR_SHORT_BUFFER + * and the minimum size for wtns_buffer in wtns_size. + * + */ + +int +witnesscalc( + const char *circuit_buffer, unsigned long circuit_size, + const char *json_buffer, unsigned long json_size, + char *wtns_buffer, unsigned long *wtns_size, + char *error_msg, unsigned long error_msg_maxsize); + +} // namespace + +#endif // WITNESSCALC_H