From 2a5c581b222af6fcac7d7515e168510361478b7e Mon Sep 17 00:00:00 2001 From: Gathros <6323830+Gathros@users.noreply.github.com> Date: Fri, 12 Oct 2018 12:14:08 +0100 Subject: [PATCH 1/4] Adding verlet.s --- .../verlet_integration/code/asm-x64/verlet.s | 139 ++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 contents/verlet_integration/code/asm-x64/verlet.s diff --git a/contents/verlet_integration/code/asm-x64/verlet.s b/contents/verlet_integration/code/asm-x64/verlet.s new file mode 100644 index 000000000..d92187c16 --- /dev/null +++ b/contents/verlet_integration/code/asm-x64/verlet.s @@ -0,0 +1,139 @@ +.intel_syntax noprefix + +.section .rodata + zero: .double 0.0 + two: .double 2.0 + half: .double 0.5 + verlet_fmt: .string "Time for Verlet integration is: %lf\n" + stormer_fmt: .string "Time and Velocity for Stormer Verlet Integration is: %lf, %lf\n" + velocity_fmt: .string "Time and Velocity for Velocity Verlet Integration is: %lf, %lf\n" + pos: .double 5.0 + acc: .double -10.0 + dt: .double 0.01 + +.section .text + .global main + .extern printf + +# rdi - time ptr +# xmm0 - pos +# xmm1 - acc +# xmm2 - dt +verlet: + pxor xmm7, xmm7 # Holds 0 for comparisons + pxor xmm3, xmm3 # Holds time value + comisd xmm0, xmm7 # Check if pos is greater then 0.0 + jbe verlet_return + movsd xmm6, xmm1 # xmm6 = acc * dt * dt + mulsd xmm6, xmm2 + mulsd xmm6, xmm2 + movsd xmm5, xmm0 # Holds previous position +verlet_loop: + addsd xmm3, xmm2 # Adding dt to time + movsd xmm4, xmm0 # Hold old value of posistion + addsd xmm0, xmm0 # Calculating new position + subsd xmm0, xmm5 + addsd xmm0, xmm6 + movsd xmm5, xmm4 + comisd xmm0, xmm7 # Check if position is greater then 0.0 + ja verlet_loop +verlet_return: + movsd QWORD PTR [rdi], xmm3 # Saving time value + ret + +# rdi - time ptr +# rsi - vel ptr +# xmm0 - pos +# xmm1 - acc +# xmm2 - dt +stormer_verlet: + pxor xmm7, xmm7 # Holds 0 for comparisons + pxor xmm3, xmm3 # Holds time value + comisd xmm0, xmm7 # Check if pos is greater then 0.0 + jbe stormer_verlet_return + movsd xmm6, xmm1 # xmm6 = acc * dt * dt + mulsd xmm6, xmm2 + mulsd xmm6, xmm2 + movsd xmm5, xmm0 # Holds previous position +stormer_verlet_loop: + addsd xmm3, xmm2 # Adding dt to time + movsd xmm4, xmm0 # Hold old value of posistion + addsd xmm0, xmm0 # Calculating new position + subsd xmm0, xmm5 + addsd xmm0, xmm6 + movsd xmm5, xmm4 + comisd xmm0, xmm7 # Check if position is greater then 0.0 + ja stormer_verlet_loop +stormer_verlet_return: + movsd QWORD PTR [rdi], xmm3 # Saving time and velocity + mulsd xmm3, xmm1 + movsd QWORD PTR [rsi], xmm3 + ret + +# rdi - time ptr +# rsi - vel ptr +# xmm0 - pos +# xmm1 - acc +# xmm2 - dt +velocity_verlet: + pxor xmm7, xmm7 # Holds 0 for comparisons + pxor xmm3, xmm3 # Holds the velocity value + pxor xmm4, xmm4 # Holds the time value + comisd xmm0, xmm7 # Check if pos is greater then 0.0 + jbe velocity_verlet_return + movsd xmm5, half # xmm5 = 0.5 * dt * dt * acc + mulsd xmm5, xmm2 + mulsd xmm5, xmm2 + mulsd xmm5, xmm1 +velocity_verlet_loop: + movsd xmm6, xmm3 # Move velocity into register + mulsd xmm6, xmm2 # Calculate new position + addsd xmm6, xmm5 + addsd xmm0, xmm6 + addsd xmm4, xmm2 # Incrementing time + movsd xmm3, xmm4 # Updating velocity + mulsd xmm3, xmm1 + comisd xmm0, xmm7 + ja velocity_verlet_loop +velocity_verlet_return: + movsd QWORD PTR [rdi], xmm4 # Saving time and velocity + movsd QWORD PTR [rsi], xmm3 + ret + +main: + push rbp + sub rsp, 16 # Making space for time and velocity + mov rdi, rsp # Calling verlet + movsd xmm0, pos + movsd xmm1, acc + movsd xmm2, dt + call verlet + mov rdi, OFFSET verlet_fmt # Print output + movsd xmm0, QWORD PTR [rsp] + mov rax, 1 + call printf + mov rdi, rsp # Calling stormer_verlet + lea rsi, [rsp + 8] + movsd xmm0, pos + movsd xmm1, acc + movsd xmm2, dt + call stormer_verlet + mov rdi, OFFSET stormer_fmt # Print output + movsd xmm0, QWORD PTR [rsp] + movsd xmm1, QWORD PTR [rsp + 8] + mov rax, 1 + call printf + mov rdi, rsp # Calling velocity_verlet + lea rsi, [rsp + 8] + movsd xmm0, pos + movsd xmm1, acc + movsd xmm2, dt + call velocity_verlet + mov rdi, OFFSET velocity_fmt # Print output + movsd xmm0, QWORD PTR [rsp] + movsd xmm1, QWORD PTR [rsp + 8] + mov rax, 1 + call printf + add rsp, 16 + pop rbp + ret From d1d43ada3a7cc901dbcf2ecf095162c850850870 Mon Sep 17 00:00:00 2001 From: Gathros <6323830+Gathros@users.noreply.github.com> Date: Fri, 12 Oct 2018 12:18:11 +0100 Subject: [PATCH 2/4] updating verlet_integration.md --- contents/verlet_integration/verlet_integration.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/contents/verlet_integration/verlet_integration.md b/contents/verlet_integration/verlet_integration.md index bc34f1b37..850a437c2 100644 --- a/contents/verlet_integration/verlet_integration.md +++ b/contents/verlet_integration/verlet_integration.md @@ -63,6 +63,8 @@ Unfortunately, this has not yet been implemented in LabVIEW, so here's Julia cod [import:1-14, lang="ruby"](code/ruby/verlet.rb) {% sample lang="go" %} [import:5-16, lang:"go"](code/golang/verlet.go) +{% sample lang="asm-x64" %} +[import:18-42, lang:"asm-x64"](code/asm-x64/verlet.s) {% endmethod %} Now, obviously this poses a problem; what if we want to calculate a term that requires velocity, like the kinetic energy, $$\frac{1}{2}mv^2$$? In this case, we certainly cannot get rid of the velocity! Well, we can find the velocity to $$\mathcal{O}(\Delta t^2)$$ accuracy by using the Stormer-Verlet method, which is the same as before, but we calculate velocity like so @@ -113,6 +115,8 @@ Unfortunately, this has not yet been implemented in LabVIEW, so here's Julia cod [import:16-32, lang="ruby"](code/ruby/verlet.rb) {% sample lang="go" %} [import:18-30, lang:"go"](code/golang/verlet.go) +{% sample lang="asm-x64" %} +[import:44-71, lang:"asm-x64"](code/asm-x64/verlet.s) {% endmethod %} @@ -177,6 +181,8 @@ Unfortunately, this has not yet been implemented in LabVIEW, so here's Julia cod [import:34-46, lang="ruby"](code/ruby/verlet.rb) {% sample lang="go" %} [import:32-42, lang:"go"](code/golang/verlet.go) +{% sample lang="asm-x64" %} +[import:73-101, lang:"asm-x64"](code/asm-x64/verlet.s) {% endmethod %} Even though this method is more widely used than the simple Verlet method mentioned above, it unforunately has an error term of $$\mathcal{O}(\Delta t^2)$$, which is two orders of magnitude worse. That said, if you want to have a simulaton with many objects that depend on one another --- like a gravity simulation --- the Velocity Verlet algorithm is a handy choice; however, you may have to play further tricks to allow everything to scale appropriately. These types of simulatons are sometimes called *n-body* simulations and one such trick is the Barnes-Hut algorithm, which cuts the complexity of n-body simulations from $$\sim \mathcal{O}(n^2)$$ to $$\sim \mathcal{O}(n\log(n))$$. @@ -223,6 +229,8 @@ Submitted by P. Mekhail [import, lang="ruby"](code/ruby/verlet.rb) {% sample lang="go" %} [import, lang:"go"](code/golang/verlet.go) +{% sample lang="asm-x64" %} +[import, lang:"asm-x64"](code/asm-x64/verlet.s) {% endmethod %} From a86c61ed979bafdf7b341b6c39ed49e2f2c5eeaf Mon Sep 17 00:00:00 2001 From: Gathros <6323830+Gathros@users.noreply.github.com> Date: Sun, 14 Oct 2018 09:27:58 +0100 Subject: [PATCH 3/4] Returning in registers --- .../verlet_integration/code/asm-x64/verlet.s | 40 +++++++------------ 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/contents/verlet_integration/code/asm-x64/verlet.s b/contents/verlet_integration/code/asm-x64/verlet.s index d92187c16..920411142 100644 --- a/contents/verlet_integration/code/asm-x64/verlet.s +++ b/contents/verlet_integration/code/asm-x64/verlet.s @@ -15,10 +15,10 @@ .global main .extern printf -# rdi - time ptr # xmm0 - pos # xmm1 - acc # xmm2 - dt +# RET xmm0 - time verlet: pxor xmm7, xmm7 # Holds 0 for comparisons pxor xmm3, xmm3 # Holds time value @@ -38,14 +38,14 @@ verlet_loop: comisd xmm0, xmm7 # Check if position is greater then 0.0 ja verlet_loop verlet_return: - movsd QWORD PTR [rdi], xmm3 # Saving time value + movsd xmm0, xmm3 # Saving time value ret -# rdi - time ptr -# rsi - vel ptr # xmm0 - pos # xmm1 - acc # xmm2 - dt +# RET xmm0 - time +# RET xmm1 - velocity stormer_verlet: pxor xmm7, xmm7 # Holds 0 for comparisons pxor xmm3, xmm3 # Holds time value @@ -65,16 +65,16 @@ stormer_verlet_loop: comisd xmm0, xmm7 # Check if position is greater then 0.0 ja stormer_verlet_loop stormer_verlet_return: - movsd QWORD PTR [rdi], xmm3 # Saving time and velocity + movsd xmm0, xmm3 # Saving time and velocity mulsd xmm3, xmm1 - movsd QWORD PTR [rsi], xmm3 + movsd xmm1, xmm3 ret -# rdi - time ptr -# rsi - vel ptr # xmm0 - pos # xmm1 - acc # xmm2 - dt +# RET xmm0 - time +# RET xmm1 - velocity velocity_verlet: pxor xmm7, xmm7 # Holds 0 for comparisons pxor xmm3, xmm3 # Holds the velocity value @@ -96,44 +96,32 @@ velocity_verlet_loop: comisd xmm0, xmm7 ja velocity_verlet_loop velocity_verlet_return: - movsd QWORD PTR [rdi], xmm4 # Saving time and velocity - movsd QWORD PTR [rsi], xmm3 + movsd xmm0, xmm4 # Saving time and velocity + movsd xmm1, xmm3 ret main: push rbp - sub rsp, 16 # Making space for time and velocity - mov rdi, rsp # Calling verlet - movsd xmm0, pos + movsd xmm0, pos # Calling verlet movsd xmm1, acc movsd xmm2, dt call verlet mov rdi, OFFSET verlet_fmt # Print output - movsd xmm0, QWORD PTR [rsp] mov rax, 1 call printf - mov rdi, rsp # Calling stormer_verlet - lea rsi, [rsp + 8] - movsd xmm0, pos + movsd xmm0, pos # Calling stormer_verlet movsd xmm1, acc movsd xmm2, dt call stormer_verlet mov rdi, OFFSET stormer_fmt # Print output - movsd xmm0, QWORD PTR [rsp] - movsd xmm1, QWORD PTR [rsp + 8] mov rax, 1 call printf - mov rdi, rsp # Calling velocity_verlet - lea rsi, [rsp + 8] - movsd xmm0, pos + movsd xmm0, pos # Calling velocity_verlet movsd xmm1, acc movsd xmm2, dt call velocity_verlet - mov rdi, OFFSET velocity_fmt # Print output - movsd xmm0, QWORD PTR [rsp] - movsd xmm1, QWORD PTR [rsp + 8] + mov rdi, OFFSET velocity_fmt # Print output mov rax, 1 call printf - add rsp, 16 pop rbp ret From ca2541d3aa7086e59304c1bca47087273acd8da6 Mon Sep 17 00:00:00 2001 From: Gathros <6323830+Gathros@users.noreply.github.com> Date: Tue, 16 Oct 2018 19:03:13 +0100 Subject: [PATCH 4/4] adding newline to verlet.s --- contents/verlet_integration/code/asm-x64/verlet.s | 1 + 1 file changed, 1 insertion(+) diff --git a/contents/verlet_integration/code/asm-x64/verlet.s b/contents/verlet_integration/code/asm-x64/verlet.s index 920411142..ed2521f78 100644 --- a/contents/verlet_integration/code/asm-x64/verlet.s +++ b/contents/verlet_integration/code/asm-x64/verlet.s @@ -125,3 +125,4 @@ main: call printf pop rbp ret +