Skip to content

Commit 47fe49f

Browse files
Gathroszsparal
authored andcommitted
Verlet in X86_64 (#505)
* Adding verlet.s * updating verlet_integration.md * Returning in registers * adding newline to verlet.s
1 parent c763d0d commit 47fe49f

File tree

2 files changed

+136
-0
lines changed

2 files changed

+136
-0
lines changed
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
.intel_syntax noprefix                  # GAS with Intel operand order and bare register names

.section .rodata                        # constants and printf format strings
zero:           .double 0.0             # not referenced by the code in this file
two:            .double 2.0             # not referenced by the code in this file
half:           .double 0.5             # the 0.5 factor loaded by velocity_verlet
verlet_fmt:     .string "Time for Verlet integration is: %lf\n"
stormer_fmt:    .string "Time and Velocity for Stormer Verlet Integration is: %lf, %lf\n"
velocity_fmt:   .string "Time and Velocity for Velocity Verlet Integration is: %lf, %lf\n"
pos:            .double 5.0             # position main passes to each integrator
acc:            .double -10.0           # acceleration main passes to each integrator
dt:             .double 0.01            # time step main passes to each integrator

.section .text
.globl  main                            # exported entry point
.extern printf                          # defined outside this file
17+
18+
# verlet(xmm0 = pos, xmm1 = acc, xmm2 = dt) -> xmm0 = time
# Basic Verlet step: x_next = 2*x - x_prev + acc*dt^2, with x_prev = pos on the first step.
# dt is added to the time on every step; stepping stops once x is no longer > 0.0.
# Uses xmm3-xmm7 as scratch. Returns 0.0 without stepping when pos is not > 0.0.
verlet:
        pxor    xmm3, xmm3              # time = 0.0
        pxor    xmm7, xmm7              # 0.0, the threshold for both comparisons
        comisd  xmm0, xmm7
        jbe     .Lverlet_done           # pos <= 0.0 (or unordered): nothing to integrate
        movsd   xmm5, xmm0              # x_prev = pos
        movsd   xmm6, xmm1
        mulsd   xmm6, xmm2
        mulsd   xmm6, xmm2              # xmm6 = acc * dt * dt, constant for every step
.Lverlet_step:
        movsd   xmm4, xmm0              # keep the current x before it is overwritten
        addsd   xmm0, xmm0              # 2*x
        subsd   xmm0, xmm5              #     - x_prev
        addsd   xmm0, xmm6              #     + acc*dt^2
        movsd   xmm5, xmm4              # x_prev = the x this step started from
        addsd   xmm3, xmm2              # time += dt (SSE add leaves EFLAGS alone)
        comisd  xmm0, xmm7
        ja      .Lverlet_step           # keep going while x > 0.0
.Lverlet_done:
        movsd   xmm0, xmm3              # return value: accumulated time
        ret
43+
44+
# stormer_verlet(xmm0 = pos, xmm1 = acc, xmm2 = dt) -> xmm0 = time, xmm1 = velocity
# Position step: x_next = 2*x - x_prev + acc*dt^2, with x_prev = pos on the first step.
# dt is added to the time on every step; stepping stops once x is no longer > 0.0.
# The velocity is formed once, after the loop, as time * acc.
# Uses xmm3-xmm7 as scratch. When pos is not > 0.0 no step is taken and time is 0.0.
stormer_verlet:
        pxor    xmm3, xmm3              # time = 0.0
        pxor    xmm7, xmm7              # 0.0, the threshold for both comparisons
        comisd  xmm0, xmm7
        jbe     .Lstormer_done          # pos <= 0.0 (or unordered): skip the loop
        movsd   xmm5, xmm0              # x_prev = pos
        movsd   xmm6, xmm1
        mulsd   xmm6, xmm2
        mulsd   xmm6, xmm2              # xmm6 = acc * dt * dt, constant for every step
.Lstormer_step:
        movsd   xmm4, xmm0              # keep the current x before it is overwritten
        addsd   xmm0, xmm0              # 2*x
        subsd   xmm0, xmm5              #     - x_prev
        addsd   xmm0, xmm6              #     + acc*dt^2
        movsd   xmm5, xmm4              # x_prev = the x this step started from
        addsd   xmm3, xmm2              # time += dt (SSE add leaves EFLAGS alone)
        comisd  xmm0, xmm7
        ja      .Lstormer_step          # keep going while x > 0.0
.Lstormer_done:
        movsd   xmm0, xmm3              # first return value: accumulated time
        mulsd   xmm3, xmm1              # time * acc
        movsd   xmm1, xmm3              # second return value: velocity
        ret
72+
73+
# velocity_verlet(xmm0 = pos, xmm1 = acc, xmm2 = dt) -> xmm0 = time, xmm1 = velocity
# Each step: pos += vel*dt + 0.5*acc*dt^2, then time += dt, then vel = time * acc.
# vel and time start at 0.0; stepping stops once pos is no longer > 0.0.
# Uses xmm3-xmm7 as scratch. Returns time = 0.0, velocity = 0.0 when pos is not > 0.0.
# The 0.5 constant is loaded RIP-relative so this routine does not block PIE linking.
velocity_verlet:
        pxor    xmm7, xmm7              # 0.0, the threshold for both comparisons
        pxor    xmm3, xmm3              # vel = 0.0
        pxor    xmm4, xmm4              # time = 0.0
        comisd  xmm0, xmm7
        jbe     .Lvelocity_done         # pos <= 0.0 (or unordered): skip the loop
        movsd   xmm5, QWORD PTR [rip + half]
        mulsd   xmm5, xmm2
        mulsd   xmm5, xmm2
        mulsd   xmm5, xmm1              # xmm5 = 0.5 * dt * dt * acc, constant for every step
.Lvelocity_step:
        movsd   xmm6, xmm3
        mulsd   xmm6, xmm2              # vel * dt
        addsd   xmm6, xmm5              #     + 0.5*acc*dt^2
        addsd   xmm0, xmm6              # pos += displacement for this step
        addsd   xmm4, xmm2              # time += dt
        movsd   xmm3, xmm4
        mulsd   xmm3, xmm1              # vel = time * acc
        comisd  xmm0, xmm7
        ja      .Lvelocity_step         # keep going while pos > 0.0
.Lvelocity_done:
        movsd   xmm0, xmm4              # first return value: accumulated time
        movsd   xmm1, xmm3              # second return value: velocity
        ret
102+
103+
# int main(void)                                         ABI: System V AMD64
# Calls verlet, stormer_verlet and velocity_verlet with the pos/acc/dt constants
# and prints each result with printf.
#
# Changes relative to the previous version:
#   * al now matches the number of xmm registers passed to the variadic printf
#     (1 for the first call, 2 for the two calls that print time and velocity).
#   * eax is cleared before returning, so the exit status is 0 instead of
#     whatever the last printf left in eax.
#   * Static data is addressed RIP-relative, so these references no longer
#     require absolute relocations.
main:
        push    rbp                     # rsp is 8 mod 16 on entry; the push realigns it for the calls

        movsd   xmm0, QWORD PTR [rip + pos]
        movsd   xmm1, QWORD PTR [rip + acc]
        movsd   xmm2, QWORD PTR [rip + dt]
        call    verlet                  # xmm0 = time
        lea     rdi, [rip + verlet_fmt]
        mov     eax, 1                  # one vector register (xmm0) carries a vararg
        call    printf

        movsd   xmm0, QWORD PTR [rip + pos]
        movsd   xmm1, QWORD PTR [rip + acc]
        movsd   xmm2, QWORD PTR [rip + dt]
        call    stormer_verlet          # xmm0 = time, xmm1 = velocity
        lea     rdi, [rip + stormer_fmt]
        mov     eax, 2                  # two vector registers (xmm0, xmm1) carry varargs
        call    printf

        movsd   xmm0, QWORD PTR [rip + pos]
        movsd   xmm1, QWORD PTR [rip + acc]
        movsd   xmm2, QWORD PTR [rip + dt]
        call    velocity_verlet         # xmm0 = time, xmm1 = velocity
        lea     rdi, [rip + velocity_fmt]
        mov     eax, 2                  # two vector registers (xmm0, xmm1) carry varargs
        call    printf

        xor     eax, eax                # return 0
        pop     rbp
        ret
128+

contents/verlet_integration/verlet_integration.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ Unfortunately, this has not yet been implemented in LabVIEW, so here's Julia cod
6363
[import:1-14, lang="ruby"](code/ruby/verlet.rb)
6464
{% sample lang="go" %}
6565
[import:5-16, lang:"go"](code/golang/verlet.go)
66+
{% sample lang="asm-x64" %}
67+
[import:18-42, lang:"asm-x64"](code/asm-x64/verlet.s)
6668
{% endmethod %}
6769

6870
Now, obviously this poses a problem; what if we want to calculate a term that requires velocity, like the kinetic energy, $$\frac{1}{2}mv^2$$? In this case, we certainly cannot get rid of the velocity! Well, we can find the velocity to $$\mathcal{O}(\Delta t^2)$$ accuracy by using the Stormer-Verlet method, which is the same as before, but we calculate velocity like so
@@ -113,6 +115,8 @@ Unfortunately, this has not yet been implemented in LabVIEW, so here's Julia cod
113115
[import:16-32, lang="ruby"](code/ruby/verlet.rb)
114116
{% sample lang="go" %}
115117
[import:18-30, lang:"go"](code/golang/verlet.go)
118+
{% sample lang="asm-x64" %}
119+
[import:44-71, lang:"asm-x64"](code/asm-x64/verlet.s)
116120
{% endmethod %}
117121

118122

@@ -177,6 +181,8 @@ Unfortunately, this has not yet been implemented in LabVIEW, so here's Julia cod
177181
[import:34-46, lang="ruby"](code/ruby/verlet.rb)
178182
{% sample lang="go" %}
179183
[import:32-42, lang:"go"](code/golang/verlet.go)
184+
{% sample lang="asm-x64" %}
185+
[import:73-101, lang:"asm-x64"](code/asm-x64/verlet.s)
180186
{% endmethod %}
181187

182188
Even though this method is more widely used than the simple Verlet method mentioned above, it unfortunately has an error term of $$\mathcal{O}(\Delta t^2)$$, which is two orders of magnitude worse. That said, if you want to have a simulation with many objects that depend on one another --- like a gravity simulation --- the Velocity Verlet algorithm is a handy choice; however, you may have to play further tricks to allow everything to scale appropriately. These types of simulations are sometimes called *n-body* simulations and one such trick is the Barnes-Hut algorithm, which cuts the complexity of n-body simulations from $$\sim \mathcal{O}(n^2)$$ to $$\sim \mathcal{O}(n\log(n))$$.
@@ -223,6 +229,8 @@ Submitted by P. Mekhail
223229
[import, lang="ruby"](code/ruby/verlet.rb)
224230
{% sample lang="go" %}
225231
[import, lang:"go"](code/golang/verlet.go)
232+
{% sample lang="asm-x64" %}
233+
[import, lang:"asm-x64"](code/asm-x64/verlet.s)
226234
{% endmethod %}
227235

228236

0 commit comments

Comments
 (0)