From 006815d13979c9ccfd127fb698b142d22b4719a7 Mon Sep 17 00:00:00 2001 From: Shreyas Ekanathan Date: Mon, 19 May 2025 19:33:51 -0400 Subject: [PATCH 1/3] add butterfly --- .vscode/settings.json | 3 + Project.toml | 2 + src/RecursiveFactorization.jl | 1 + src/butterflylu.jl | 133 ++++++++++++++++++++++++++++++++++ test/runtests.jl | 25 +++++++ 5 files changed, 164 insertions(+) create mode 100644 .vscode/settings.json create mode 100644 src/butterflylu.jl diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..da2b7b7 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "julia.environmentPath": "c:\\Users\\sivasubl\\OneDrive - UMass Chan Medical School\\Documents\\GitHub\\RecursiveFactorization.jl" +} \ No newline at end of file diff --git a/Project.toml b/Project.toml index 0d78bdc..f8edbea 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" TriangularSolve = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf" +VectorizedRNG = "33b4df10-0173-11e9-2a0c-851a7edac40e" [compat] LinearAlgebra = "1.5" @@ -18,6 +19,7 @@ Polyester = "0.3.2,0.4.1, 0.5, 0.6, 0.7" PrecompileTools = "1" StrideArraysCore = "0.5.5" TriangularSolve = "0.2" +VectorizedRNG = "0.2.25" julia = "1.10" [extras] diff --git a/src/RecursiveFactorization.jl b/src/RecursiveFactorization.jl index a91a0b0..fb6784a 100644 --- a/src/RecursiveFactorization.jl +++ b/src/RecursiveFactorization.jl @@ -4,6 +4,7 @@ if isdefined(Base, :Experimental) && @eval Base.Experimental.@max_methods 1 end include("./lu.jl") +include("./butterflylu.jl") import PrecompileTools diff --git a/src/butterflylu.jl b/src/butterflylu.jl new file mode 100644 index 0000000..c5adcdd --- /dev/null +++ b/src/butterflylu.jl @@ -0,0 +1,133 @@ +using VectorizedRNG +using LinearAlgebra: Diagonal, I +using LoopVectorization +using RecursiveFactorization + +@inline exphalf(x) = exp(x) * oftype(x, 0.5) +function 🦋!(wv, ::Val{SEED} = Val(888)) where {SEED} + T = eltype(wv) + mrng = VectorizedRNG.MutableXoshift(SEED) + GC.@preserve mrng begin rand!(exphalf, VectorizedRNG.Xoshift(mrng), wv, static(0), + T(-0.05), T(0.1)) end +end + +function 🦋generate!(A, ::Val{SEED} = Val(888)) where {SEED} + Usz = 2 * size(A, 1) + Vsz = 2 * size(A, 2) + uv = similar(A, Usz + Vsz) + 🦋!(uv, Val(SEED)) + (uv,) +end + +function 🦋workspace(A, ::Val{SEED} = Val(888)) where {SEED} + A = pad!(A) + B = similar(A); + ws = 🦋generate!(B) + 🦋mul!(copyto!(B, A), ws) + U, V, B = materializeUV(B, ws) + F = RecursiveFactorization.lu!(B, Val(false)) + A, B, U, V, F +end + +const butterfly_workspace = 🦋workspace; + +function 🦋mul_level!(A, u, v) + # for now, assume... + M, N = size(A) + Ml = M >>> 1 + Nl = N >>> 1 + Mh = M - Ml + Nh = N - Nl + @turbo for n in 1:Nl + for m in 1:Ml + A11 = A[m, n] + A21 = A[m + Mh, n] + A12 = A[m, n + Nh] + A22 = A[m + Mh, n + Nh] + + T1 = A11 + A12 + T2 = A21 + A22 + T3 = A11 - A12 + T4 = A21 - A22 + C11 = T1 + T2 + C21 = T1 - T2 + C12 = T3 + T4 + C22 = T3 - T4 + + u1 = u[m] + u2 = u[m + Mh] + v1 = v[n] + v2 = v[n + Nh] + + A[m, n] = u1 * C11 * v1 + A[m + Mh, n] = u2 * C21 * v1 + A[m, n + Nh] = u1 * C12 * v2 + A[m + Mh, n + Nh] = u2 * C22 * v2 + end + end +end + +function 🦋mul!(A, (uv,)) + M, N = size(A) + Mh = M >>> 1 + Nh = N >>> 1 + U₁ = @view(uv[1:Mh]) + U₂ = @view(uv[(1 + Mh + Nh):(2 * Mh + Nh)]) + V₁ = @view(uv[(Mh + 1):(Mh + Nh)]) + V₂ = @view(uv[(1 + 2 * Mh + Nh):(2 * Mh + 2 * Nh)]) + 🦋mul_level!(@view(A[1:Mh, 1:Nh]), U₁, V₁) + 🦋mul_level!(@view(A[(1 + Mh):M, 1:Nh]), U₂, V₁) + 🦋mul_level!(@view(A[1:Mh, (1 + Nh):N]), U₁, V₂) + 🦋mul_level!(@view(A[(1 + Mh):M, (1 + Nh):N]), U₂, V₂) + U = @view(uv[(1 + 2 * Mh + 2 * Nh):(2 * Mh + 2 * Nh + M)]) + V = @view(uv[(1 + 2 * Mh + 2 * Nh + M):(2 * Mh + 2 * Nh + M + N)]) + 🦋mul_level!(@view(A[1:M, 1:N]), U, V) + A +end + +function diagnegbottom(x) + N = length(x) + y = similar(x, N >>> 1) + z = similar(x, N >>> 1) + for n in 1:(N >>> 1) + y[n] = x[n] + end + for n in 1:(N >>> 1) + z[n] = x[n + (N >>> 1)] + end + Diagonal(y), Diagonal(z) +end +🦋(A, B) = [A B + A -B] + +function materializeUV(A, (uv,)) + M, N = size(A) + Mh = M >>> 1 + Nh = N >>> 1 + + U₁u, U₁l = diagnegbottom(@view(uv[1:Mh])) + U₂u, U₂l = diagnegbottom(@view(uv[(1 + Mh + Nh):(2 * Mh + Nh)])) + V₁u, V₁l = diagnegbottom(@view(uv[(Mh + 1):(Mh + Nh)])) + V₂u, V₂l = diagnegbottom(@view(uv[(1 + 2 * Mh + Nh):(2 * Mh + 2 * Nh)])) + Uu, Ul = diagnegbottom(@view(uv[(1 + 2 * Mh + 2 * Nh):(2 * Mh + 2 * Nh + M)])) + Vu, Vl = diagnegbottom(@view(uv[(1 + 2 * Mh + 2 * Nh + M):(2 * Mh + 2 * Nh + M + N)])) + + Bu2 = [🦋(U₁u, U₁l) 0*I + 0*I 🦋(U₂u, U₂l)] + Bu1 = 🦋(Uu, Ul) + + Bv2 = [🦋(V₁u, V₁l) 0*I + 0*I 🦋(V₂u, V₂l)] + Bv1 = 🦋(Vu, Vl) + + (Bu2 * Bu1)', Bv2 * Bv1, A +end + +function pad!(A) + M, N = size(A) + xn = 4 - M % 4 + A = [A zeros(N, xn) + zeros(xn, N) I(xn) + ] + A +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 66d490a..9851a73 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -64,3 +64,28 @@ testlu(A::Union{Transpose, Adjoint}, MF, BF, p) = testlu(parent(A), parent(MF), end end end + +function wilkinson(N) + A = zeros(N, N) + A[1:(N+1):N*N] .= 1 + A[:, end] .= 1 + for n in 1:(N - 1) + for r in (n + 1):N + @inbounds A[r, n] = -1 + end + end + A +end + +@testset "🦋" begin + for i in 790 : 810 + A = wilkinson(i) + b = rand(i) + A_ext, B, U, V, F = RecursiveFactorization.🦋workspace(A) + M, N = size(A) + xn = 4 - M % 4 + b_ext = [b; rand(xn)] + x = V * (F \ (U * b_ext)) + @test norm(A * x[1:M] .- b) <= 1e-10 + end +end \ No newline at end of file From 20874ffda3e2a3035f74dcebeb78282bf34b71da Mon Sep 17 00:00:00 2001 From: Shreyas Ekanathan Date: Mon, 19 May 2025 19:35:10 -0400 Subject: [PATCH 2/3] Update butterflylu.jl --- src/butterflylu.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/butterflylu.jl b/src/butterflylu.jl index c5adcdd..7f4b758 100644 --- a/src/butterflylu.jl +++ b/src/butterflylu.jl @@ -11,7 +11,7 @@ function 🦋!(wv, ::Val{SEED} = Val(888)) where {SEED} T(-0.05), T(0.1)) end end -function 🦋generate!(A, ::Val{SEED} = Val(888)) where {SEED} +function 🦋generate_random!(A, ::Val{SEED} = Val(888)) where {SEED} Usz = 2 * size(A, 1) Vsz = 2 * size(A, 2) uv = similar(A, Usz + Vsz) From 5f558fcb9c02690ba3ae6709bc49e578683494ec Mon Sep 17 00:00:00 2001 From: Shreyas Ekanathan Date: Sat, 31 May 2025 15:42:27 -0400 Subject: [PATCH 3/3] fixes --- .vscode/settings.json | 3 --- src/butterflylu.jl | 22 ++++++++++++++++------ test/runtests.jl | 2 +- 3 files changed, 17 insertions(+), 10 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index da2b7b7..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "julia.environmentPath": "c:\\Users\\sivasubl\\OneDrive - UMass Chan Medical School\\Documents\\GitHub\\RecursiveFactorization.jl" -} \ No newline at end of file diff --git a/src/butterflylu.jl b/src/butterflylu.jl index 7f4b758..cb1ada6 100644 --- a/src/butterflylu.jl +++ b/src/butterflylu.jl @@ -22,11 +22,11 @@ end function 🦋workspace(A, ::Val{SEED} = Val(888)) where {SEED} A = pad!(A) B = similar(A); - ws = 🦋generate!(B) + ws = 🦋generate_random!(B) 🦋mul!(copyto!(B, A), ws) U, V, B = materializeUV(B, ws) F = RecursiveFactorization.lu!(B, Val(false)) - A, B, U, V, F + A, U, V, F end const butterfly_workspace = 🦋workspace; @@ -126,8 +126,18 @@ end function pad!(A) M, N = size(A) xn = 4 - M % 4 - A = [A zeros(N, xn) - zeros(xn, N) I(xn) - ] - A + A_new = similar(A, M + xn, N + xn) + for j in 1 : N, i in 1 : M + @inbounds A_new[i, j] = A[i, j] + end + + for j in M + 1 : M + xn, i in 1:M + @inbounds A_new[i, j] = 0 + @inbounds A_new[j, i] = 0 + end + + for i in M + 1 : M + xn, j in N + 1 : N + xn + @inbounds A_new[i,j] = i == j + end + A_new end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 9851a73..d4d7dda 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -81,7 +81,7 @@ end for i in 790 : 810 A = wilkinson(i) b = rand(i) - A_ext, B, U, V, F = RecursiveFactorization.🦋workspace(A) + A_ext, U, V, F = RecursiveFactorization.🦋workspace(A) M, N = size(A) xn = 4 - M % 4 b_ext = [b; rand(xn)]