Skip to content

Commit

Permalink
distributed arrays in solve, edited version of decomposition renamed …
Browse files Browse the repository at this point in the history
…to decompositionUa
  • Loading branch information
GHilmarG committed Jan 30, 2024
1 parent 1046929 commit d6552f9
Show file tree
Hide file tree
Showing 12 changed files with 141 additions and 103 deletions.
29 changes: 25 additions & 4 deletions ABfgPreEliminate.m
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,32 @@
Q=speye(nA,nA)-BtB ;
Atilde=Q*A+ BtB ;
btilde=(Q*f+B'*g) ;


%dAtilde=factorize(Atilde); % for some reason, this does make things slower


if CtrlVar.Distribute
if ~isdistributed(Atilde)
Atilde=distributed(Atilde);
end
if ~isdatetime(btilde)
btilde=distributed(btilde);
end
end

% decomposition is about the same, and as expected this only speeds things up if several solves with the same matrix
% are needed.
%
% tic
% dAtilde=decomposition(Atilde);
% dx=dAtilde\btilde;
% toc

% tic
x=Atilde\btilde;

if CtrlVar.Distribute
x=gather(x) ;
end
% toc

y=B*(f-A*x);

% Now the solution of the scaled system has been found.
Expand Down
4 changes: 2 additions & 2 deletions CalcCostFunctionNR.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
l.ubvb=l.ubvb+gamma*dl;


CtrlVar.uvMatrixAssembly.Ronly=true; MUAworkers=[];
Ruv=KRTFgeneralBCs(CtrlVar,MUA,F,MUAworkers);
CtrlVar.uvMatrixAssembly.Ronly=true;
Ruv=KRTFgeneralBCs(CtrlVar,MUA,F);

if ~isempty(L)
frhs=-Ruv-L'*l.ubvb;
Expand Down
6 changes: 3 additions & 3 deletions KRTFgeneralBCs.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function [Ruv,Kuv,Tint,Fext,MUAworkers]=KRTFgeneralBCs(CtrlVar,MUA,F,MUAworkers)
function [Ruv,Kuv,Tint,Fext]=KRTFgeneralBCs(CtrlVar,MUA,F)

%%
%
Expand All @@ -10,7 +10,7 @@
%
%%

narginchk(4,4)
narginchk(3,3)



Expand All @@ -30,7 +30,7 @@
end


[Ruv,Kuv,Tint,Fext,MUAworkers]=uvMatrixAssembly(CtrlVar,MUA,F,MUAworkers);
[Ruv,Kuv,Tint,Fext]=uvMatrixAssembly(CtrlVar,MUA,F);



Expand Down
14 changes: 7 additions & 7 deletions SSTREAM2dNR2.m
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@



function [UserVar,RunInfo,F,l,Kuv,Ruv,L,MUAworkers]=SSTREAM2dNR2(UserVar,RunInfo,CtrlVar,MUA,BCs,F,l,MUAworkers)
function [UserVar,RunInfo,F,l,Kuv,Ruv,L]=SSTREAM2dNR2(UserVar,RunInfo,CtrlVar,MUA,BCs,F,l)


% Solves SSA/SSTREAM for u and v


nargoutchk(8,8)
narginchk(8,8)
nargoutchk(7,7)
narginchk(7,7)


tStart=tic;
Expand Down Expand Up @@ -119,7 +119,7 @@


CtrlVar.uvAssembly.ZeroFields=true; CtrlVar.uvMatrixAssembly.Ronly=true ;
fext0=KRTFgeneralBCs(CtrlVar,MUA,F,MUAworkers); % RHS with velocities set to zero, i.e. only external forces
fext0=KRTFgeneralBCs(CtrlVar,MUA,F); % RHS with velocities set to zero, i.e. only external forces

%% New normalization idea, 10 April 2023
% set (ub,vb) to zero, except where BCs imply otherwise, ie make the iterate feasable
Expand All @@ -135,7 +135,7 @@


CtrlVar.uvAssembly.ZeroFields=false; CtrlVar.uvMatrixAssembly.Ronly=true ;
Ruv=KRTFgeneralBCs(CtrlVar,MUA,F,MUAworkers); % RHS with calculated velocities, i.e. difference between external and internal forces
Ruv=KRTFgeneralBCs(CtrlVar,MUA,F); % RHS with calculated velocities, i.e. difference between external and internal forces

RunInfo.CPU.Solution.uv=0;

Expand Down Expand Up @@ -198,13 +198,13 @@

tAssembly=tic;
CtrlVar.uvAssembly.ZeroFields=false; CtrlVar.uvMatrixAssembly.Ronly=false;
[Ruv,Kuv,~,~,MUAworkers]=KRTFgeneralBCs(CtrlVar,MUA,F,MUAworkers);
[Ruv,Kuv,~,~]=KRTFgeneralBCs(CtrlVar,MUA,F);
RunInfo.CPU.Assembly.uv=toc(tAssembly)+RunInfo.CPU.Assembly.uv;
NRincomplete=0;
else
tAssembly=tic;
CtrlVar.uvAssembly.ZeroFields=false; CtrlVar.uvMatrixAssembly.Ronly=1;
Ruv=KRTFgeneralBCs(CtrlVar,MUA,F,MUAworkers);
Ruv=KRTFgeneralBCs(CtrlVar,MUA,F);
RunInfo.CPU.Assembly.uv=toc(tAssembly)+RunInfo.CPU.Assembly.uv;
NRincomplete=1;
end
Expand Down
18 changes: 18 additions & 0 deletions TestGPU.m
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,24 @@
%
%%



NumWorkers=8 ;

ParPool = gcp('nocreate') ;

if isempty(ParPool)

parpool('Processes',NumWorkers)

elseif (ParPool.NumWorkers~=NumWorkers)

delete(gcp('nocreate'))
parpool('Processes',NumWorkers)

end

%%
iExperiment=0;
density=0.05 ;
timings=zeros(10,7)+NaN;
Expand Down
82 changes: 36 additions & 46 deletions TestKRTFparallel.m
Original file line number Diff line number Diff line change
@@ -1,57 +1,21 @@

%%
clc
load TestSaveuvhAssembly

tic

for Iint=1:MUA.nip


[Tx1,Fx1,Ty1,Fy1,Th1,Fh1,Kxu1,Kxv1,Kyu1,Kyv1,Kxh1,Kyh1,Khu1,Khv1,Khh1]=...
uvhAssemblyIntPointImplicitSUPG(Iint,ndim,MUA,...
bnod,hnod,unod,vnod,AGlennod,nnod,Cnod,mnod,h0nod,u0nod,v0nod,as0nod,ab0nod,as1nod,ab1nod,dadhnod,Bnod,Snod,rhonod,...
uonod,vonod,Conod,monod,uanod,vanod,Canod,manod,...
CtrlVar,rhow,g,Ronly,ca,sa,dt,...
Tx0,Fx0,Ty0,Fy0,Th0,Fh0,Kxu0,Kxv0,Kyu0,Kyv0,Kxh0,Kyh0,Khu0,Khv0,Khh0);

Tx=Tx+Tx1; Fx=Fx+Fx1;
Ty=Ty+Ty1; Fy=Fy+Fy1;
Th=Th+Th1; Fh=Fh+Fh1;

Kxu=Kxu+Kxu1; Kxv=Kxv+Kxv1;
Kyu=Kyu+Kyu1; Kyv=Kyv+Kyv1;
Kxh=Kxh+Kxh1; Kyh=Kyh+Kyh1;
Khu=Khu+Khu1; Khv=Khv+Khv1; Khh=Khh+Khh1;

end
toc
NumWorkers=12 ;

ParPool = gcp('nocreate') ;

tic
if isempty(ParPool)

parfor Iint=1:MUA.nip


[Tx1,Fx1,Ty1,Fy1,Th1,Fh1,Kxu1,Kxv1,Kyu1,Kyv1,Kxh1,Kyh1,Khu1,Khv1,Khh1]=...
uvhAssemblyIntPointImplicitSUPG(Iint,ndim,MUA,...
bnod,hnod,unod,vnod,AGlennod,nnod,Cnod,mnod,h0nod,u0nod,v0nod,as0nod,ab0nod,as1nod,ab1nod,dadhnod,Bnod,Snod,rhonod,...
uonod,vonod,Conod,monod,uanod,vanod,Canod,manod,...
CtrlVar,rhow,g,Ronly,ca,sa,dt,...
Tx0,Fx0,Ty0,Fy0,Th0,Fh0,Kxu0,Kxv0,Kyu0,Kyv0,Kxh0,Kyh0,Khu0,Khv0,Khh0);

Tx=Tx+Tx1; Fx=Fx+Fx1;
Ty=Ty+Ty1; Fy=Fy+Fy1;
Th=Th+Th1; Fh=Fh+Fh1;

Kxu=Kxu+Kxu1; Kxv=Kxv+Kxv1;
Kyu=Kyu+Kyu1; Kyv=Kyv+Kyv1;
Kxh=Kxh+Kxh1; Kyh=Kyh+Kyh1;
Khu=Khu+Khu1; Khv=Khv+Khv1; Khh=Khh+Khh1;

end
toc
parpool('Processes',NumWorkers)

elseif (ParPool.NumWorkers~=NumWorkers)

delete(gcp('nocreate'))
parpool('Processes',NumWorkers)

end

%%
N=10000;
Expand Down Expand Up @@ -85,9 +49,35 @@



%%



load("TestSolveKApe.mat","A","B","f","g","x0","y0","CtrlVar")


%%

tSeq=tic;
[x,y]=ABfgPreEliminate(CtrlVar,A,B,f,g);
tSeq=toc(tSeq);

tDistribute=tic;
Adist=distributed(A);
Bdist=distributed(B);
fdist=distributed(f);
gdist=distributed(g);
tDistribute=toc(tDistribute);

tPar=tic;
[xDist,yDist]=ABfgPreEliminate(CtrlVar,Adist,Bdist,fdist,gdist);
x2=gather(xDist) ; y2=gather(yDist) ;
tPar=toc(tPar);

fprintf("tSeq=%g \t tDistribute=%g \t tPar=%g \t gain=%g \n",tSeq,tDistribute,tPar,tSeq/(tDistribute+tPar))

% tSeq=26.2609 tDistribute=1.36496 tPar=13.7741 gain=1.73465 C23000099 8 workers
%%



46 changes: 29 additions & 17 deletions TestParallelAssemblyOptions.m
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@


% n=200 ; A = sprandsym(n,.1,0.1); x=ones(n,1) ;


%

% delete(gcp('nocreate')); parpool('Threads',8)

NumWorkers=12 ;

Expand All @@ -26,7 +20,7 @@
parfevalOnAll(gcp(), @warning, 0, 'off','MATLAB:decomposition:SaveNotSupported');
warning('off','MATLAB:decomposition:genericError')

Solving="-uvh-" ;
Solving="-uv-" ;

% Set output files directory
[~,hostname]=system('hostname') ;
Expand Down Expand Up @@ -63,9 +57,11 @@

%load(UserVar.InverseRestartFileDirectory+"InverseRestartFile-Joughin-Ca1-Cs100000-Aa1-As100000-5km-Alim-Clim-.mat","CtrlVarInRestartFile","RunInfo","MUA","F","BCs","l")

load(UserVar.InverseRestartFileDirectory+"InverseRestartFile-Cornford-Ca1-Cs100000-Aa1-As100000-5km-Alim-Clim-.mat","CtrlVarInRestartFile","RunInfo","MUA","F","BCs","l")

load(UserVar.ForwardRestartFileDirectory+"Restart-FT-P-Duvh-TWIS-MR4-SM-TM001-Cornford-2k5km-Alim-Clim-Ca1-Cs100000-Aa1-As100000-InvMR5","CtrlVarInRestartFile","RunInfo","MUA","F","BCs","l")
if contains(Solving,"-uv-")
load(UserVar.InverseRestartFileDirectory+"InverseRestartFile-Cornford-Ca1-Cs100000-Aa1-As100000-5km-Alim-Clim-.mat","CtrlVarInRestartFile","RunInfo","MUA","F","BCs","l")
else
load(UserVar.ForwardRestartFileDirectory+"Restart-FT-P-Duvh-TWIS-MR4-SM-TM001-Cornford-2k5km-Alim-Clim-Ca1-Cs100000-Aa1-As100000-InvMR5","CtrlVarInRestartFile","RunInfo","MUA","F","BCs","l")
end

tic
MUA.dM=decomposition(MUA.M,'chol','upper') ;
Expand All @@ -82,12 +78,12 @@
CtrlVar.Parallel.uvAssembly.spmd.nWorkers=[];


CtrlVar.Parallel.uvAssembly.spmd.isOn=false;
CtrlVar.Parallel.uvAssembly.spmd.isOn=true;
CtrlVar.Parallel.uvAssembly.parfeval.isOn=false;


CtrlVar.Parallel.uvhAssembly.spmd.isOn=true;

CtrlVar.Parallel.uvhAssembly.spmd.isOn=false;
CtrlVar.Distribute=true ;

CtrlVar.Parallel.isTest=false;

Expand All @@ -97,10 +93,15 @@

if contains(Solving,"-uv-")
% F.ub=F.ub*0 ; F.vb=F.vb*0;
[UserVar,RunInfo,F,l,Kuv,Ruv,Lubvb]= uv(UserVar,RunInfo,CtrlVar,MUA,BCs,F,l,MUAworkers) ;
[UserVar,RunInfo,F,l,Kuv,Ruv,Lubvb]= uv(UserVar,RunInfo,CtrlVar,MUA,BCs,F,l) ;

UaPlots(CtrlVar,MUA,F,"-uv-")


% 19.2 sec
% 14.2 sec with SPMD assembly
% 11.9 sec with SPMD assembly and distributed solve

end


Expand All @@ -109,19 +110,30 @@
if contains(Solving,"-uvh-")
%% uvh
CtrlVar.dt=0.001;

tTotal=tic;
[UserVar,RunInfo,F1,l1,BCs1,dt]=uvh(UserVar,RunInfo,CtrlVar,MUA,F,F,l,l,BCs) ;
tTotal=toc(tTotal);

fprintf("Total time=%g \t Solver=%g \t Assembly=%g \n",tTotal,RunInfo.CPU.Solution.uvh,RunInfo.CPU.Assembly.uvh)

% Total time=98.2963 Solver=51.8608 Assembly=22.2396 C23000099 SPMD(12)
% Total time=158.332 Solver=51.4621 Assembly=68.7052 C23000099 ~SPMD(12)
% Total time=89.8162 Solver=47.6165 Assembly=22.6194 C23000099 SPMD(24)

% Total time=94.3706 Solver=49.6399 Assembly=23.7048 C23000099 SPMD(12)
% Total time=75.4851 Solver=30.5031 Assembly=24.2612 C23000099 SPMD(12) distributed

% Total time=141.966 Solver=47.9826 Assembly=52.8219 C23000099 SPMD(4)
% Total time=134.659 Solver=28.9537 Assembly=68.6005 C23000099 ~SPMD(8) distributed

% Total time=158.332 Solver=51.4621 Assembly=68.7052 C23000099 ~SPMD(12) ~distributed




% Total time=116.786 Solver=62.4066 Assembly=29.1553 DESKTOP-BU2IHIR SPMD(12)
% Total time=174.072 Solver=62.0067 Assembly=69.6168 DESKTOP-BU2IHIR ~SPMD(12)

UaPlots(CtrlVar,MUA,F,"-uv-")


end
9 changes: 7 additions & 2 deletions decomposition.m → decompositionUa.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
classdef (Sealed) decomposition < matlab.mixin.CustomDisplay & matlab.mixin.internal.Scalar





classdef (Sealed) decompositionUa < matlab.mixin.CustomDisplay & matlab.mixin.internal.Scalar
%DECOMPOSITION Matrix decomposition
% DA = DECOMPOSITION(A) returns a decomposition of matrix A, which
% can be used to solve the linear system A*X = B efficiently. The
Expand Down Expand Up @@ -141,7 +146,7 @@
end

methods
function f = decomposition(A, varargin)
function f = decompositionUa(A, varargin)

if nargin == 0
A = [];
Expand Down
6 changes: 3 additions & 3 deletions uv.m
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@



function [UserVar,RunInfo,F,l,Kuv,Ruv,Lubvb,MUAworkers]= uv(UserVar,RunInfo,CtrlVar,MUA,BCs,F,l,MUAworkers)
function [UserVar,RunInfo,F,l,Kuv,Ruv,Lubvb]= uv(UserVar,RunInfo,CtrlVar,MUA,BCs,F,l)


nargoutchk(4,8);
narginchk(8,8)
narginchk(7,7)

tdiagnostic=tic;

Expand Down Expand Up @@ -85,7 +85,7 @@

if CtrlVar.InfoLevel >= 10 ; fprintf(CtrlVar.fidlog,' Starting SSTREAM diagnostic step. \n') ; end

[UserVar,RunInfo,F,l,Kuv,Ruv,Lubvb,MUAworkers]=SSTREAM2dNR2(UserVar,RunInfo,CtrlVar,MUA,BCs,F,l,MUAworkers);
[UserVar,RunInfo,F,l,Kuv,Ruv,Lubvb]=SSTREAM2dNR2(UserVar,RunInfo,CtrlVar,MUA,BCs,F,l);
%[UserVar,F,l,Kuv,Ruv,RunInfo,Lubvb]=SSTREAM2dNR2(UserVar,CtrlVar,MUA,BCs,F,l,RunInfo);

if ~RunInfo.Forward.uvConverged
Expand Down
Loading

0 comments on commit d6552f9

Please sign in to comment.