Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
espressomd
walberla_for_es
Commits
427a8c68
Commit
427a8c68
authored
Oct 31, 2018
by
Martin Bauer
Browse files
Refactored test for GPU communication
- comparison to old CPU transfer communication included
parent
ff97334f
Changes
2
Hide whitespace changes
Inline
Side-by-side
tests/cuda/codegen/EquivalenceTest.cpp
View file @
427a8c68
...
...
@@ -6,7 +6,6 @@
#include
"field/FlagField.h"
#include
"field/AddToStorage.h"
#include
"lbm/communication/PdfFieldPackInfo.h"
#include
"lbm/vtk/VTKOutput.h"
#include
"lbm/PerformanceLogger.h"
#include
"blockforest/communication/UniformBufferedScheme.h"
#include
"timeloop/all.h"
...
...
@@ -58,7 +57,8 @@ void initPdfField( const shared_ptr<StructuredBlockForest> &blocks, BlockDataID
auto
globalZ
=
real_c
(
offset
[
2
]
+
z
);
auto
xArg
=
real_c
(
std
::
sin
(
real_c
(
globalX
)
/
real_t
(
4
)
*
real_c
(
domainBB
.
size
(
0
))
));
auto
zArg
=
real_c
(
std
::
sin
(
real_c
(
globalZ
)
/
real_t
(
4
)
*
real_c
(
domainBB
.
size
(
2
))
));
pdfField
->
setToEquilibrium
(
x
,
y
,
z
,
Vector3
<
real_t
>
(
0.05
*
std
::
sin
(
xArg
),
0
,
0.05
*
std
::
cos
(
zArg
)));
pdfField
->
setToEquilibrium
(
x
,
y
,
z
,
Vector3
<
real_t
>
(
real_t
(
0.05
)
*
std
::
sin
(
xArg
),
0
,
real_t
(
0.05
)
*
std
::
cos
(
zArg
)));
);
}
}
...
...
@@ -82,6 +82,7 @@ int main( int argc, char **argv )
BlockDataID
flagFieldId
=
field
::
addFlagFieldToStorage
<
FlagField_T
>
(
blocks
,
"flag field"
);
const
FlagUID
fluidFlagUID
(
"Fluid"
);
geometry
::
setNonBoundaryCellsToDomain
<
FlagField_T
>
(
*
blocks
,
flagFieldId
,
fluidFlagUID
);
GeneratedLatticeModel_T
generatedLatticeModel
=
GeneratedLatticeModel_T
(
omega
);
// Part 1 : Native walberla
...
...
@@ -99,7 +100,6 @@ int main( int argc, char **argv )
// Part 2: Generated CPU Version
GeneratedLatticeModel_T
generatedLatticeModel
=
GeneratedLatticeModel_T
(
omega
);
BlockDataID
pdfFieldGeneratedId
=
lbm
::
addPdfFieldToStorage
(
blocks
,
"pdfGenerated"
,
generatedLatticeModel
,
field
::
fzyx
);
initPdfField
<
GeneratedPdfField_T
>
(
blocks
,
pdfFieldGeneratedId
);
CpuCommScheme_T
cpuComm
(
blocks
);
...
...
@@ -113,6 +113,7 @@ int main( int argc, char **argv )
// Part 3: Generated GPU Version
bool
overlapCommunication
=
parameters
.
getParameter
<
bool
>
(
"overlapCommunication"
,
true
);
bool
cudaEnabledMPI
=
parameters
.
getParameter
<
bool
>
(
"cudaEnabledMPI"
,
false
);
bool
oldCommunication
=
parameters
.
getParameter
<
bool
>
(
"oldCommunication"
,
false
);
BlockDataID
pdfShadowCPU
=
lbm
::
addPdfFieldToStorage
(
blocks
,
"cpu shadow field"
,
generatedLatticeModel
,
field
::
fzyx
);
initPdfField
<
GeneratedPdfField_T
>
(
blocks
,
pdfShadowCPU
);
...
...
@@ -126,10 +127,23 @@ int main( int argc, char **argv )
gpuComm
.
addPackInfo
(
make_shared
<
pystencils
::
EquivalenceTest_GPUPackInfo
>
(
pdfGpuFieldId
));
auto
runCommunication
=
[
&
]()
{
gpuComm
();
};
CpuCommScheme_T
oldGpuScheme
(
blocks
);
std
::
vector
<
cudaStream_t
>
streams
;
for
(
uint_t
i
=
0
;
i
<
Stencil_T
::
Size
;
++
i
)
{
cudaStream_t
s
;
cudaStreamCreate
(
&
s
);
streams
.
push_back
(
s
);
}
using
OldPackInfo
=
cuda
::
communication
::
GPUPackInfo
<
cuda
::
GPUField
<
real_t
>
>
;
oldGpuScheme
.
addPackInfo
(
make_shared
<
OldPackInfo
>
(
pdfGpuFieldId
,
streams
)
);
SweepTimeloop
gpuTimeLoop
(
blocks
->
getBlockStorage
(),
timesteps
);
if
(
!
overlapCommunication
)
{
gpuTimeLoop
.
add
()
<<
BeforeFunction
(
runCommunication
,
"gpu communication"
)
gpuTimeLoop
.
add
()
<<
(
oldCommunication
?
BeforeFunction
(
oldGpuScheme
)
:
BeforeFunction
(
runCommunication
,
"gpu communication"
))
<<
Sweep
(
cudaLbKernel
,
"LB stream & collide gpu"
);
}
else
...
...
tests/cuda/codegen/EquivalenceTest.gen.py
View file @
427a8c68
...
...
@@ -3,6 +3,8 @@ from lbmpy_walberla import generate_lattice_model_files
from
lbmpy.creationfunctions
import
create_lb_update_rule
from
pystencils_walberla.sweep
import
Sweep
dtype
=
'float64'
# LB options
options
=
{
'method'
:
'srt'
,
...
...
@@ -12,14 +14,14 @@ options = {
'compressible'
:
False
,
'maxwellian_moments'
:
False
,
'temporary_field_name'
:
'pdfs_tmp'
,
'optimization'
:
{
'cse_global'
:
False
,
'cse_pdfs'
:
False
,
'double_precision'
:
True
}
'optimization'
:
{
'cse_global'
:
True
,
'cse_pdfs'
:
True
,
'double_precision'
:
dtype
==
'float64'
}
}
# GPU optimization options
opt
=
{
'gpu_indexing_params'
:
{
'block_size'
:
(
128
,
2
,
1
)},
'data_type'
:
'float64'
}
outer_opt
=
{
'gpu_indexing_params'
:
{
'block_size'
:
(
32
,
32
,
32
)},
'data_type'
:
'float64'
}
opt
=
{
'gpu_indexing_params'
:
{
'block_size'
:
(
128
,
1
,
1
)},
'data_type'
:
dtype
}
outer_opt
=
{
'gpu_indexing_params'
:
{
'block_size'
:
(
32
,
32
,
32
)},
'data_type'
:
dtype
}
def
lb_assignments
():
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment