Commit d13ef9a7 authored by Michael Kuron's avatar Michael Kuron 💡

hipfft

parent ec88cb28
......@@ -4,3 +4,6 @@
[submodule "libs/HIP"]
path = libs/HIP
url = https://github.com/ROCm-Developer-Tools/HIP
[submodule "libs/rocFFT"]
path = libs/rocFFT
url = https://github.com/ROCmSoftwarePlatform/rocFFT
......@@ -280,15 +280,18 @@ if(CUDA AND EXISTS "${CMAKE_SOURCE_DIR}/.git")
# Try to find git
find_package(Git)
if(GIT_FOUND)
if (NOT EXISTS "${CMAKE_SOURCE_DIR}/libs/HIP/.git")
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init -- libs/HIP
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
else()
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update -- libs/HIP
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
endif()
foreach(sm IN ITEMS HIP rocFFT)
if (NOT EXISTS "${CMAKE_SOURCE_DIR}/libs/HIP/.git")
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init -- libs/${sm}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
else()
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update -- libs/${sm}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
endif()
endforeach(sm)
endif()
include_directories(SYSTEM "${CMAKE_SOURCE_DIR}/libs/HIP/include")
include_directories(SYSTEM "${CMAKE_SOURCE_DIR}/libs/rocFFT/library/include")
endif()
if(WITH_SCAFACOS)
......
Subproject commit 07e8086949849f42c0b2912e616c6f6c0311a5d4
......@@ -25,8 +25,6 @@
#include "config.hpp"
#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
#include <curand.h>
#include <curand_kernel.h>
typedef float dds_float;
......
......@@ -19,26 +19,25 @@
#ifndef _P3M_GPU_H
#define _P3M_GPU_H
// NOTE :if one wants to use doubles it requires cuda compute capability 1.3
#define _P3M_GPU_FLOAT
//#define _P3M_GPU_REAL_DOUBLE
#ifdef _P3M_GPU_FLOAT
#define REAL_TYPE float
#define CUFFT_TYPE_COMPLEX cufftComplex
#define CUFFT_FORW_FFT cufftExecR2C
#define CUFFT_BACK_FFT cufftExecC2R
#define CUFFT_PLAN_FORW_FLAG CUFFT_R2C
#define CUFFT_PLAN_BACK_FLAG CUFFT_C2R
#define FFT_TYPE_COMPLEX hipfftComplex
#define FFT_FORW_FFT hipfftExecR2C
#define FFT_BACK_FFT hipfftExecC2R
#define FFT_PLAN_FORW_FLAG HIPFFT_R2C
#define FFT_PLAN_BACK_FLAG HIPFFT_C2R
#endif
#ifdef _P3M_GPU_REAL_DOUBLE
#define REAL_TYPE double
#define CUFFT_TYPE_COMPLEX cufftDoubleComplex
#define CUFFT_FORW_FFT cufftExecD2Z
#define CUFFT_BACK_FFT cufftExecZ2D
#define CUFFT_PLAN_FORW_FLAG CUFFT_D2Z
#define CUFFT_PLAN_BACK_FLAG CUFFT_Z2D
#define FFT_TYPE_COMPLEX hipfftDoubleComplex
#define FFT_FORW_FFT hipfftExecD2Z
#define FFT_BACK_FFT hipfftExecZ2D
#define FFT_PLAN_FORW_FLAG HIPFFT_D2Z
#define FFT_PLAN_BACK_FLAG HIPFFT_Z2D
#endif
void p3m_gpu_init(int cao, int mesh[3], double alpha);
......
......@@ -40,7 +40,7 @@
#include "cuda_interface.hpp"
#include "cuda_utils.hpp"
#include <cufft.h>
#include <hipfft.h>
#include "electrostatics_magnetostatics/p3m_gpu.hpp"
......@@ -57,11 +57,11 @@ extern double box_l[3];
struct P3MGpuData {
/** Charge mesh */
CUFFT_TYPE_COMPLEX *charge_mesh;
FFT_TYPE_COMPLEX *charge_mesh;
/** Force meshes */
CUFFT_TYPE_COMPLEX *force_mesh_x;
CUFFT_TYPE_COMPLEX *force_mesh_y;
CUFFT_TYPE_COMPLEX *force_mesh_z;
FFT_TYPE_COMPLEX *force_mesh_x;
FFT_TYPE_COMPLEX *force_mesh_y;
FFT_TYPE_COMPLEX *force_mesh_z;
/** Influence Function */
REAL_TYPE *G_hat;
/** Charge assignment order */
......@@ -88,8 +88,8 @@ P3MGpuData p3m_gpu_data;
struct p3m_gpu_fft_plans_t {
/** FFT plans */
cufftHandle forw_plan;
cufftHandle back_plan;
hipfftHandle forw_plan;
hipfftHandle back_plan;
} p3m_gpu_fft_plans;
static char p3m_gpu_data_initialized = 0;
......@@ -320,8 +320,6 @@ __global__ void calculate_influence_function_device(const P3MGpuData p) {
}
}
// NOTE :if one wants to use the function below it requires cuda compute
// capability 1.3
#ifdef _P3M_GPU_REAL_DOUBLE
__device__ double atomicAdd(double *address, double val) {
unsigned long long int *address_as_ull = (unsigned long long int *)address;
......@@ -355,8 +353,8 @@ __global__ void apply_diff_op(const P3MGpuData p) {
(blockIdx.y > p.mesh[1] / 2) ? blockIdx.y - p.mesh[1] : blockIdx.y;
const int nz = threadIdx.x;
const CUFFT_TYPE_COMPLEX meshw = p.charge_mesh[linear_index];
CUFFT_TYPE_COMPLEX buf;
const FFT_TYPE_COMPLEX meshw = p.charge_mesh[linear_index];
FFT_TYPE_COMPLEX buf;
buf.x = -2.0 * PI * meshw.y;
buf.y = 2.0 * PI * meshw.x;
......@@ -652,7 +650,7 @@ void assign_forces(const CUDA_particle_data *const pdata, const P3MGpuData p,
* Mainly allocation on the device and influence function calculation.
* Be advised: this needs mesh^3*5*sizeof(REAL_TYPE) of device memory.
* We use real to complex FFTs, so the size of the reciprocal mesh
* is (cuFFT convention) Nx x Ny x [ Nz /2 + 1 ].
* is (hipfft convention) Nx x Ny x [ Nz /2 + 1 ].
*/
void p3m_gpu_init(int cao, int mesh[3], double alpha) {
......@@ -711,8 +709,8 @@ void p3m_gpu_init(int cao, int mesh[3], double alpha) {
cuda_safe_mem(hipFree(p3m_gpu_data.G_hat));
p3m_gpu_data.G_hat = 0;
cufftDestroy(p3m_gpu_fft_plans.forw_plan);
cufftDestroy(p3m_gpu_fft_plans.back_plan);
hipfftDestroy(p3m_gpu_fft_plans.forw_plan);
hipfftDestroy(p3m_gpu_fft_plans.back_plan);
p3m_gpu_data_initialized = 0;
}
......@@ -723,20 +721,20 @@ void p3m_gpu_init(int cao, int mesh[3], double alpha) {
(p3m_gpu_data.mesh[2] / 2 + 1);
cuda_safe_mem(hipMalloc((void **)&(p3m_gpu_data.charge_mesh),
cmesh_size * sizeof(CUFFT_TYPE_COMPLEX)));
cmesh_size * sizeof(FFT_TYPE_COMPLEX)));
cuda_safe_mem(hipMalloc((void **)&(p3m_gpu_data.force_mesh_x),
cmesh_size * sizeof(CUFFT_TYPE_COMPLEX)));
cmesh_size * sizeof(FFT_TYPE_COMPLEX)));
cuda_safe_mem(hipMalloc((void **)&(p3m_gpu_data.force_mesh_y),
cmesh_size * sizeof(CUFFT_TYPE_COMPLEX)));
cmesh_size * sizeof(FFT_TYPE_COMPLEX)));
cuda_safe_mem(hipMalloc((void **)&(p3m_gpu_data.force_mesh_z),
cmesh_size * sizeof(CUFFT_TYPE_COMPLEX)));
cmesh_size * sizeof(FFT_TYPE_COMPLEX)));
cuda_safe_mem(hipMalloc((void **)&(p3m_gpu_data.G_hat),
cmesh_size * sizeof(REAL_TYPE)));
cufftPlan3d(&(p3m_gpu_fft_plans.forw_plan), mesh[0], mesh[1], mesh[2],
CUFFT_PLAN_FORW_FLAG);
cufftPlan3d(&(p3m_gpu_fft_plans.back_plan), mesh[0], mesh[1], mesh[2],
CUFFT_PLAN_BACK_FLAG);
hipfftPlan3d(&(p3m_gpu_fft_plans.forw_plan), mesh[0], mesh[1], mesh[2],
FFT_PLAN_FORW_FLAG);
hipfftPlan3d(&(p3m_gpu_fft_plans.back_plan), mesh[0], mesh[1], mesh[2],
FFT_PLAN_BACK_FLAG);
}
if (((reinit_if == 1) || (p3m_gpu_data_initialized == 0)) &&
......@@ -819,10 +817,10 @@ void p3m_gpu_add_farfield_force() {
assign_charges(lb_particle_gpu, p3m_gpu_data);
/** Do forward FFT of the charge mesh */
if (CUFFT_FORW_FFT(p3m_gpu_fft_plans.forw_plan,
if (FFT_FORW_FFT(p3m_gpu_fft_plans.forw_plan,
(REAL_TYPE *)p3m_gpu_data.charge_mesh,
p3m_gpu_data.charge_mesh) != CUFFT_SUCCESS) {
fprintf(stderr, "CUFFT error: Forward FFT failed\n");
p3m_gpu_data.charge_mesh) != HIPFFT_SUCCESS) {
fprintf(stderr, "hipfft error: Forward FFT failed\n");
return;
}
......@@ -833,11 +831,11 @@ void p3m_gpu_add_farfield_force() {
KERNELCALL(apply_diff_op, gridConv, threadsConv, (p3m_gpu_data));
/** Transform the components of the electric field back */
CUFFT_BACK_FFT(p3m_gpu_fft_plans.back_plan, p3m_gpu_data.force_mesh_x,
FFT_BACK_FFT(p3m_gpu_fft_plans.back_plan, p3m_gpu_data.force_mesh_x,
(REAL_TYPE *)p3m_gpu_data.force_mesh_x);
CUFFT_BACK_FFT(p3m_gpu_fft_plans.back_plan, p3m_gpu_data.force_mesh_y,
FFT_BACK_FFT(p3m_gpu_fft_plans.back_plan, p3m_gpu_data.force_mesh_y,
(REAL_TYPE *)p3m_gpu_data.force_mesh_y);
CUFFT_BACK_FFT(p3m_gpu_fft_plans.back_plan, p3m_gpu_data.force_mesh_z,
FFT_BACK_FFT(p3m_gpu_fft_plans.back_plan, p3m_gpu_data.force_mesh_z,
(REAL_TYPE *)p3m_gpu_data.force_mesh_z);
/** Assign the forces from the mesh back to the particles */
......
......@@ -36,13 +36,6 @@ typedef float ekfloat;
#define MAX_NUMBER_OF_SPECIES 10
#ifdef __HIPCC__
#include <cufft.h>
#else
typedef void cufftComplex;
typedef void cufftReal;
#endif
/* Data structure holding parameters and memory pointers for the link flux
* system. */
......
......@@ -19,13 +19,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef _FD_ELECTROSTATICS_HPP
#define _FD_ELECTROSTATICS_HPP
#ifdef __HIPCC__
#include <cufft.h>
#else
typedef void cufftComplex;
typedef void cufftReal;
#endif
#define PI_FLOAT 3.14159265358979323846f
class FdElectrostatics {
......@@ -45,8 +38,8 @@ public:
dim_x_padded = (inputParameters.dim_x / 2 + 1) * 2;
}
cufftComplex *charge_potential;
cufftReal *greensfcn;
hipfftComplex *charge_potential;
hipfftReal *greensfcn;
int dim_x_padded;
};
......@@ -61,27 +54,23 @@ public:
~FdElectrostatics();
FdElectrostatics(InputParameters inputParameters, hipStream_t stream);
void calculatePotential();
void calculatePotential(cufftComplex *charge_potential);
void calculatePotential(hipfftComplex *charge_potential);
Grid getGrid();
private:
Parameters parameters;
hipStream_t cuda_stream;
cufftHandle plan_fft;
cufftHandle plan_ifft;
hipfftHandle plan_fft;
hipfftHandle plan_ifft;
bool initialized;
};
#ifdef __HIPCC__
// extern __device__ __constant__ FdElectrostatics::Parameters
// fde_parameters_gpu;
__device__ cufftReal fde_getNode(int x, int y, int z);
__device__ cufftReal fde_getNode(int i);
__device__ void fde_setNode(int x, int y, int z, cufftReal value);
__device__ void fde_setNode(int i, cufftReal value);
#endif //__HIPCC__
__device__ hipfftReal fde_getNode(int x, int y, int z);
__device__ hipfftReal fde_getNode(int i);
__device__ void fde_setNode(int x, int y, int z, hipfftReal value);
__device__ void fde_setNode(int i, hipfftReal value);
#endif
......@@ -2,9 +2,9 @@
// TODO: throw exceptions upon errors initialization
#include "grid_based_algorithms/fd-electrostatics.hpp"
#include <hipfft.h>
#include "grid_based_algorithms/fd-electrostatics.cuh"
#include "cuda_utils.hpp"
#include <cufft.h>
#include <string>
//#include <cuda_interface.hpp>
#include <cstdio>
......@@ -14,7 +14,7 @@
#endif
__global__ void createGreensfcn();
__global__ void multiplyGreensfcn(cufftComplex *charge_potential);
__global__ void multiplyGreensfcn(hipfftComplex *charge_potential);
__device__ __constant__ FdElectrostatics::Parameters fde_parameters_gpu[1];
......@@ -24,21 +24,21 @@ __device__ unsigned int fde_getThreadIndex() {
threadIdx.x;
}
__device__ cufftReal fde_getNode(int x, int y, int z) {
cufftReal *field =
reinterpret_cast<cufftReal *>(fde_parameters_gpu->charge_potential);
__device__ hipfftReal fde_getNode(int x, int y, int z) {
hipfftReal *field =
reinterpret_cast<hipfftReal *>(fde_parameters_gpu->charge_potential);
return field[fde_parameters_gpu->dim_y * fde_parameters_gpu->dim_x_padded * z +
fde_parameters_gpu->dim_x_padded * y + x];
}
__device__ void fde_setNode(int x, int y, int z, cufftReal value) {
cufftReal *field =
reinterpret_cast<cufftReal *>(fde_parameters_gpu->charge_potential);
__device__ void fde_setNode(int x, int y, int z, hipfftReal value) {
hipfftReal *field =
reinterpret_cast<hipfftReal *>(fde_parameters_gpu->charge_potential);
field[fde_parameters_gpu->dim_y * fde_parameters_gpu->dim_x_padded * z +
fde_parameters_gpu->dim_x_padded * y + x] = value;
}
__device__ cufftReal fde_getNode(int i) {
__device__ hipfftReal fde_getNode(int i) {
int x = i % fde_parameters_gpu->dim_x_padded;
i /= fde_parameters_gpu->dim_x_padded;
int y = i % fde_parameters_gpu->dim_y;
......@@ -46,7 +46,7 @@ __device__ cufftReal fde_getNode(int i) {
return fde_getNode(x, y, z);
}
__device__ void fde_setNode(int i, cufftReal value) {
__device__ void fde_setNode(int i, hipfftReal value) {
int x = i % fde_parameters_gpu->dim_x_padded;
i /= fde_parameters_gpu->dim_x_padded;
int y = i % fde_parameters_gpu->dim_y;
......@@ -55,8 +55,8 @@ __device__ void fde_setNode(int i, cufftReal value) {
}
FdElectrostatics::~FdElectrostatics() {
cufftDestroy(plan_ifft);
cufftDestroy(plan_fft);
hipfftDestroy(plan_ifft);
hipfftDestroy(plan_fft);
cuda_safe_mem(hipFree(parameters.greensfcn));
cuda_safe_mem(hipFree(parameters.charge_potential));
......@@ -66,11 +66,11 @@ FdElectrostatics::FdElectrostatics(InputParameters inputParameters,
hipStream_t stream)
: parameters(inputParameters), cuda_stream(stream) {
cuda_safe_mem(hipMalloc((void **)&parameters.charge_potential,
sizeof(cufftComplex) * parameters.dim_z *
sizeof(hipfftComplex) * parameters.dim_z *
parameters.dim_y * (parameters.dim_x / 2 + 1)));
cuda_safe_mem(hipMalloc((void **)&parameters.greensfcn,
sizeof(cufftReal) * parameters.dim_z *
sizeof(hipfftReal) * parameters.dim_z *
parameters.dim_y * (parameters.dim_x / 2 + 1)));
if (hipGetLastError() != hipSuccess) {
......@@ -91,21 +91,21 @@ FdElectrostatics::FdElectrostatics(InputParameters inputParameters,
/* create 3D FFT plans */
if (cufftPlan3d(&plan_fft, parameters.dim_z, parameters.dim_y,
parameters.dim_x, CUFFT_R2C) != CUFFT_SUCCESS) {
if (hipfftPlan3d(&plan_fft, parameters.dim_z, parameters.dim_y,
parameters.dim_x, HIPFFT_R2C) != HIPFFT_SUCCESS) {
throw std::string("Unable to create fft plan");
}
if (cufftSetStream(plan_fft, cuda_stream) != CUFFT_SUCCESS) {
if (hipfftSetStream(plan_fft, cuda_stream) != HIPFFT_SUCCESS) {
throw std::string("Unable to assign FFT to cuda stream");
}
if (cufftPlan3d(&plan_ifft, parameters.dim_z, parameters.dim_y,
parameters.dim_x, CUFFT_C2R) != CUFFT_SUCCESS) {
if (hipfftPlan3d(&plan_ifft, parameters.dim_z, parameters.dim_y,
parameters.dim_x, HIPFFT_C2R) != HIPFFT_SUCCESS) {
throw std::string("Unable to create ifft plan");
}
if (cufftSetStream(plan_ifft, cuda_stream) != CUFFT_SUCCESS) {
if (hipfftSetStream(plan_ifft, cuda_stream) != HIPFFT_SUCCESS) {
throw std::string("Unable to assign FFT to cuda stream");
}
......@@ -133,11 +133,11 @@ __global__ void createGreensfcn() {
-4.0f * PI_FLOAT * fde_parameters_gpu->prefactor *
fde_parameters_gpu->agrid * fde_parameters_gpu->agrid * 0.5f /
(cos(2.0f * PI_FLOAT * coord[0] /
(cufftReal)fde_parameters_gpu->dim_x) +
(hipfftReal)fde_parameters_gpu->dim_x) +
cos(2.0f * PI_FLOAT * coord[1] /
(cufftReal)fde_parameters_gpu->dim_y) +
(hipfftReal)fde_parameters_gpu->dim_y) +
cos(2.0f * PI_FLOAT * coord[2] /
(cufftReal)fde_parameters_gpu->dim_z) -
(hipfftReal)fde_parameters_gpu->dim_z) -
3.0f) /
(fde_parameters_gpu->dim_x * fde_parameters_gpu->dim_y *
fde_parameters_gpu->dim_z);
......@@ -147,7 +147,7 @@ __global__ void createGreensfcn() {
}
}
__global__ void multiplyGreensfcn(cufftComplex *charge_potential) {
__global__ void multiplyGreensfcn(hipfftComplex *charge_potential) {
unsigned int index = fde_getThreadIndex();
......@@ -162,10 +162,10 @@ void FdElectrostatics::calculatePotential() {
calculatePotential(parameters.charge_potential);
}
void FdElectrostatics::calculatePotential(cufftComplex *charge_potential) {
void FdElectrostatics::calculatePotential(hipfftComplex *charge_potential) {
if (cufftExecR2C(plan_fft, (cufftReal *)charge_potential, charge_potential) !=
CUFFT_SUCCESS) {
if (hipfftExecR2C(plan_fft, (hipfftReal *)charge_potential, charge_potential) !=
HIPFFT_SUCCESS) {
fprintf(stderr, "ERROR: Unable to execute FFT plan\n");
}
......@@ -181,8 +181,8 @@ void FdElectrostatics::calculatePotential(cufftComplex *charge_potential) {
KERNELCALL(multiplyGreensfcn, dim_grid, threads_per_block,
(charge_potential));
if (cufftExecC2R(plan_ifft, charge_potential,
(cufftReal *)charge_potential) != CUFFT_SUCCESS) {
if (hipfftExecC2R(plan_ifft, charge_potential,
(hipfftReal *)charge_potential) != HIPFFT_SUCCESS) {
fprintf(stderr, "ERROR: Unable to execute iFFT plan\n");
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment