Commit 00570bba authored by Martin Bauer

Memory leak bugfix - regenerated files

parent 77aed6a0
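
The actual fix is in the regenerated UniformGridGPU_LbKernel header further down: a destructor is added that deletes every pointer stored in cache_pdfs_, which previously was never cleaned up. A minimal sketch of that ownership pattern, assuming cache_pdfs_ is a std::set of heap-allocated GPU field objects (the member declaration and the real waLBerla field type are not shown in this hunk, so the names below are stand-ins):

```cpp
#include <set>

// Hypothetical stand-in for the generated GPU field type used by the kernel;
// only the ownership/cleanup pattern is illustrated here.
struct GpuPdfField { /* device buffers, sizes, ... */ };

class LbKernelCacheSketch {
public:
    // Without this destructor, every cached copy leaked when the kernel
    // object was destroyed -- the behavior this commit fixes in the
    // generated code.
    ~LbKernelCacheSketch() {
        for (auto * p : cache_pdfs_)
            delete p;                       // free each cached field exactly once
    }

private:
    std::set<GpuPdfField *> cache_pdfs_;    // owned, heap-allocated copies
};
```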
@@ -129,7 +129,6 @@ static FUNC_PREFIX void kernel_streamCollide(double * const _data_pdfs, double *
const double vel1Term = _data_pdfs_20_31_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_315_1m1[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_311_1m1[_stride_pdfs_0*ctr_0];
const double vel2Term = _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0] + _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_2m1_35_10[_stride_pdfs_0*ctr_0];
const double rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_20_39_11[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_316_11[_stride_pdfs_0*ctr_0] + _data_pdfs_21_317_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_21_36_10[_stride_pdfs_0*ctr_0];
const double xi_27 = rho*-0.333333333333333;
const double u_0 = vel0Term + xi_18 + xi_19 - _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - _data_pdfs_20_37_1m1[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - _data_pdfs_2m1_313_10[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
const double xi_23 = (u_0*u_0);
const double u_1 = vel1Term + xi_19 + xi_20 - _data_pdfs_20_310_11[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - _data_pdfs_20_32_11[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_1m1[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - _data_pdfs_2m1_312_11[_stride_pdfs_0*ctr_0];
@@ -146,9 +145,10 @@ static FUNC_PREFIX void kernel_streamCollide(double * const _data_pdfs, double *
const double u0Pu2 = u_0 + u_2;
const double f_eq_common = rho - xi_23 - xi_24 - xi_25;
const double xi_26 = f_eq_common + rho*-0.666666666666667;
const double xi_28 = f_eq_common + xi_25 + xi_27;
const double xi_29 = f_eq_common + xi_23 + xi_27;
const double xi_30 = f_eq_common + xi_24 + xi_27;
const double xi_27 = f_eq_common + rho*-0.333333333333333;
const double xi_28 = xi_25 + xi_27;
const double xi_29 = xi_23 + xi_27;
const double xi_30 = xi_24 + xi_27;
const double xi_2 = xi_24*2 + xi_26;
const double xi_3 = xi_23*2 + xi_26;
const double xi_4 = xi_25*2 + xi_26;
@@ -195,62 +195,80 @@ static FUNC_PREFIX void kernel_collide(double * _data_pdfs, int64_t const _size_
const double xi_5 = omega*0.0416666666666667;
for (int ctr_2 = 1; ctr_2 < _size_pdfs_2 - 1; ctr_2 += 1)
{
double * _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
double * _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
double * _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
double * _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
double * _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
double * _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
double * _data_pdfs_20_314 = _data_pdfs + _stride_pdfs_2*ctr_2 + 14*_stride_pdfs_3;
double * _data_pdfs_20_318 = _data_pdfs + _stride_pdfs_2*ctr_2 + 18*_stride_pdfs_3;
double * _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
double * _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
double * _data_pdfs_20_31 = _data_pdfs + _stride_pdfs_2*ctr_2 + _stride_pdfs_3;
double * _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
double * _data_pdfs_20_317 = _data_pdfs + _stride_pdfs_2*ctr_2 + 17*_stride_pdfs_3;
double * _data_pdfs_20_315 = _data_pdfs + _stride_pdfs_2*ctr_2 + 15*_stride_pdfs_3;
double * _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
double * _data_pdfs_20_312 = _data_pdfs + _stride_pdfs_2*ctr_2 + 12*_stride_pdfs_3;
double * _data_pdfs_20_316 = _data_pdfs + _stride_pdfs_2*ctr_2 + 16*_stride_pdfs_3;
double * _data_pdfs_20_311 = _data_pdfs + _stride_pdfs_2*ctr_2 + 11*_stride_pdfs_3;
double * _data_pdfs_20_39 = _data_pdfs + _stride_pdfs_2*ctr_2 + 9*_stride_pdfs_3;
double * _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
double * _data_pdfs_20_313 = _data_pdfs + _stride_pdfs_2*ctr_2 + 13*_stride_pdfs_3;
double * _data_pdfs_20_37 = _data_pdfs + _stride_pdfs_2*ctr_2 + 7*_stride_pdfs_3;
double * _data_pdfs_20_310 = _data_pdfs + _stride_pdfs_2*ctr_2 + 10*_stride_pdfs_3;
double * _data_pdfs_20_34 = _data_pdfs + _stride_pdfs_2*ctr_2 + 4*_stride_pdfs_3;
double * _data_pdfs_20_35 = _data_pdfs + _stride_pdfs_2*ctr_2 + 5*_stride_pdfs_3;
double * _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
double * _data_pdfs_20_32 = _data_pdfs + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
double * _data_pdfs_20_33 = _data_pdfs + _stride_pdfs_2*ctr_2 + 3*_stride_pdfs_3;
double * _data_pdfs_20_38 = _data_pdfs + _stride_pdfs_2*ctr_2 + 8*_stride_pdfs_3;
double * _data_pdfs_20_30 = _data_pdfs + _stride_pdfs_2*ctr_2;
double * _data_pdfs_20_36 = _data_pdfs + _stride_pdfs_2*ctr_2 + 6*_stride_pdfs_3;
for (int ctr_1 = 1; ctr_1 < _size_pdfs_1 - 1; ctr_1 += 1)
{
double * _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
double * _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
double * _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
double * _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
double * _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
double * _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
double * _data_pdfs_20_314_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_314;
double * _data_pdfs_20_318_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_318;
double * _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
double * _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
double * _data_pdfs_20_31_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_31;
double * _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
double * _data_pdfs_20_317_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_317;
double * _data_pdfs_20_315_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_315;
double * _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
double * _data_pdfs_20_312_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_312;
double * _data_pdfs_20_316_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_316;
double * _data_pdfs_20_311_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_311;
double * _data_pdfs_20_39_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_39;
double * _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
double * _data_pdfs_20_313_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_313;
double * _data_pdfs_20_37_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_37;
double * _data_pdfs_20_310_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_310;
double * _data_pdfs_20_34_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_34;
double * _data_pdfs_20_35_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_35;
double * _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
double * _data_pdfs_20_32_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_32;
double * _data_pdfs_20_33_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_33;
double * _data_pdfs_20_38_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_38;
double * _data_pdfs_20_30_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_30;
double * _data_pdfs_20_36_10 = _stride_pdfs_1*ctr_1 + _data_pdfs_20_36;
for (int ctr_0 = 1; ctr_0 < _size_pdfs_0 - 1; ctr_0 += 1)
{
const double xi_18 = -_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0];
const double xi_19 = -_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
const double xi_20 = -_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0];
const double vel0Term = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
const double vel1Term = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
const double vel2Term = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
const double rho = vel0Term + vel1Term + vel2Term + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
const double xi_27 = rho*-0.333333333333333;
const double u_0 = vel0Term + xi_18 + xi_19 - _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] - _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] - _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
const double Dummy_18 = _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0];
const double Dummy_19 = _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0];
const double Dummy_20 = _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0];
const double Dummy_21 = _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0];
const double Dummy_22 = _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0];
const double Dummy_23 = _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0];
const double Dummy_24 = _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0];
const double Dummy_25 = _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0];
const double Dummy_26 = _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
const double Dummy_27 = _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0];
const double Dummy_28 = _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0];
const double Dummy_29 = _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
const double Dummy_30 = _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0];
const double Dummy_31 = _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0];
const double Dummy_32 = _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
const double Dummy_33 = _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0];
const double Dummy_34 = _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
const double Dummy_35 = _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
const double Dummy_36 = _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0];
const double xi_18 = -Dummy_22;
const double xi_19 = -Dummy_26;
const double xi_20 = -Dummy_24;
const double vel0Term = Dummy_20 + Dummy_21 + Dummy_30 + Dummy_31 + Dummy_34;
const double vel1Term = Dummy_19 + Dummy_23 + Dummy_25 + Dummy_29;
const double vel2Term = Dummy_18 + Dummy_28 + Dummy_32;
const double rho = Dummy_22 + Dummy_24 + Dummy_26 + Dummy_27 + Dummy_33 + Dummy_35 + Dummy_36 + vel0Term + vel1Term + vel2Term;
const double u_0 = -Dummy_28 - Dummy_29 - Dummy_33 + vel0Term + xi_18 + xi_19;
const double xi_23 = (u_0*u_0);
const double u_1 = vel1Term + xi_19 + xi_20 - _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] - _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] - _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
const double u_1 = -Dummy_18 - Dummy_27 - Dummy_30 + Dummy_34 + vel1Term + xi_19 + xi_20;
const double xi_21 = -u_1;
const double xi_24 = (u_1*u_1);
const double u_2 = vel2Term + xi_18 + xi_20 + _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] - _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] - _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] - _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0];
const double u_2 = Dummy_20 - Dummy_21 - Dummy_23 + Dummy_25 - Dummy_36 + vel2Term + xi_18 + xi_20;
const double xi_22 = -u_2;
const double xi_25 = (u_2*u_2);
const double u0Mu1 = u_0 + xi_21;
@@ -261,9 +279,10 @@ static FUNC_PREFIX void kernel_collide(double * _data_pdfs, int64_t const _size_
const double u0Pu2 = u_0 + u_2;
const double f_eq_common = rho - xi_23 - xi_24 - xi_25;
const double xi_26 = f_eq_common + rho*-0.666666666666667;
const double xi_28 = f_eq_common + xi_25 + xi_27;
const double xi_29 = f_eq_common + xi_23 + xi_27;
const double xi_30 = f_eq_common + xi_24 + xi_27;
const double xi_27 = f_eq_common + rho*-0.333333333333333;
const double xi_28 = xi_25 + xi_27;
const double xi_29 = xi_23 + xi_27;
const double xi_30 = xi_24 + xi_27;
const double xi_2 = xi_24*2 + xi_26;
const double xi_3 = xi_23*2 + xi_26;
const double xi_4 = xi_25*2 + xi_26;
@@ -279,25 +298,25 @@ static FUNC_PREFIX void kernel_collide(double * _data_pdfs, int64_t const _size_
const double xi_15 = (u0Mu2*u0Mu2)*3 + xi_30;
const double xi_16 = u0Pu2*2;
const double xi_17 = (u0Pu2*u0Pu2)*3 + xi_30;
_data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = omega*(f_eq_common*0.333333333333333 - _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_30_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = xi_1*(u_1 + xi_2 - 6*_data_pdfs_20_31_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_31_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = xi_1*(xi_2 + xi_21 - 6*_data_pdfs_20_32_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_32_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = xi_1*(-u_0 + xi_3 - 6*_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_33_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = xi_1*(u_0 + xi_3 - 6*_data_pdfs_20_34_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_34_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = xi_1*(u_2 + xi_4 - 6*_data_pdfs_20_35_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_35_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = xi_1*(xi_22 + xi_4 - 6*_data_pdfs_20_36_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_36_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = xi_5*(-xi_6 + xi_7 - 24*_data_pdfs_20_37_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_37_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = xi_5*(xi_8 + xi_9 - 24*_data_pdfs_20_38_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_38_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = xi_5*(-xi_8 + xi_9 - 24*_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_39_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = xi_5*(xi_6 + xi_7 - 24*_data_pdfs_20_310_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_310_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = xi_5*(xi_10 + xi_11 - 24*_data_pdfs_20_311_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_311_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = xi_5*(-xi_12 + xi_13 - 24*_data_pdfs_20_312_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_312_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = xi_5*(-xi_14 + xi_15 - 24*_data_pdfs_20_313_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_313_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = xi_5*(xi_16 + xi_17 - 24*_data_pdfs_20_314_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_314_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = xi_5*(xi_12 + xi_13 - 24*_data_pdfs_20_315_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_315_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = xi_5*(-xi_10 + xi_11 - 24*_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_316_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = xi_5*(-xi_16 + xi_17 - 24*_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_317_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = xi_5*(xi_14 + xi_15 - 24*_data_pdfs_20_318_10[_stride_pdfs_0*ctr_0]) + _data_pdfs_20_318_10[_stride_pdfs_0*ctr_0];
_data_pdfs_20_30_10[_stride_pdfs_0*ctr_0] = Dummy_35 + omega*(-Dummy_35 + f_eq_common*0.333333333333333);
_data_pdfs_20_31_10[_stride_pdfs_0*ctr_0] = Dummy_19 + xi_1*(Dummy_19*-6 + u_1 + xi_2);
_data_pdfs_20_32_10[_stride_pdfs_0*ctr_0] = Dummy_27 + xi_1*(Dummy_27*-6 + xi_2 + xi_21);
_data_pdfs_20_33_10[_stride_pdfs_0*ctr_0] = Dummy_33 + xi_1*(Dummy_33*-6 - u_0 + xi_3);
_data_pdfs_20_34_10[_stride_pdfs_0*ctr_0] = Dummy_31 + xi_1*(Dummy_31*-6 + u_0 + xi_3);
_data_pdfs_20_35_10[_stride_pdfs_0*ctr_0] = Dummy_32 + xi_1*(Dummy_32*-6 + u_2 + xi_4);
_data_pdfs_20_36_10[_stride_pdfs_0*ctr_0] = Dummy_36 + xi_1*(Dummy_36*-6 + xi_22 + xi_4);
_data_pdfs_20_37_10[_stride_pdfs_0*ctr_0] = Dummy_29 + xi_5*(Dummy_29*-24 - xi_6 + xi_7);
_data_pdfs_20_38_10[_stride_pdfs_0*ctr_0] = Dummy_34 + xi_5*(Dummy_34*-24 + xi_8 + xi_9);
_data_pdfs_20_39_10[_stride_pdfs_0*ctr_0] = Dummy_26 + xi_5*(Dummy_26*-24 - xi_8 + xi_9);
_data_pdfs_20_310_10[_stride_pdfs_0*ctr_0] = Dummy_30 + xi_5*(Dummy_30*-24 + xi_6 + xi_7);
_data_pdfs_20_311_10[_stride_pdfs_0*ctr_0] = Dummy_25 + xi_5*(Dummy_25*-24 + xi_10 + xi_11);
_data_pdfs_20_312_10[_stride_pdfs_0*ctr_0] = Dummy_18 + xi_5*(Dummy_18*-24 - xi_12 + xi_13);
_data_pdfs_20_313_10[_stride_pdfs_0*ctr_0] = Dummy_28 + xi_5*(Dummy_28*-24 - xi_14 + xi_15);
_data_pdfs_20_314_10[_stride_pdfs_0*ctr_0] = Dummy_20 + xi_5*(Dummy_20*-24 + xi_16 + xi_17);
_data_pdfs_20_315_10[_stride_pdfs_0*ctr_0] = Dummy_23 + xi_5*(Dummy_23*-24 + xi_12 + xi_13);
_data_pdfs_20_316_10[_stride_pdfs_0*ctr_0] = Dummy_24 + xi_5*(Dummy_24*-24 - xi_10 + xi_11);
_data_pdfs_20_317_10[_stride_pdfs_0*ctr_0] = Dummy_22 + xi_5*(Dummy_22*-24 - xi_16 + xi_17);
_data_pdfs_20_318_10[_stride_pdfs_0*ctr_0] = Dummy_21 + xi_5*(Dummy_21*-24 + xi_14 + xi_15);
}
}
}
@@ -73,7 +73,6 @@ static FUNC_PREFIX void UniformGridGPU_LbKernel(double * const _data_pdfs, doubl
double * const _data_pdfs_11_20_32 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_1 + _stride_pdfs_2*ctr_2 + 2*_stride_pdfs_3;
double * const _data_pdfs_10_21_36 = _data_pdfs + _stride_pdfs_1*ctr_1 + _stride_pdfs_2*ctr_2 + _stride_pdfs_2 + 6*_stride_pdfs_3;
const double rho = vel0Term + vel1Term + vel2Term + _data_pdfs_10_20_30[_stride_pdfs_0*ctr_0] + _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_317[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_10_21_36[_stride_pdfs_0*ctr_0] + _data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] + _data_pdfs_11_20_39[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] + _data_pdfs_11_21_316[_stride_pdfs_0*ctr_0];
const double xi_27 = rho*-0.333333333333333;
const double u_0 = vel0Term + xi_18 + xi_19 - _data_pdfs_10_20_33[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - _data_pdfs_10_2m1_313[_stride_pdfs_0*ctr_0 + _stride_pdfs_0] - _data_pdfs_1m1_20_37[_stride_pdfs_0*ctr_0 + _stride_pdfs_0];
const double xi_23 = (u_0*u_0);
const double u_1 = vel1Term + xi_19 + xi_20 - _data_pdfs_11_20_310[_stride_pdfs_0*ctr_0 - _stride_pdfs_0] - _data_pdfs_11_20_32[_stride_pdfs_0*ctr_0] - _data_pdfs_11_2m1_312[_stride_pdfs_0*ctr_0] + _data_pdfs_1m1_20_38[_stride_pdfs_0*ctr_0 - _stride_pdfs_0];
@@ -90,9 +89,10 @@ static FUNC_PREFIX void UniformGridGPU_LbKernel(double * const _data_pdfs, doubl
const double u0Pu2 = u_0 + u_2;
const double f_eq_common = rho - xi_23 - xi_24 - xi_25;
const double xi_26 = f_eq_common + rho*-0.666666666666667;
const double xi_28 = f_eq_common + xi_25 + xi_27;
const double xi_29 = f_eq_common + xi_23 + xi_27;
const double xi_30 = f_eq_common + xi_24 + xi_27;
const double xi_27 = f_eq_common + rho*-0.333333333333333;
const double xi_28 = xi_25 + xi_27;
const double xi_29 = xi_23 + xi_27;
const double xi_30 = xi_24 + xi_27;
const double xi_2 = xi_24*2 + xi_26;
const double xi_3 = xi_23*2 + xi_26;
const double xi_4 = xi_25*2 + xi_26;
@@ -183,7 +183,7 @@ void UniformGridGPU_LbKernel::operator() ( IBlock * block , cudaStream_t stream
const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
dim3 _block(int(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)), int(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)), int(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)));
dim3 _grid(int(( (_size_pdfs_0 - 2) % int(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) == 0 ? (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) : ( (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) ) +1 )), int(( (_size_pdfs_1 - 2) % int(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) == 0 ? (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) : ( (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) ) +1 )), int(( (_size_pdfs_2 - 2) % int(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) == 0 ? (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) : ( (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) ) +1 )));
dim3 _grid(int(( (_size_pdfs_0 - 2) % (((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) == 0 ? (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) : ( (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) ) +1 )), int(( (_size_pdfs_1 - 2) % (((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) == 0 ? (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) : ( (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) ) +1 )), int(( (_size_pdfs_2 - 2) % (((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) == 0 ? (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) : ( (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) ) +1 )));
internal_UniformGridGPU_LbKernel::UniformGridGPU_LbKernel<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, omega);
pdfs->swapDataPointers(pdfs_tmp);
@@ -212,12 +212,12 @@ void UniformGridGPU_LbKernel::inner( IBlock * block , cudaStream_t stream )
inner.expand(-1);
WALBERLA_ASSERT_GREATER_EQUAL(inner.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
WALBERLA_ASSERT_GREATER_EQUAL(inner.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
WALBERLA_ASSERT_GREATER_EQUAL(inner.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
WALBERLA_ASSERT_GREATER_EQUAL(inner.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
double * const _data_pdfs = pdfs->dataAt(inner.xMin() - 1, inner.yMin() - 1, inner.zMin() - 1, 0);
WALBERLA_ASSERT_GREATER_EQUAL(inner.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
WALBERLA_ASSERT_GREATER_EQUAL(inner.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
WALBERLA_ASSERT_GREATER_EQUAL(inner.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
WALBERLA_ASSERT_GREATER_EQUAL(inner.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
double * _data_pdfs_tmp = pdfs_tmp->dataAt(inner.xMin() - 1, inner.yMin() - 1, inner.zMin() - 1, 0);
WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(inner.xSize() + 2));
const int64_t _size_pdfs_0 = int64_t(inner.xSize() + 2);
@@ -230,7 +230,7 @@ void UniformGridGPU_LbKernel::inner( IBlock * block , cudaStream_t stream )
const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
dim3 _block(int(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)), int(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)), int(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)));
dim3 _grid(int(( (_size_pdfs_0 - 2) % int(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) == 0 ? (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) : ( (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) ) +1 )), int(( (_size_pdfs_1 - 2) % int(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) == 0 ? (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) : ( (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) ) +1 )), int(( (_size_pdfs_2 - 2) % int(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) == 0 ? (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) : ( (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) ) +1 )));
dim3 _grid(int(( (_size_pdfs_0 - 2) % (((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) == 0 ? (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) : ( (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) ) +1 )), int(( (_size_pdfs_1 - 2) % (((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) == 0 ? (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) : ( (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) ) +1 )), int(( (_size_pdfs_2 - 2) % (((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) == 0 ? (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) : ( (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) ) +1 )));
internal_UniformGridGPU_LbKernel::UniformGridGPU_LbKernel<<<_grid, _block, 0, stream>>>(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, omega);
}
@@ -284,13 +284,13 @@ void UniformGridGPU_LbKernel::outer( IBlock * block , cudaStream_t stream )
for( auto & ci: layers )
{
parallelSection_.run([&]( auto s ) {
WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs->nrOfGhostLayers()));
double * const _data_pdfs = pdfs->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin() - 1, -int_c(pdfs_tmp->nrOfGhostLayers()));
double * _data_pdfs_tmp = pdfs_tmp->dataAt(ci.xMin() - 1, ci.yMin() - 1, ci.zMin() - 1, 0);
WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(ci.xSize() + 2));
const int64_t _size_pdfs_0 = int64_t(ci.xSize() + 2);
@@ -303,7 +303,7 @@ void UniformGridGPU_LbKernel::outer( IBlock * block , cudaStream_t stream )
const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
dim3 _block(int(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)), int(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)), int(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)));
dim3 _grid(int(( (_size_pdfs_0 - 2) % int(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) == 0 ? (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) : ( (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) ) +1 )), int(( (_size_pdfs_1 - 2) % int(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) == 0 ? (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) : ( (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) ) +1 )), int(( (_size_pdfs_2 - 2) % int(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) == 0 ? (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) : ( (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) ) +1 )));
dim3 _grid(int(( (_size_pdfs_0 - 2) % (((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) == 0 ? (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) : ( (int64_t)(_size_pdfs_0 - 2) / (int64_t)(((128 < _size_pdfs_0 - 2) ? 128 : _size_pdfs_0 - 2)) ) +1 )), int(( (_size_pdfs_1 - 2) % (((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) == 0 ? (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) : ( (int64_t)(_size_pdfs_1 - 2) / (int64_t)(((1 < _size_pdfs_1 - 2) ? 1 : _size_pdfs_1 - 2)) ) +1 )), int(( (_size_pdfs_2 - 2) % (((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) == 0 ? (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) : ( (int64_t)(_size_pdfs_2 - 2) / (int64_t)(((1 < _size_pdfs_2 - 2) ? 1 : _size_pdfs_2 - 2)) ) +1 )));
internal_UniformGridGPU_LbKernel::UniformGridGPU_LbKernel<<<_grid, _block, 0, s>>>(_data_pdfs, _data_pdfs_tmp, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, omega);
});
}
@@ -51,6 +51,15 @@ public:
: pdfsID(pdfsID_), omega(omega_)
{};
~UniformGridGPU_LbKernel() {
for(auto p: cache_pdfs_) {
delete p;
}
}
void operator() ( IBlock * block , cudaStream_t stream = 0 );
void inner( IBlock * block , cudaStream_t stream = 0 );
@@ -63,9 +63,9 @@ static FUNC_PREFIX void boundary_UniformGridGPU_NoSlip(uint8_t * const _data_ind
uint8_t * const _data_indexVector_112 = _data_indexVector + 12;
const int32_t dir = *((int32_t *)(& _data_indexVector_112[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
double * _data_pdfs_m3B5BEDEA5094B12F = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
double * _data_pdfs_10_20_m2227275638DDD757 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
_data_pdfs_m3B5BEDEA5094B12F[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = _data_pdfs_10_20_m2227275638DDD757[_stride_pdfs_0*x];
double * _data_pdfs_1ACA00C755A3ABE3 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
double * _data_pdfs_10_20_m7D57D887F63BE1DF = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
_data_pdfs_1ACA00C755A3ABE3[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = _data_pdfs_10_20_m7D57D887F63BE1DF[_stride_pdfs_0*x];
}
}
}
@@ -97,7 +97,7 @@ void UniformGridGPU_NoSlip::run( IBlock * block, IndexVectors::Type type , cudaS
const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
dim3 _block(int(((256 < indexVectorSize) ? 256 : indexVectorSize)), int(1), int(1));
dim3 _grid(int(( (indexVectorSize) % int(((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ( (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) ) +1 )), int(1), int(1));
dim3 _grid(int(( (indexVectorSize) % (((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ( (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) ) +1 )), int(1), int(1));
internal_boundary_UniformGridGPU_NoSlip::boundary_UniformGridGPU_NoSlip<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
}
@@ -63,9 +63,9 @@ static FUNC_PREFIX void boundary_UniformGridGPU_UBB(uint8_t * const _data_indexV
uint8_t * const _data_indexVector_112 = _data_indexVector + 12;
const int32_t dir = *((int32_t *)(& _data_indexVector_112[16*blockDim.x*blockIdx.x + 16*threadIdx.x]));
double * _data_pdfs_m3B5BEDEA5094B12F = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
double * _data_pdfs_10_20_m2227275638DDD757 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
_data_pdfs_m3B5BEDEA5094B12F[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = -0.30000000000000004*cx[dir]*weights[dir] + _data_pdfs_10_20_m2227275638DDD757[_stride_pdfs_0*x];
double * _data_pdfs_1ACA00C755A3ABE3 = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_1*cy[dir] + _stride_pdfs_2*z + _stride_pdfs_2*cz[dir] + _stride_pdfs_3*invdir[dir];
double * _data_pdfs_10_20_m7D57D887F63BE1DF = _data_pdfs + _stride_pdfs_1*y + _stride_pdfs_2*z + _stride_pdfs_3*dir;
_data_pdfs_1ACA00C755A3ABE3[_stride_pdfs_0*x + _stride_pdfs_0*cx[dir]] = -0.30000000000000004*cx[dir]*weights[dir] + _data_pdfs_10_20_m7D57D887F63BE1DF[_stride_pdfs_0*x];
}
}
}
@@ -97,7 +97,7 @@ void UniformGridGPU_UBB::run( IBlock * block, IndexVectors::Type type , cudaStre
const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
dim3 _block(int(((256 < indexVectorSize) ? 256 : indexVectorSize)), int(1), int(1));
dim3 _grid(int(( (indexVectorSize) % int(((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ( (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) ) +1 )), int(1), int(1));
dim3 _grid(int(( (indexVectorSize) % (((256 < indexVectorSize) ? 256 : indexVectorSize)) == 0 ? (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) : ( (int64_t)(indexVectorSize) / (int64_t)(((256 < indexVectorSize) ? 256 : indexVectorSize)) ) +1 )), int(1), int(1));
internal_boundary_UniformGridGPU_UBB::boundary_UniformGridGPU_UBB<<<_grid, _block, 0, stream>>>(_data_indexVector, _data_pdfs, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3, indexVectorSize);
}