Hi,(adsbygoogle = window.adsbygoogle || []).push({});

I'm working on doing some slight parallelization of a for loop in the force calculation in a molecular dynamics simulation, using openMP. (probably not up for a proper MPI implementation yet)

It should only need to parallelize the force loop since 90% ish of the time is spent calculating forces.

The atoms in the simulations is put into several neighbour boxes, for each box it only checks the nearby boxes and calculates the inter atomic forces between the atoms in those boxes. So for each box it needs to cycle through all the nearby boxes and call a force calculation boxes (or only half since all the forces assigned to both atoms when when calculated so calculating box [0,0,0] and [1,0,0] is also calculates what box [1,0,0] and [0,0,0] would do)

I built a small testcase that reproduces the error it generates:

The tabs got messed up when pasting, but I'll attach the program as well :)Code (Text):

#include <iostream>

#include <omp.h>

#include <armadillo>

using namespace std;

void testNeighbourCycling();

int main()

{

cout << "Hello World" << endl;

for(int i = 0 ; i < 10 ; i++)

{

cout << "At timestep: " << i << endl;

testNeighbourCycling();

}

}

void testNeighbourCycling()

{

// cout << "Hello World" << endl;

int noOfNeighbors = 3;

int next_i,next_j,next_k;

int prev_i, prev_j, prev_k;

int i, j, k;

int tid;

arma::cube neighborVisits = arma::zeros( noOfNeighbors, noOfNeighbors, noOfNeighbors);

#pragma omp parallel private( i, j, k, next_i, next_j, next_k, prev_i, prev_j, prev_k, tid) shared(neighborVisits)

{

#pragma omp for schedule(auto)

for(i = 0 ; i < noOfNeighbors ; i ++)

{

for(j=0; j < noOfNeighbors ; j++)

{

for(k = 0; k < noOfNeighbors; k++)

{

// tid = omp_get_thread_num();

// cout << " Thread # " << tid << " did " << i << " " << j << " " << k << endl;

// calculateForcesInsideCell(vec3(i,j,k),system);

//Now atoms should be appended with the increasing neighboring cells

//Could not find any neat way to do it, this is confusing and slow as is. Should work to do it a better way.

next_i = i + 1;

next_j = j + 1;

next_k = k + 1;

prev_i = i -1;

prev_j = j -1;

prev_k = k -1;

if(i == noOfNeighbors - 1)

next_i = 0;

if(j == noOfNeighbors - 1)

next_j = 0;

if(k == noOfNeighbors - 1)

next_k = 0;

if(i == 0)

prev_i = noOfNeighbors - 1;

if(j == 0)

prev_j = noOfNeighbors - 1;

if(k == 0)

prev_k = noOfNeighbors - 1;

// #pragma omp critical

{

//With itself

neighborVisits( i, j, k) += 1;

//The top layer, so it cycles through i and j while k is constant

neighborVisits(i,j,k) += 1; neighborVisits(prev_i, prev_j, next_k) += 1;

neighborVisits(i,j,k) += 1; neighborVisits(i, prev_j, next_k) += 1;

neighborVisits(i,j,k) += 1; neighborVisits(next_i, prev_j, next_k) += 1;

neighborVisits(i,j,k) += 1; neighborVisits(prev_i, j, next_k) += 1;

neighborVisits(i,j,k) += 1; neighborVisits(i, j, next_k) += 1;

neighborVisits(i,j,k) += 1; neighborVisits(next_i, j, next_k) += 1;

neighborVisits(i,j,k) += 1; neighborVisits(prev_i, next_j, next_k) += 1;

neighborVisits(i,j,k) += 1; neighborVisits(i, next_j, next_k) += 1;

neighborVisits(i,j,k) += 1; neighborVisits(next_i, next_j, next_k) += 1;

//The front layer that is left

neighborVisits(i,j,k) += 1; neighborVisits(next_i, prev_j, k) += 1;

neighborVisits(i,j,k) += 1; neighborVisits(next_i, j, k) += 1;

neighborVisits(i,j,k) += 1; neighborVisits(next_i, next_j, k) += 1;

//The rest of the left side

neighborVisits(i,j,k) += 1; neighborVisits(i, next_j, k) += 1;

}

}

}

}

}

arma::cube testMatrix = 27 * arma::ones(noOfNeighbors,noOfNeighbors,noOfNeighbors);

// cout << testMatrix << endl;

// cout << neighborVisits << endl;

// cout << neighborVisits - testMatrix << endl;

// cout << arma::accu(testMatrix - neighborVisits) << endl;

if(arma::accu(testMatrix - neighborVisits) != 0)

{

cout << "Test failed: wrong number of neighbourvisits" << endl;

cout << neighborVisits << endl;

}

}

In this testcase it just counts how many times each neihgbour is visited and all of them should be visited 27 times.

If the #pragma parts are taken away the test never triggers and it reaches and all the neighbors reach 27 each time.

If #pragma critical is uncommented it works but is even slower than not parallelizing it.

If the #pragma pieces is left there it fails on a few of the loops giving outpul like below

So it fails some of the for loops part, or fails to write properly to neighborVisits because of some memory writing conflicts or something.Code (Text):

At timestep: 7

At timestep: 8

Test failed: wrong number of neighbourvisits

[cube slice 0]

27.0000 27.0000 26.0000

26.0000 27.0000 26.0000

25.0000 25.0000 25.0000

[cube slice 1]

27.0000 27.0000 26.0000

27.0000 26.0000 27.0000

25.0000 27.0000 27.0000

[cube slice 2]

27.0000 27.0000 25.0000

27.0000 26.0000 27.0000

25.0000 27.0000 27.0000

BTW, in the real program the "neighborVisits(i,j,k) += 1; neighborVisits(prev_i, prev_j, next_k) += 1;" parts are supposed to be a call to a function that calculates the force between the atoms in neighbour [i,j,k] and neighbour [i, j-1, k+1].

Link to the testProgram at github: https://github.com/Gullik/CompPhysGullik/tree/master/test/neighbour_openmp

**Physics Forums - The Fusion of Science and Community**

The friendliest, high quality science and math community on the planet! Everyone who loves science is here!

# Trouble with parrallelizing for loop with openMP, cpp

Loading...

Similar Threads for Trouble parrallelizing loop |
---|

Java: Which Term determines this is a Sum? |

Why does entering '0' terminate do-while loop here? |

Continuous-time loop computer for NP problems? |

**Physics Forums - The Fusion of Science and Community**