[Python] Optimizing Prime Pair Combinations for Project Euler Problem 60

  • Thread starter: Arman777
  • Tags: Euler Project

Summary:
The discussion revolves around optimizing a Python solution for Project Euler Problem 60, which involves finding sets of prime numbers that can concatenate to form new primes. The original code is inefficient, taking over 18 minutes to execute, prompting suggestions for profiling to identify time-consuming functions. Recommendations include using a more efficient prime-checking algorithm, such as the Miller-Rabin test, and employing a sieve method to generate primes. Participants emphasize the importance of mathematical approaches over brute force methods, suggesting that the problem may require a reevaluation of the algorithm used. Overall, the conversation highlights the balance between coding efficiency and mathematical strategy in solving complex problems.
  • #91
Here's my solution. I also used the Miller-Rabin prime tester from Rosetta Code (which still turns out to be the bottleneck). The times I am getting are:

getting primes took: 1 ms
getting primes pairs took: 286 ms
computing all solution sums took: 5 ms
26033

"computing all solution sums" could potentially be optimized a bit more, i.e. in testing that a newly added prime is also in a pair with each other prime in the current solution. I tried binary search and unordered_set (which also uses more memory), but it seems brute force is faster for this part anyway.

C++:
#include <chrono>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>
#include <utility>
#include <vector>

using namespace std;
using namespace std::chrono;

using IntType = int64_t;

bool prime( IntType n );   // the Miller-Rabin tester from Rosetta Code (not shown)

struct Pair
{
    IntType a;
    IntType b;

    Pair( IntType _a, IntType _b ) : a( _a ), b( _b ) {}
};

inline void fillPrimes( vector< IntType > & primes, const IntType n )
{
   for( IntType i = 3, j = 0; j < n; ++i )
   {
        if( prime( i ) )
        {
           primes[ j++ ] = i;
        }
   }
}

inline IntType cat(
   const IntType a,
   const IntType b )
{
    return a*( std::pow( static_cast< IntType >( 10 ),
        ( IntType( std::log10( b ) ) + 1 ) ) ) + b;
}

inline bool catTest( IntType a, IntType b )
{
   return prime( cat( a, b ) )
        && prime( cat( b, a ) );
}

inline bool binarySearch(
   const IntType v,
   const vector< Pair > & pairs,
   IntType start,
   IntType last )
{
    while( start <= last )
    {
        IntType m = ( start + last ) / 2;
        if( pairs[ m ].b < v )
        {
            start = m + 1;
        }
        else if( pairs[ m ].b > v )
        {
            last = m - 1;
        }
        else
        {
            return true;
        }
    }
    return false;
}

pair< bool, IntType > solutionFrom(
   vector< IntType > & roots,
    IntType & minSum,
   const IntType & sum,
   const IntType & NPP,
   const vector< Pair >    & pairs,
   const vector< IntType > & starts,
   const vector< IntType > & counts,
   const vector< IntType > & primePosition,
   const vector< IntType > & primes )
{
   const IntType root  = roots.back();
   const IntType level = roots.size();
   const IntType NPL   = counts[ root ] - ( NPP - level );

    for( IntType i = 0; i <= NPL; ++i )
    {
       IntType subPrime = pairs[ starts[ root ] + i ].b;
       IntType tempSum = sum + subPrime;
 
       if( tempSum + subPrime >= minSum )
       {
           return { false, 0 };
       }

        bool intersectsAll = true;
        for( IntType k = level - 2; k >= 0; --k )
        {

            bool intersectsSubTree = false;    
           for( IntType j = 0; j < counts[ roots[ k ] ]
               && subPrime >= pairs[ starts[ roots[ k ] ] + j ].b; ++j )
           {
               if( pairs[ starts[ roots[ k ] ] + j ].b == subPrime )
               {
                   intersectsSubTree = true;
                   break;
               }
           }
           if( ! intersectsSubTree )
           {
               intersectsAll = false;
               break;
           }

           // if( binarySearch(
           //    subPrime,
           //    pairs,
           //    starts[ roots[ k ] ],
           //    starts[ roots[ k ] ] + counts[ roots[ k ] ] - 1 ) )
           // {
           //    continue;
           // }

           // intersectsAll = false;
           // break;
        }
        if( ! intersectsAll )
        {
            continue;
        }

        if( level == NPP - 1 )
        {
            return { true, tempSum };
        }
        else
        {
           roots.push_back( primePosition[ subPrime / 2 ] );
           auto s = solutionFrom(
               roots,
               minSum,
               tempSum,
               NPP,
               pairs,
               starts,
               counts,
               primePosition,
               primes );

           if( s.first )
           {
               minSum = s.second;
           }

           roots.pop_back();
       }
    }

    return { false, 0 };
}

int main()
{
    const IntType MAX_N_PRIMES = 1200;
    const IntType NPP = 5;

    vector< IntType > primes( MAX_N_PRIMES );

    auto t1 = high_resolution_clock::now();
    fillPrimes( primes, MAX_N_PRIMES );
    cout << "getting primes took: " <<
            duration_cast< milliseconds >( high_resolution_clock::now() - t1 ).count()
            << " ms" << endl;

    vector< Pair > pairs;
    pairs.reserve( MAX_N_PRIMES );
    vector< IntType > primePairCounts(  MAX_N_PRIMES, 0 );
    vector< IntType > primePairStarts(  MAX_N_PRIMES, 0 );
    vector< IntType > validPairs;
    vector< IntType > primePosition( primes.back() / 2 + 1, 0 );

    t1 = high_resolution_clock::now();
    for( IntType i = 0; i < MAX_N_PRIMES; ++i )
    {
       primePosition[ primes[ i ] / 2 ] = i;
       primePairStarts[ i ] = pairs.size();
       for( IntType j = i+1; j < MAX_N_PRIMES; ++j )
       {
            if( catTest( primes[ i ], primes[ j ] ) )
            {
                pairs.push_back( { primes[ i ], primes[ j ] } );
                ++primePairCounts[ i ];
            }
       }
       if( primePairCounts[ i ] >= NPP )
        {
            validPairs.push_back( i );
        }
    }
    cout << "getting primes pairs took: " <<
            duration_cast< milliseconds >( high_resolution_clock::now() - t1 ).count()
            << " ms" << endl;

    t1 = high_resolution_clock::now();
    IntType minSolution = numeric_limits<IntType>::max();
    vector< IntType > roots( NPP );
    roots.shrink_to_fit();

    for( auto root : validPairs )
    {
       IntType sum  = primes[ root ];

       roots.clear();
       roots.push_back( root );

       pair< bool, IntType > p
         = solutionFrom(
          roots,
          minSolution,
          sum,
          NPP,
          pairs,
          primePairStarts,
          primePairCounts,
          primePosition,
          primes );

       if( p.first )
       {
           minSolution = min( minSolution, p.second );
       }    
       else
       {
           continue;
       }
   }    
   cout << "computing all solution sums took: " <<
        duration_cast< milliseconds >( high_resolution_clock::now() - t1 ).count()
        << " ms" << endl;

   cout << minSolution << endl;
}

Seems like a pretty efficient solution. Most of the time is spent testing primes to construct the list of prime pairs.

1) List the prime pairs ##P## in sorted order with no duplicates.
2) Given the ##i^{th}## prime ##x_i##, its pairs occupy a contiguous run of the list, from index ##start(x_i)## up to ##(x_i, x_j)##, the last pair containing ##x_i##.
3) Pick ##x_i##' = ##P[ start[ x_i ] ] + 1## as the first candidate to include in a solution using ##x_i##, and recurse with ##x_i = x_i##'.
4) Repeat step 3 until you have a solution (while making sure that each prime picked is in a pair with each previous pick); record it as the new min if it is smaller, then go back one level and continue where you left off.
 
Last edited:
  • #92
@jarvis, very well done. Having vectors for everything seems to be the way to go! I did a similar thing. Mine was more like a database though. I had a prime record that had everything in it. It was pretty thematic.

Ok, I think we've killed this problem now. @Arman777, would you like to pick another?
 
  • #93
verty said:
Ok, I think we've killed this problem now. @Arman777, would you like to pick another?
Lol I agree, we talked a lot about it. I am trying to solve the next problem, #61. If I can't solve it I can share it.
 
  • #94
verty said:
Ok, I think we've killed this problem now

Actually, I think that the real value is discussing the pros and cons of the various approaches. Not whether someone can drag their code across the finish line.

Interestingly, I modified the code to find pairs, loop over pairs and look for numbers that form triplets. Once there are triplets, make sure there are at least 3, and then build up the quintuplet from just that restricted list of numbers. Turns out that didn't help - it takes slightly longer.

What does help is a fast "probable prime" algorithm (100% of the primes are identified as probable primes and a tiny fraction of composites sneak in), and only when there is a quintuplet do the full prime test. It trades time in the primality testing for a slightly longer set of loops. That takes me to 8 seconds, 3 of which are spent caching prime pairs, and the remainder in unordered_map. So it's clear to me that to do better I need to drop the unordered_map.
 
  • #95
Vanadium 50 said:
Actually, I think that the real value is discussing the pros and cons of the various approaches. Not whether someone can drag their code across the finish line.

Interestingly, I modified the code to find pairs, loop over pairs and look for numbers that form triplets. Once there are triplets, make sure there are at least 3, and then build up the quintuplet from just that restricted list of numbers. Turns out that didn't help - it takes slightly longer.

What does help is a fast "probable prime" algorithm (100% of the primes are identified as probable primes and a tiny fraction of composites sneak in), and only when there is a quintuplet do the full prime test. It trades time in the primality testing for a slightly longer set of loops. That takes me to 8 seconds, 3 of which are spent caching prime pairs, and the remainder in unordered_map. So it's clear to me that to do better I need to drop the unordered_map.

Below is the essence of my solution if it were iterative. I think time is saved by knowing when to quit, and by going over pairs directly (in order) so smaller solutions are found sooner. The pairs look like this (assuming only b and c are not paired):

Code:
primes = [ a, b, c, d ]
pairs =
(a,b)
(a,c)
(a,d)
(b,d)
(c,d)

starts = [  0,  3,  4,  5]
counts = [ 3, 1, 1, 0]

Then

Code:
for( each p_j paired with p_i )

becomes

Code:
for( int j = starts[ i ]; j < starts[ i ] + counts[ i ]; ++j )
    p_j = pairs[ j ].second

Now in the next loop you want to skip ahead to the primes paired with p_j, so somehow you need to know the index of p_j in primes. You could use a hash map or something,

Code:
for( int j = starts[ i ]; j < starts[ i ] + counts[ i ]; ++j )
    p_j = pairs[ j ].second
    for( int k = starts[ index_map( p_j ) ]; k < starts[ index_map( p_j )]  + counts[ index_map(p_j ) ]; ++k )
        ...

Or better yet, only keep track of the indices in the first place,

Code:
for i = 0 to N-1
    for j = i+1 to N-1
        if( is_pair( primes[ i ], primes[ j ] ) )
            pairs.push( i, j )
e.g.
pairs =
(0,1)
(0,2)
(0,3)
(1,3)
(2,3)

for( int i = 0; i < N; ++i )
    p1 = primes[ i ]
    for( int j = starts[ i ]; j < starts[ i ] + counts[ i ]; ++j )
        p2 = primes[ pairs[ j ].second ]
        for( int k = starts[ pairs[ j ].second ]; k < starts[ pairs[ j ].second ] + counts[ pairs[ j ].second ]; ++k )
            p3 = primes[ pairs[ k ].second ]
            for( int m = starts[ pairs[ k ].second ]; m < starts[ pairs[ k ].second ] + counts[ pairs[ k ].second ]; ++m )
                p4 = primes[ pairs[ m ].second ]
                ...

Pseudo code for an iterative solution with fixed dimensions and quitting early then looks like this.

Python:
for( p1 in primes ):
    for( p2 in pair with p1 ):
        if( sum( p2, p1 ) >= minSolution ): break
        for( p3 in pair with p2 ):
            if( sum( p3, p2, p1 ) >= minSolution ): break
            if( p3 in pair with p1 ):
                for( p4 in pair with p3 ):
                    if( sum( p4, p3, p2, p1 ) >= minSolution ): break
                    if( p4 in pair with p2 and p4 in pair with p1 ):
                        for( p5 in pair with p4 ):
                            if( sum( p5, p4, p3, p2, p1 ) >= minSolution ): break
                            if( p5 in pair with p3 and p5 in pair with p2 and p5 in pair with p1 ):
                                minSolution = sum( p5, p4, p3, p2, p1 )

Checking whether ##p_i## is paired with ##p_{i-2}, p_{i-3}, ... ,## can be done using brute force (no additional memory), binary search (no additional memory), hash maps (additional memory), or direct tables (a ton of additional memory), depending on how much memory you have. I was surprised that brute force was actually faster than binary search and as fast as hash tables; with the pairs sorted by value, you can stop searching as soon as you see a prime larger than the one you're searching for.

C++:
inline bool binarySearch(
   const IntType v,
   const vector< Pair > & pairs,
   const vector< IntType > & primes,
   IntType start,
   IntType last  )
{
    while( start <= last )
    {
        IntType m = ( start + last ) / 2;
        if( primes[ pairs[ m ].b ] < v )
        {
            start = m + 1;
        }
        else if( primes[ pairs[ m ].b ] > v )
        {
            last = m - 1;
        }
        else
        {
            return true;
        }
    }
    return false;
}

if( binarySearch(  p3, pairs, primes, starts[ i ], starts[ i ] + counts[ i ] -1 ))
    // then p3 is paired with p1=primes[i]

An additional optimization is that you can quit early at any level if there are not enough subsequent pairs. If you have p1 chosen, you need 4 more, so no need to consider any of the last 3 primes paired with p1 as a choice for p2.

e.g.
Code:
for( int i = 0; i < N; ++i )
    p1 = primes[ i ]
    for( int j = starts[ i ]; j < starts[ i ] + counts[ i ] - 3; ++j )
        p2 = primes[ pairs[ j ].second ]
        if( p1 + p2 >= minSolution ) break
        for( int k = starts[ pairs[ j ].second ];
            k < starts[ pairs[ j ].second ] + counts[ pairs[ j ].second ] - 2;
            ++k )
            p3 = primes[ pairs[ k ].second ]
            if( p1 + p2 + p3 >= minSolution ) break
            if( in_pair( p1, p3 ))
            for( int m = starts[ pairs[ k ].second ];
                 m < starts[ pairs[ k ].second ] + counts[ pairs[ k ].second ] - 1;
                 ++m )
                 p4 = primes[ pairs[ m ].second ]
                ...

An additional insight is that this solution is essentially a depth first search in the tree induced by the pairs. e.g.

Code:
(a,b), (a,c), (a,d), (b,d), (c,d)

           a
       /   |   \
      b    c    d
      |    |
      d    d

I think it would be interesting and very challenging to do it efficiently for the general case, without pre-generating all of the primes/prime pairs, instead introducing new ones only as needed, up to the point where it's clear that any solution containing a larger prime can't be minimal (and also use big numbers and parallelization). I wonder how long it would take to find solutions for sets of 6 or 7 primes?
 
Last edited:
  • #96
The problem I see with "just in time" determination of primes or prime pairs is "where do you put them?" Specifically, how do you find them - if you're doing searches, you're going to be replacing O(1) operations with O(log n) operations. An efficient algorithm needs a good answer to that question.

On parallelization, I was thinking about that as well, partly because I have access to some highly parallel systems. The actual search parallelizes quite nicely, with large outer loops. Unfortunately, my code spends almost half its time finding prime pairs, and the structure I chose to store them in, unordered_map, is going to have a slow reduction time. It's not clear to me how to best efficiently reduce the pair precomputation if done in parallel.

I'm not going to have time to fiddle with this, but I am interested in what the gains would be if I replaced unordered_map by an array. When I was spending minutes doing primality testing, a few seconds of unordered_map navigation was a good thing. Now that I have discovered the key is to use probable primes and only do a full primality test on candidate solutions, it looks like less of a bargain. In the 8s version of my code, 90% or more of the time is spent in unordered_map.

The other advantage is that it's more obvious how to do the reduction.
 
  • #97
Vanadium 50 said:
I'm not going to have time to fiddle with this, but I am interested in what the gains would be if I replaced unordered_map by an array.

That gets the time down to under a second. (940 milliseconds) Essentially all of the time is in primality testing.

Given that, looping over probable primes is no longer helpful. You have to test it for real eventually, so you might as well do it at the beginning. I have repurposed the probable prime tester to do a fast reject before calling Miller-Rabin. If it were necessary to get this well below one second, I would push harder on this front.
 
  • #98
I got a chance to play again. It's at half a second. To do this I:
  • Replaced the string concatenation with if/else and multiplies
  • Replaced the iterators with indices. My intent was to allow parallelism, but it did not help - the overhead wiped out the gains
  • More carefully set loop limits to avoid searching for quintuplets that could not possibly be smaller
  • Tweaked the fast reject to re-optimize
As it stands, 20% of the time is spent looking at the cached pair results and 80% of the time is doing primality testing, the majority in modular exponentiation.

This is on an i5-6500; faster chips would take less time obviously.
 
