Merge sort using linked indexes

  • Thread starter Thread starter rcgldr
  • Start date Start date
  • Tags Tags
    Sort
Click For Summary
SUMMARY

This discussion presents a detailed implementation of merge sort using linked indexes in C++. The primary focus is on two examples: a top-down approach and a bottom-up approach, both utilizing a single index array to manage linked lists of indexes instead of traditional two-array methods. The implementation leverages C++ standard libraries, specifically vector for dynamic array management, and demonstrates efficient sorting of large datasets (NUMREC set to 4,194,304). The code effectively merges lists by linking indexes, optimizing memory usage and performance.

PREREQUISITES
  • Understanding of C++ programming, specifically with vector and memory management.
  • Familiarity with sorting algorithms, particularly merge sort principles.
  • Knowledge of linked list structures and their implementation using indexes.
  • Basic understanding of performance measurement in C++ using clock().
NEXT STEPS
  • Explore advanced C++ features such as templates and smart pointers for memory management.
  • Learn about optimizing sorting algorithms for specific data types and structures.
  • Investigate the performance implications of different sorting techniques in large datasets.
  • Study the implementation of other data structures, such as trees and hash tables, for sorting and searching.
USEFUL FOR

Software developers, algorithm engineers, and computer science students interested in advanced sorting techniques and memory-efficient data structures in C++.

rcgldr
Homework Helper
Messages
8,948
Reaction score
687
These are example programs that sort an array of linked indexes to a primary array. Rather than having an array of indexes, the starting index is returned directly, each entry in the array is the index of the next entry in the array. In otherwords, array(i) contains the index of the next element in both the linked index array as well as the original array, sort of like a linked list, but using indexes instead of pointers to next item and using -1 to indicate the end of a linked index list. The advantage is that only one index array is needed, while a typical merge sort of indexes according to original array would use two index arrays, merging back and forth between the two arrays.

This post shows a top down example:

Code:
//----------------------------------------------------------------------//
//      msli.cpp        merge sort using linked indexes top down        //
//----------------------------------------------------------------------//
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <vector>

#define NUMREC (1024*4096)              // number of records

typedef unsigned long long UI64;

static clock_t dwTimeStart;             // clock values
static clock_t dwTimeStop;

// A[] is array to be sorted
// N[] is array of indexes to next index
// l is index of N[] to left  list
// r is index of N[] to right list
// returns starting index (l or r) for merged list

size_t MergeLists(UI64 A[],  size_t N[], size_t l, size_t r)
{
size_t head;
size_t *pprev = &head;                  // ptr: head or N[]
    while((l != -1) && (r != -1)){      // while not end lists
        if(A[l] <= A[r]){               //   if l <= r
            *pprev = l;                 //      link to l
            pprev = &N[l];              //      advance pprev
            l=*pprev;                   //      advance l
        } else {                        //   else
            *pprev = r;                 //      link to r
            pprev = &N[r];              //      advance pprev
            r=*pprev;                   //      advance r
        }
    }
    if(l == -1)                         // if end of l list
        *pprev=r;                       //   link to rest of r
    else                                // else
        *pprev=l;                       //   link to rest of l
    return head;
}

// A[] is array to be sorted
// N[] is set to array of indexes to next index (-1 = end list)
// low is starting index of A[]
// end is ending index of A[] (1 past last)
// returns starting index of N[] for merged list

size_t MergeSort(UI64 A[], size_t N[], size_t low, size_t end)
{
size_t mid, l, r;
    if((end - low) == 0){               // if size == 0
        return low;                     //  (only on first call)
    }
    if((end - low) == 1){               // if size == 1
        N[low] = -1;                    //    set N[]
        return low;                     //    return index
    }
    if((end - low) == 2){               // if size == 2
        if(A[low] <= A[end-1]){         //   if in order
            N[low]   = end-1;           //    set N[]
            N[end-1] = -1;
            return low;                 //    return index
        } else {                        //   else
            N[end-1] = low;             //    set N[]
            N[low]   = -1;
            return end-1;               //    return index
        }
    }
    mid = (low+end)/2;                  // size > 2, recursively
    l = MergeSort(A, N, low, mid);      //   split lists until
    r = MergeSort(A, N, mid, end);      //   size <= 2
    return MergeLists(A, N, l, r);      // merge a pair of lists
}

int main ()
{
    std::vector <UI64>   vData(NUMREC);
    std::vector <size_t> vNext(NUMREC);
    UI64 d;
    size_t i;

//      generate data

    for(i = 0; i < NUMREC; i++){
        d  = (((UI64)((rand()>>4) & 0xff))<< 0);
        d += (((UI64)((rand()>>4) & 0xff))<< 8);
        d += (((UI64)((rand()>>4) & 0xff))<<16);
        d += (((UI64)((rand()>>4) & 0xff))<<24);
        d += (((UI64)((rand()>>4) & 0xff))<<32);
        d += (((UI64)((rand()>>4) & 0xff))<<40);
        d += (((UI64)((rand()>>4) & 0xff))<<48);
        d += (((UI64)((rand()>>4) & 0xff))<<56);
        *(((UI64 *)&vData[0])+i) = d ;}

//      set and sort N[] according to A[]

    dwTimeStart = clock();

    i = MergeSort(&vData[0], &vNext[0], 0, vData.size());

    dwTimeStop = clock();
    std::cout << "Number of ticks    " << (dwTimeStop-dwTimeStart) << std::endl;

//      output data according to N[]

    std::ofstream ofs("dsti.bin", std::ofstream::binary | std::ofstream::trunc);
    if(!ofs.is_open())
        goto exit0;
    while(i != -1){
        ofs.write((const char *)&vData[i], sizeof(UI64));
        i = vNext[i];
    }
    ofs.close();
exit0:
    return(0);
}
 
Last edited:
Technology news on Phys.org
This post shows a bottom up example that merges single elements into an array of linked indexes, where linked array(i) contains a linked index list of 2^i elements. MergeLists() is the same as the one in the top down example.

Code:
//----------------------------------------------------------------------//
//      mslia.cpp       merge sort using linked indexes and array       //
//----------------------------------------------------------------------//
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <vector>

#define NUMREC (1024*4096)              // number of records
#define NUMIDX (32)                     // number of indexes in array

typedef unsigned long long UI64;

static clock_t dwTimeStart;             // clock values
static clock_t dwTimeStop;

// A[] is array to be sorted
// N[] is array of indexes to next index
// l is index of N[] to left  list
// r is index of N[] to right list
// returns starting index (l or r) for merged list

size_t MergeLists(UI64 A[],  size_t N[], size_t l, size_t r)
{
size_t head;
size_t *pprev = &head;                  // ptr: head or N[]
    while((l != -1) && (r != -1)){      // while not end lists
        if(A[l] <= A[r]){               //   if l <= r
            *pprev = l;                 //      link to l
            pprev = &N[l];              //      advance pprev
            l=*pprev;                   //      advance l
        } else {                        //   else
            *pprev = r;                 //      link to r
            pprev = &N[r];              //      advance pprev
            r=*pprev;                   //      advance r
        }
    }
    if(l == -1)                         // if end of l list
        *pprev=r;                       //   link to rest of r
    else                                // else
        *pprev=l;                       //   link to rest of l
    return head;
}

// A[] is array to be sorted
// N[] is set to array of indexes to next index (-1 = end list)
// low is starting index of A[]
// end is ending index of A[] (1 past last)
// returns starting index of N[] for merged list
// S[] is array of starting indexes in N[]
// S[i] is starting index of list of size pow(2,i)

size_t MergeSort(UI64 A[], size_t N[], size_t low, size_t end)
{
size_t S[NUMIDX];                       // array of starting indexes
size_t i,j;
    if((end - low) == 0){               // if size == 0
        return low;                     //  (only on first call)
    }
    for(i = 0; i < (end-low); i++)      // init N[]
        N[i] = -1;
    for(i = 0; i < NUMIDX; i++)         // init S[]
        S[i] = -1;
    for(j = low; j < end; j++){         // merge index lists into S[], N[]
        low = j;
        for(i = 0; (i < NUMIDX) && (S[i] != -1); i++){
            low = MergeLists(A, N, S[i], low);
            S[i] = -1;
        }
        if(i == NUMIDX)
            i--;
        S[i] = low;
    }
    low = -1;                           // merge S[] lists to one list in N[]
    for(i = 0; i < NUMIDX; i++)
        low = MergeLists(A, N, S[i], low);
    return low;
}

int main ()
{
    std::vector <UI64>   vData(NUMREC);
    std::vector <size_t> vNext(NUMREC);
    UI64 d;
    size_t i;

//      generate data

    for(i = 0; i < NUMREC; i++){
        d  = (((UI64)((rand()>>4) & 0xff))<< 0);
        d += (((UI64)((rand()>>4) & 0xff))<< 8);
        d += (((UI64)((rand()>>4) & 0xff))<<16);
        d += (((UI64)((rand()>>4) & 0xff))<<24);
        d += (((UI64)((rand()>>4) & 0xff))<<32);
        d += (((UI64)((rand()>>4) & 0xff))<<40);
        d += (((UI64)((rand()>>4) & 0xff))<<48);
        d += (((UI64)((rand()>>4) & 0xff))<<56);
        *(((UI64 *)&vData[0])+i) = d ;}

//      set and sort N[] according to A[]

    dwTimeStart = clock();

    i = MergeSort(&vData[0], &vNext[0], 0, vData.size());

    dwTimeStop = clock();
    std::cout << "Number of ticks    " << (dwTimeStop-dwTimeStart) << std::endl;

//      output data according to N[]

    std::ofstream ofs("dsti.bin", std::ofstream::binary | std::ofstream::trunc);
    if(!ofs.is_open())
        goto exit0;
    while(i != -1){
        ofs.write((const char *)&vData[i], sizeof(UI64));
        i = vNext[i];
    }
    ofs.close();
exit0:
    return(0);
}
 
Last edited:

Similar threads

  • · Replies 2 ·
Replies
2
Views
2K
  • · Replies 1 ·
Replies
1
Views
2K
  • · Replies 2 ·
Replies
2
Views
2K
  • · Replies 3 ·
Replies
3
Views
2K
Replies
5
Views
2K
  • · Replies 75 ·
3
Replies
75
Views
7K
  • · Replies 1 ·
Replies
1
Views
4K
  • · Replies 5 ·
Replies
5
Views
2K
  • · Replies 17 ·
Replies
17
Views
4K
  • · Replies 46 ·
2
Replies
46
Views
9K