Optymalizacje Quicksort
Uczę się algorytmów sortowania i jako następny krok staram się, aby moja implementacja była zbliżona dostd::sort()
. Jestem daleko, do tej pory .. :-)
Mam 3 implementacje quicksort:
standardowy quicksort (przy użyciu tablic tymczasowych).quicksort z następującymi optymalizacjami:mediana3 używana do wybrania medianyrekursja ogonowaquicksort zastosowano tylko do rozmiaru partycji <16. Dla mniejszych partycji używany jest rodzaj wstawiania.sortowanie wstawiania stosowane do całej tablicy naraz, zamiast stosowania do każdej partycji, niesortowane przez quicksort.quicksort ze wszystkimi optymalizacjami wymienionymi powyżej + partycjonowanie w miejscu (bez tablic tymczasowych).Spodziewałem się, że wydajność będzie najlepsza, ale najlepiej!
Co jest nie tak z moją implementacją? Biorąc pod uwagę drastyczną różnicę między występem, zakładam, że coś jest nie tak.
Niektóre liczby, które dają poczucie, jak złe są rzeczy (N = liczba elementów w tablicy, liczby są czasem zajmowanym przez każde algo w mikrosekundach): Sortowanievector<int>
a każde algo ma dokładnie taką samą sekwencję liczb.
N quick mixed mixed_inplace
8 0 0 0
16 0 1 1
32 1 2 2
64 1 3 3
128 1 8 8
256 3 16 17
512 6 34 41
1,024 16 84 87
2,048 28.3 177.1 233.2
4,096 48.5 366.6 410.1
8,192 146.5 833.5 1,012.6
16,384 408.4 1,855.6 1,964.2
32,768 1,343.5 3,895.0 4,241.7
65,536 2,661.1 7,927.5 8,757.8
Korzystanie z Visual Studio Express 2010.
KOD:
// ------------ QUICK SORT ------------------
template<typename T, typename key_compare>
void quicksort( T first, T last, const size_t pivot_index, key_compare comp ) {
T saved_first = first;
size_t N = last - first;
if (N > 1) {
// create a temp new array, which contains all items less than pivot
// on left and greater on right. With pivot value in between.
// vector<typename decltype(*T)> temp(N);
typename iterator_traits<T>::pointer temp = (typename iterator_traits<T>::pointer) malloc(sizeof(T)*N);
size_t left_index = 0, right_index = N - 1 ;
iterator_traits<T>::value_type pivot_val = *(first + pivot_index);
for(; first < saved_first + pivot_index; first++) {
if( !comp(*first, pivot_val) )
temp[right_index--] = *first;
else
temp[left_index++] = *first;
}
// skip the pivot value
// TODO: swap the pivot to end so we can have a single loop instead.
++first;
// do the rest
for(; first < last; first++) {
if( !comp(*first, pivot_val) )
temp[right_index--] = *first;
else
temp[left_index++] = *first;
}
if( right_index == left_index )
temp[left_index] = pivot_val;
else
temp[left_index+1] = pivot_val;
// recurse for left and right..
quicksort(temp, temp+left_index, left_index/2, comp);
quicksort(temp+left_index+1, temp+N, (N-right_index)/2, comp);
// return a concat'd array..
for(size_t i = 0; i < N; i++)
*saved_first++ = temp[i];
free(temp);
}
}
/*
** best, average, worst: n log n, n log n, n^2
** space: log n
*/
template<typename T, typename key_compare >
void quicksort( T first, T last, key_compare comp ) {
size_t pivot_index = (last - first) / 2;
quicksort( first, last, pivot_index, comp);
}
// ------------ QUICK with optimizations ------------------
/*
quicksort partition on range [first, last[ using predicate function as the comparator.
"mid" is in-out param, function uses mid as mid, and updates it before returning it with
current/new mid position after partitioning.
*/
template<typename T, typename key_compare >
void _partial_quicksort_partition( T first, T last, T& mid, key_compare comp ) {
T savedFirst = first;
typedef typename iterator_traits<T>::value_type _val_type;
size_t N = last - first;
_val_type *temp = (_val_type *) malloc((N*sizeof(_val_type)));
// move pivot to the end..
_val_type pivot_val = *mid;
std::swap(*mid, *(last - 1));
size_t left_index = 0, right_index = N - 1;
for( ; first != last - 1; first++ ) {
if( !comp(*first, pivot_val) )
temp[right_index--] = *first;
else
temp[left_index++] = *first;
}
assert( right_index == left_index );
temp[left_index] = pivot_val;
std::copy(temp, temp+N, savedFirst);
free(temp);
mid = savedFirst + left_index;
}
template<typename T, typename key_compare >
void _partial_quicksort( T first, T last, key_compare comp ) {
size_t s = last - first;
// sort only if the list is smaller than our limit.. else it's too small for
// us to bother.. caller would take care of it using some other stupid algo..
if( 16 > s ) {
// only one call to insertion_sort(), after whole array is partially sorted
// using quicksort().
// my_insertion_sort::insertion_sort(first, last, pred);
return ;
}
// select pivot.. use median 3
T mid = my_mixed_inplace_quicksort::median3(first, last - 1, s, comp);
// partition
_partial_quicksort_partition(first, last, mid, comp);
// recurse..
_partial_quicksort(first, mid, comp);
// tail recurse..
// TODO: tail recurse on longer partition..
_partial_quicksort(mid+1, last, comp);
}
template<typename T, typename key_compare >
void mixed_quicksort( T first, T last, key_compare pred ) {
_partial_quicksort(first, last, pred );
my_insertion_sort::insertion_sort(first, last, pred);
}
// ------------ "in place" QUICK with optimizations ------------------
/*
in place quicksort partition on range [first, last[ using predicate function as the comparator.
"mid" is in-out param, function uses mid as mid, and updates it before returning it with
current/new mid position after partitioning.
*/
template<typename T, typename key_compare >
void _partial_inplace_quicksort_partition( T first, T last, T& mid, key_compare comp ) {
typename iterator_traits<T>::value_type midVal = *mid;
// move pivot to end..
std::swap(*mid, *(last - 1));
mid = first;
// in-place quick sort:
for( ; first < last - 1; first++ ) {
if( comp(*first, midVal) ) {
std::swap(*first, *mid);
mid++;
}
}
// bring pivot to the mid..
std::swap(*mid, *(last - 1));
}
// brings best median to middle and returns it..
// works on array as [first, last] NOT [first, last[
template<typename T, typename key_compare >
T median3(T first, T last, size_t size, key_compare comp ) {
T mid = first + size/2;
if (comp(*mid, *first)) {
std::swap(*mid, *first);
}
if (comp(*last, *mid)) {
std::swap(*last, *mid);
}
if (comp(*mid, *first)) {
std::swap(*mid, *first);
}
return mid;
}
template<typename T, typename key_compare >
void _partial_inplace_quicksort( T first, T last, key_compare comp ) {
size_t s = last - first;
// sort only if the list is smaller than our limit.. else it's too small for
// us to bother.. caller would take care of it using some other stupid algo..
if( 16 > s ) {
// only one call to insertion_sort(), after whole array is partially sorted
// using quicksort().
// my_insertion_sort::insertion_sort(first, last, pred);
return ;
}
// select pivot.. use median 3
T mid = median3(first, last - 1, s, comp);
// partition
_partial_inplace_quicksort_partition(first, last, mid, comp);
// recurse..
_partial_inplace_quicksort(first, mid, comp);
// tail recurse..
_partial_inplace_quicksort(mid+1, last, comp);
// print_array(first, last, "_partial_inplace_quicksort(exit2)" );
}
// in-place quick sort
// tail recurse
// median
// final insertion sort..
template<typename T, typename key_compare >
void mixedsort_inplace( T first, T last, key_compare pred ) {
_partial_inplace_quicksort(first, last, pred );
my_insertion_sort::insertion_sort(first, last, pred);
}
// ---------------- INSERTION SORT used above ----------------
namespace my_insertion_sort {
template<typename T, typename key_compare>
void insertion_sort( T first, T last, key_compare comp ) {
// for each element in the array [first+1, last[
for( T j = first+1; j < last; j++) {
iterator_traits<T>::value_type curr = *j;
T hole = j;
// keep moving all the elements comp(hole.e. > or <) hole to right
while( hole > first && comp(curr, *(hole-1)) ) {
*hole = *(hole-1);
--hole;
}
// insert curr at the correct position.
*hole = curr;
}
}
}
Kod używany do testowania:
#include <ctime>
#ifdef _WIN32
#include <Windows.h>
#include <WinBase.h>
#endif // _WIN32
template<typename T, typename key_compare = std::less<T>> class MySortAlgoTester;
template <typename T>
void print_array( T begin, T end, string prefix = "" ) {
cout << prefix.c_str();
for_each(begin, end, []( typename std::iterator_traits<T>::reference it) { cout << it << ','; } );
cout << endl;
}
int main () {
srand(123456789L);
size_t numElements = 4;
vector<char*> algoNames;
map<double, vector<double>> results;
int numTests = 0;
while( (numElements *= 2) <= 4096*16 ) {
MySortAlgoTester<int> tester(numElements);
results[numElements] = vector<double>();
algoNames.push_back("mixedsort_inplace");
results[numElements].push_back(tester.test(my_mixed_inplace_quicksort::mixedsort_inplace, "mixedsort_inplace"));
tester.reset();
algoNames.push_back("quick");
results[numElements].push_back(tester.test(my_quicksort::quicksort, "quicksort"));
tester.reset();
algoNames.push_back("mixed_quicksort");
results[numElements].push_back(tester.test(my_mixed_quicksort::mixed_quicksort, "mixed_quicksort"));
}
// --- print the results...
cout << std::setprecision(2) << std::fixed << endl << "N";
for_each(algoNames.begin(), algoNames.begin()+(algoNames.size()/numTests), [](char* s){cout << ',' << s ;} );
typedef std::pair<double,vector<double>> result_iter;
BOOST_FOREACH(result_iter it, results) {
cout << endl << it.first << ',';
BOOST_FOREACH( double d, it.second ) {
cout << d << ',' ;
}
}
template<typename T, typename key_compare = std::less<T>>
class MySortAlgoTester {
key_compare comp;
vector<T> vec;
typedef typename vector<T>::iterator vecIter;
vector<T> vec_copy;
size_t m_numElements;
bool is_sorted(vecIter first, vecIter last) {
vecIter sFirst = first;
for(vecIter next = first+1; next != last;)
// '>' associativity: left to right
// ++ has precedence over >
if( !comp(*(first++), *(next++)) ) {
if(*(next-1) == *(first-1))
continue;
print_array(sFirst, last, "is_sorted() returning false: ");
cout << "comp(" << *(first-1) << ", " << *(next-1) << ") = false && "
<< *(next-1) << " != " << *(first-1) << endl ;
return false;
}
return true;
}
public:
MySortAlgoTester(size_t numElements) : m_numElements(numElements) {
srand(123456789L);
vec.resize(m_numElements);
vec_copy.resize(m_numElements);
// std::generate(vec.begin(), vec.end(), rand);
for(size_t i = 0; i < vec.size(); i++) {
vec[i] = rand() % (m_numElements * 2);
vec_copy[i] = vec[i];
}
}
~MySortAlgoTester() {
}
void reset() {
// copy the data back so next algo can be tested with same array.
std::copy(vec_copy.begin(), vec_copy.end(), vec.begin());
for(size_t i = 0; i < vec_copy.size(); i++) {
vec[i] = vec_copy[i];
}
// std::copy(vec_copy.begin(), vec_copy.end(), vec);
}
double m___start_time_asdfsa = 0;
double getTimeInMicroSecs() {
#ifdef _WIN32
LARGE_INTEGER li;
if(!QueryPerformanceFrequency(&li))
cout << "getTimeInMicroSecs(): QueryPerformanceFrequency() failed!" << endl;
QueryPerformanceCounter(&li);
return double(li.QuadPart)/1000.0;
#else // _WIN32
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_usec + 10e6 * tv.tv_sec;
}
#endif // _WIN32
inline void printClock( const char* msg ) {
cout << msg << (long)(getTimeInMicroSecs() - m___start_time_asdfsa) << " micro seconds" << endl;
}
inline double getClock() {
return (getTimeInMicroSecs() - m___start_time_asdfsa);
}
inline void startClock() {
m___start_time_asdfsa = getTimeInMicroSecs();
}
double test( void (*sort_func)(typename vector<T>::iterator first, typename vector<T>::iterator last, typename key_compare pred), const char* name ) {
cout << "START Testing: " << name << ". With --- " << m_numElements << " elements." << endl;
startClock();
sort_func(vec.begin(), vec.end(), comp);
double ms = getClock();
if(!MySortAlgoTester::is_sorted(vec.begin(), vec.end())) {
cout << name << " did not sort the array." << endl;
// throw string(name) + " did not sort the array.";
}
cout << "DONE Testing: " << name << ". Time taken (ms): " << ms << endl;
return ms;
}
double test( void (*sort_func)(typename vector<T>::iterator first, typename vector<T>::iterator last), const char* name ) {
cout << "START Testing: " << name << ". With --- " << m_numElements << " elements." << endl;
startClock();
sort_func(vec.begin(), vec.end());
double ms = getClock();
if(!MySortAlgoTester::is_sorted(vec.begin(), vec.end())) {
cout << name << " did not sort the array." << endl;
// throw string(name) + " did not sort the array.";
}
cout << "DONE Testing: " << name << ". Time taken (ms): " << ms << endl;
return ms;
}
};