Skip to content

Commit

Permalink
1D-chain performance test. #3
Browse files Browse the repository at this point in the history
Based on <https://github.com/mariomulansky/hpx_odeint/tree/9792ca4f330bf0cffde4f000e900fb4c1c254891/osc_chain_1d/openmp2>

Use osc_chain_speedup.{sh,gnu} to compute and plot speedup.
"split" uses openmp_state/openmp_algebra;
"simple" uses vector/openmp_range_algebra
  • Loading branch information
Pascal Germroth committed Jul 19, 2013
1 parent daa5497 commit fd5a419
Show file tree
Hide file tree
Showing 6 changed files with 334 additions and 0 deletions.
1 change: 1 addition & 0 deletions Jamroot
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ project
# tests, regression tests and examples
build-project libs/numeric/odeint/test ;
build-project libs/numeric/odeint/examples ;
build-project libs/numeric/odeint/performance/openmp ;


# additional tests with external libraries :
Expand Down
19 changes: 19 additions & 0 deletions libs/numeric/odeint/performance/openmp/Jamfile.v2
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright 2009 Karsten Ahnert and Mario Mulansky.
# Distributed under the Boost Software License, Version 1.0. (See
# accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)

# Requirements applied to every target declared in this Jamfile:
#  - two extra include directories (five levels up, and the parent directory),
#  - BOOST_ALL_NO_LIB=1 is defined,
#  - the Boost.Timer library is linked in,
#  - sources are compiled as C++11,
#  - OpenMP is switched on at compile and link time for the gcc and intel
#    toolsets (each toolset uses its own flag spelling).
project
: requirements
<include>../../../../..
<include>..
<define>BOOST_ALL_NO_LIB=1
<library>/boost//timer
<cxxflags>--std=c++11
<toolset>gcc:<cxxflags>-fopenmp
<toolset>gcc:<linkflags>-fopenmp
<toolset>intel:<cxxflags>-openmp
<toolset>intel:<linkflags>-openmp
;

# The benchmark executable, built from a single source file.
exe osc_chain_1d : osc_chain_1d.cpp ;
128 changes: 128 additions & 0 deletions libs/numeric/odeint/performance/openmp/osc_chain_1d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/* Boost libs/numeric/odeint/performance/openmp/osc_chain_1d.cpp
Copyright 2009-2012 Karsten Ahnert
Copyright 2009-2012 Mario Mulansky
strongly nonlinear Hamiltonian lattice in 1d
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or
copy at http://www.boost.org/LICENSE_1_0.txt)
*/

#include <iostream>
#include <vector>
#include <random>

#include <omp.h>

#include <boost/numeric/odeint.hpp>
#include <boost/numeric/odeint/external/openmp/openmp.hpp>

#include <boost/timer/timer.hpp>
#include <boost/foreach.hpp>
#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics/stats.hpp>
#include <boost/accumulators/statistics/mean.hpp>
#include <boost/accumulators/statistics/median.hpp>
#include <boost/accumulators/statistics/min.hpp>

#include "osc_chain_1d_system.hpp"

using namespace std;
using namespace boost::numeric::odeint;
using namespace boost::accumulators;

using boost::timer::cpu_timer;

// Model parameters; main() forwards them, in this order, to the
// osc_chain( kap , lam , beta ) constructor.
constexpr double p_kappa  = 3.3;
constexpr double p_lambda = 4.7;
constexpr double p_beta   = 1.0;

int main( int argc , char* argv[] )
{
size_t N = 1024;
size_t blocks = omp_get_max_threads();
size_t steps = 100;
size_t repeat = 5;
bool split_range = true;
if( argc > 1 ) N = boost::lexical_cast<size_t>( argv[1] );
if( argc > 2 ) blocks = boost::lexical_cast<size_t>( argv[2] );
if( argc > 3 ) steps = boost::lexical_cast<size_t>( argv[3] );
if( argc > 4 ) repeat = boost::lexical_cast<size_t>( argv[4] );
if( argc > 5 ) split_range = boost::lexical_cast<bool>( argv[5] );

cout << "Size: " << N << " with " << blocks << " blocks and " << steps << " steps." << endl;

accumulator_set< double, stats<tag::mean, tag::median> > acc_time;

for(size_t n_rep = 0 ; n_rep != repeat ; n_rep++)
{
osc_chain system( p_kappa , p_lambda , p_beta );

// fully random data
vector<double> p_init( N ), q_init( N, 0 );
uniform_real_distribution<double> distribution( 0.0 );
mt19937 engine( 0 );
auto generator = bind( distribution , engine );
generate( p_init.begin() , p_init.end() , generator );

if(split_range) {
typedef openmp_state<double> state_type;
typedef symplectic_rkn_sb3a_mclachlan<
state_type , state_type , double
> stepper_type;

// split into blocks
state_type p( blocks );
boost::numeric::odeint::copy(p_init, p);

state_type q( blocks );
boost::numeric::odeint::copy(q_init, q);

clog << "split " << N << " into";
for(size_t i = 0 ; i != p.size() ; i++)
clog << ' ' << p[i].size();
clog << endl;

for(size_t n_run = 0 ; n_run != 5 ; n_run++) {
cpu_timer timer;
integrate_n_steps( stepper_type() , system ,
make_pair( ref(q) , ref(p) ) ,
0.0 , 0.01 , steps );
double run_time = static_cast<double>(timer.elapsed().wall) * 1.0e-9;
acc_time(run_time);
clog << "run " << n_rep << "-" << n_run << " wall[s]: " << run_time << endl;
}

} else {
typedef vector<double> state_type;
typedef symplectic_rkn_sb3a_mclachlan<
state_type , state_type , double ,
state_type , state_type , double ,
openmp_range_algebra
> stepper_type;

omp_set_num_threads(blocks);

state_type p(p_init), q(q_init);

for(size_t n_run = 0 ; n_run != 5 ; n_run++) {
cpu_timer timer;
integrate_n_steps( stepper_type() , system ,
make_pair( ref(q) , ref(p) ) ,
0.0 , 0.01 , steps );
double run_time = static_cast<double>(timer.elapsed().wall) * 1.0e-9;
acc_time(run_time);
clog << "run " << n_rep << "-" << n_run << " wall[s]: " << run_time << endl;
}

}
}

cout << " mean[s]: " << mean(acc_time)
<< " median[s]: " << median(acc_time) << endl;

return 0;
}

99 changes: 99 additions & 0 deletions libs/numeric/odeint/performance/openmp/osc_chain_1d_system.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/* Boost libs/numeric/odeint/performance/openmp/osc_chain_1d_system.hpp
Copyright 2009-2012 Karsten Ahnert
Copyright 2009-2012 Mario Mulansky
strongly nonlinear Hamiltonian lattice
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or
copy at http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef SYSTEM_HPP
#define SYSTEM_HPP

#include <vector>
#include <cmath>
#include <iostream>

#include <omp.h>

#include <boost/math/special_functions/sign.hpp>
#include <boost/numeric/odeint/external/openmp/openmp.hpp>

// Shorthand for a dynamically sized array of doubles.
using dvec = std::vector< double >;

namespace checked_math {

    /*
     * Returns |x| raised to the power y.
     *
     * A base of exactly zero always yields 0.0, whatever y is. This
     * deliberately differs from std::pow for y == 0, negative y and NaN y.
     */
    inline double pow( double x , double y )
    {
        return ( x == 0.0 ) ? 0.0 : std::pow( std::fabs( x ) , y );
    }

}

/*
 * Sign-preserving power: sign(x) * |x|^k.
 *
 * For x == 0 the result is 0, because both checked_math::pow and the sign
 * factor are zero there.
 *
 * Declared inline because the definition lives in a header: without it,
 * including this header from more than one translation unit produces
 * multiple definitions of the same function at link time.
 */
inline double signed_pow( double x , double k )
{
    using boost::math::sign;
    return checked_math::pow( x , k ) * sign(x);
}

/*
 * Force (dp/dt) functor of the 1D oscillator chain.
 *
 * For site i with neighbours i-1 and i+1 it computes
 *
 *   dpdt[i] =  signed_pow( q[i-1] - q[i] , lam-1 )
 *            - signed_pow( q[i]          , kap-1 )
 *            - signed_pow( q[i] - q[i+1] , lam-1 )
 *
 * where the neighbour value beyond either end of the chain is taken as 0.
 * m_beta is stored but not used by either call operator.
 *
 * Two overloads are provided, one per state representation. Both expect dpdt
 * to already have the same layout (sizes) as q and parallelise with OpenMP,
 * using the schedule selected at run time (schedule(runtime)).
 */
struct osc_chain {

    const double m_kap, m_lam, m_beta;

    osc_chain( const double kap , const double lam , const double beta )
        : m_kap( kap ) , m_lam( lam ) , m_beta( beta )
    { }

    // Simple case with openmp_range_algebra: one flat vector, every site is
    // an independent loop iteration.
    void operator()( const std::vector<double> &q ,
                     std::vector<double> &dpdt ) const
    {
        const size_t N = q.size();
#       pragma omp parallel for schedule(runtime)
        for(size_t i = 0 ; i < N ; ++i)
        {
            // Iterations may run on different threads, so nothing can be
            // carried over from site to site: both couplings are recomputed.
            const double q_prev = i == 0 ? 0 : q[i - 1];
            const double q_next = i + 1 == N ? 0 : q[i + 1];
            const double coupling_l = signed_pow( q_prev - q[i] , m_lam-1 );
            const double coupling_r = signed_pow( q[i] - q_next , m_lam-1 );
            dpdt[i] = coupling_l - signed_pow( q[i] , m_kap-1 ) - coupling_r;
        }
    }

    // Split case with openmp_algebra: the chain is stored as N blocks, each
    // block is one loop iteration and is walked sequentially.
    void operator()( const boost::numeric::odeint::openmp_state<double> &q ,
                     boost::numeric::odeint::openmp_state<double> &dpdt ) const
    {
        const size_t N = q.size();   // number of blocks
#       pragma omp parallel for schedule(runtime)
        for(size_t i = 0 ; i < N ; ++i)
        {
            const std::vector<double> &_q = q[i];
            std::vector<double> &_dpdt = dpdt[i];

            const size_t M = _q.size();   // number of sites in this block

            // An empty block has no sites to update; skipping it also keeps
            // M-1 below from wrapping around. Neighbouring blocks are still
            // assumed to be non-empty, since back()/front() are called on them.
            if( M == 0 ) continue;

            // Values just outside this block: the last site of the previous
            // block and the first site of the next one (0 at the chain ends).
            const double q_left = i == 0 ? 0 : q[i-1].back();
            const double q_right = i + 1 == N ? 0 : q[i+1].front();

            // Within a block the right coupling of site j is the left
            // coupling of site j+1, so it is computed once and reused.
            double coupling_lr = signed_pow( q_left - _q[0] , m_lam-1 );
            for(size_t j = 0 ; j < M-1 ; ++j)
            {
                _dpdt[j] = -signed_pow( _q[j] , m_kap-1 ) + coupling_lr;
                coupling_lr = signed_pow( _q[j] - _q[j+1] , m_lam-1 );
                _dpdt[j] -= coupling_lr;
            }

            // Last site of the block (index M-1, not the block count N-1):
            // its right neighbour lives in the next block.
            _dpdt[M-1] = -signed_pow( _q[M-1] , m_kap-1 ) + coupling_lr;
            _dpdt[M-1] -= signed_pow( _q[M-1] - q_right , m_lam-1 );
        }
    }

};

#endif
50 changes: 50 additions & 0 deletions libs/numeric/odeint/performance/openmp/osc_chain_speedup.gnu
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env gnuplot

# Renders osc_chain_speedup.png: a 2x2 grid of plots built from the files
# osc_chain_speedup-short.dat and osc_chain_speedup-long.dat.
#   top row:    the "*-mul" (speedup) columns against the "block" column
#   bottom row: the "*-med" (time) columns against the "block" column
# Columns are selected by their header names; only the first data set
# (index 0) of each file is used.

set terminal pngcairo size 1000,1000
set output "osc_chain_speedup.png"

set multiplot layout 2,2

set key left

# Shared axes for the speedup plots: 1..16 on x, 0..8 on y. The second
# x axis carries no labels and only provides a grid line every 8 units.
set xrange [1:16]
set x2range [1:16]
set x2tics 8 format ""
set grid x2tics
set yrange [0:8]

# Top left: speedup for the "short" data set. The constant 4 is drawn as a
# reference line labelled "target". This is the only plot with a legend.
set title "short: speedup"
plot \
"osc_chain_speedup-short.dat" i 0 u "block":"gcc-s-mul" w lp t "gcc (split)" , \
"osc_chain_speedup-short.dat" i 0 u "block":"gcc-t-mul" w lp t "gcc (simple)", \
"osc_chain_speedup-short.dat" i 0 u "block":"icc-s-mul" w lp t "icc (split)" , \
"osc_chain_speedup-short.dat" i 0 u "block":"icc-t-mul" w lp t "icc (simple)", \
4 lc 0 lt 0 t "target"

unset key

# Top right: speedup for the "long" data set, same curves and reference line.
set title "long: speedup"
plot \
"osc_chain_speedup-long.dat" i 0 u "block":"gcc-s-mul" w lp, \
"osc_chain_speedup-long.dat" i 0 u "block":"gcc-t-mul" w lp, \
"osc_chain_speedup-long.dat" i 0 u "block":"icc-s-mul" w lp, \
"osc_chain_speedup-long.dat" i 0 u "block":"icc-t-mul" w lp, \
4 lc 0 lt 0

# The time plots start at 0 and autoscale the upper end of the y axis.
set yrange [0:*]

# Bottom left: times for the "short" data set.
set title "short: time[s]"
plot \
"osc_chain_speedup-short.dat" i 0 u "block":"gcc-s-med" w lp, \
"osc_chain_speedup-short.dat" i 0 u "block":"gcc-t-med" w lp, \
"osc_chain_speedup-short.dat" i 0 u "block":"icc-s-med" w lp, \
"osc_chain_speedup-short.dat" i 0 u "block":"icc-t-med" w lp

# Bottom right: times for the "long" data set.
set title "long: time[s]"
plot \
"osc_chain_speedup-long.dat" i 0 u "block":"gcc-s-med" w lp, \
"osc_chain_speedup-long.dat" i 0 u "block":"gcc-t-med" w lp, \
"osc_chain_speedup-long.dat" i 0 u "block":"icc-s-med" w lp, \
"osc_chain_speedup-long.dat" i 0 u "block":"icc-t-med" w lp

unset multiplot
37 changes: 37 additions & 0 deletions libs/numeric/odeint/performance/openmp/osc_chain_speedup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/zsh

# Fix the numeric locale; presumably so that printf and the arithmetic below
# use "." as the decimal separator -- TODO confirm.
export LC_NUMERIC=en_US.UTF-8
# Associative array of measured times, keyed "<build>-<split>-<block>".
declare -A times

# Exported to the benchmark processes started below.
export OMP_SCHEDULE=static
# Passed to the benchmark as its fourth argument.
repeat=2

# run <n> <steps>
#
# Runs the benchmark binary of every build (gcc-4.7, intel-linux) in both
# modes (split=1, split=0) for block counts 1..16 and prints one
# tab-separated table to stdout:
#   - a "# n=... steps=... repeat=..." comment line,
#   - a header row: "block", then "<b>-<s>-med" and "<b>-<s>-mul" for
#     b in {gcc, icc} and s in {s, t},
#   - one row per block count holding, for each build/mode pair, the measured
#     time followed by the speedup relative to the block=1 run of that pair,
#   - three trailing newlines.
# Uses the global associative array "times" and the global "repeat".
function run {
n=$1
steps=$2
printf "# n=$n steps=$steps repeat=$repeat\n"
printf '"block"'
# header columns, in the same order in which the data columns are emitted
for b in gcc icc ; do
for s in s t ; do
for t in med mul ; do
printf "\t\"$b-$s-$t\""
done
done
done
for block in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ; do
printf '\n%d' $block
for build in gcc-4.7 intel-linux ; do
bench="bin/$build/release/osc_chain_1d"
for split in 1 0 ; do
# fourth whitespace-separated field of the benchmark's last output line
med=$($bench $n $block $steps $repeat $split | tail -1 | awk '{print $4}')
times[$build-$split-$block]=$med
# speedup = time with one block / time with the current block count;
# block=1 is measured first, so the reference is always available
speedup=$((${times[$build-$split-1]}/$med))
printf '\t%f\t%f' $med $speedup
done
done
done
printf '\n\n\n'
}

# "short": n=4096 with 1024 steps; "long": n=4194304 with a single step.
# Each table is shown on the terminal and also saved to a .dat file.
run 4096 1024 | tee osc_chain_speedup-short.dat
run 4194304 1 | tee osc_chain_speedup-long.dat

0 comments on commit fd5a419

Please sign in to comment.