python3 -m venv test_performance_venv
source test_performance_venv/bin/activate
python3 -m pip install -r requirements.txt


! ./scripts/install_BiasAdjustCXX.sh

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 2909k  100 2909k    0     0   953k      0  0:00:03  0:00:03 --:--:--  953k
yes: stdout: Broken pipe
-- The CXX compiler identification is Clang 12.0.0
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /Users/benjamin/opt/miniconda3/bin/x86_64-apple-darwin13.4.0-clang++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- searching netCDFCxx...
-- Found netCDFCxx: /usr/local/lib/libnetcdf-cxx4.dylib  
-- netCDFCxx found!
-- Configuring done
-- Generating done
-- Build files have been written to: /Users/benjamin/repositories/awi-workspace/BiasAdjustCXX-Performance-Test/BiasAdjustCXX/build
[ 14%] Building CXX object CMakeFiles/BiasAdjustCXX.dir/src/main.cxx.o
[ 28%] Building CXX object CMakeFiles/BiasAdjustCXX.dir/src/CMethods.cxx.o
[ 42%] Building CXX object CMakeFiles/BiasAdjustCXX.dir/src/Utils.cxx.o
[ 57%] Building CXX object CMakeFiles/BiasAdjustCXX.dir/src/NcFileHandler.cxx.o
[ 71%] Building CXX object CMakeFiles/BiasAdjustCXX.dir/src/MathUtils.cxx.o
[ 85%] Building CXX object CMakeFiles/BiasAdjustCXX.dir/src/Manager.cxx.o
[100%] Linking CXX executable BiasAdjustCXX
ld: warning: dylib (/usr/local/lib/libnetcdf-cxx4.dylib) was built for newer macOS version (12.0) than being linked (10.15)
[100%] Built target BiasAdjustCXX


%%bash

# install the required python modules (if not done as described in the Requirements section)
# python3 -m pip install xarray numpy

# start generating the test data sets
mkdir -p input_data
python3 scripts/generate_test_data.py


import xarray as xr 
xr.open_dataset('input_data/obsh-10x10.nc')

<xarray.Dataset>
Dimensions:  (time: 10950, lat: 10, lon: 10)
Coordinates:
  * time     (time) object 1971-01-01 00:00:00 ... 2000-12-31 00:00:00
  * lat      (lat) int64 0 1 2 3 4 5 6 7 8 9
  * lon      (lon) int64 0 1 2 3 4 5 6 7 8 9
Data variables:
    dummy    (time, lat, lon) float64 ...

array([cftime.DatetimeNoLeap(1971, 1, 1, 0, 0, 0, 0, has_year_zero=True),
       cftime.DatetimeNoLeap(1971, 1, 2, 0, 0, 0, 0, has_year_zero=True),
       cftime.DatetimeNoLeap(1971, 1, 3, 0, 0, 0, 0, has_year_zero=True), ...,
       cftime.DatetimeNoLeap(2000, 12, 29, 0, 0, 0, 0, has_year_zero=True),
       cftime.DatetimeNoLeap(2000, 12, 30, 0, 0, 0, 0, has_year_zero=True),
       cftime.DatetimeNoLeap(2000, 12, 31, 0, 0, 0, 0, has_year_zero=True)],
      dtype=object)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

[1095000 values with dtype=float64]

PandasIndex(CFTimeIndex([1971-01-01 00:00:00, 1971-01-02 00:00:00, 1971-01-03 00:00:00,
             1971-01-04 00:00:00, 1971-01-05 00:00:00, 1971-01-06 00:00:00,
             1971-01-07 00:00:00, 1971-01-08 00:00:00, 1971-01-09 00:00:00,
             1971-01-10 00:00:00,
             ...
             2000-12-22 00:00:00, 2000-12-23 00:00:00, 2000-12-24 00:00:00,
             2000-12-25 00:00:00, 2000-12-26 00:00:00, 2000-12-27 00:00:00,
             2000-12-28 00:00:00, 2000-12-29 00:00:00, 2000-12-30 00:00:00,
             2000-12-31 00:00:00],
            dtype='object', length=10950, calendar='noleap', freq='D'))

PandasIndex(Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64', name='lat'))

PandasIndex(Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64', name='lon'))


# make the *.sh files executable
! cmod +x *.sh


# run the `BiasAdjustCXX` performance test
./scripts/test_performance_BiasAdjustCXX.sh


# test QDM from the `xclim` module 
! ./scripts/test_performance_xclim_sdba.sh


# test QM and QDM from the `python-cmethods` module
! ./scripts/test_performance_python-cmethods.sh


# install required modules ...
# ! python3 -m pip install pandas matplotlib #...


import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import ticker
import xarray as xr
import warnings
import numpy as np
warnings.filterwarnings('ignore')


path = 'performance_results'
# Results from the performance tests of BiasAdjustCXX 
cxx_dm = pd.read_csv(f'{path}/performance_BiasAdjustCXX_method-delta_method.csv')
cxx_ls = pd.read_csv(f'{path}/performance_BiasAdjustCXX_method-linear_scaling.csv')
cxx_vs = pd.read_csv(f'{path}/performance_BiasAdjustCXX_method-variance_scaling.csv')
cxx_qm = pd.read_csv(f'{path}/performance_BiasAdjustCXX_method-quantile_mapping.csv')
cxx_qdm = pd.read_csv(f'{path}/performance_BiasAdjustCXX_method-quantile_delta_mapping.csv')

# Results from the performance tests of `python-cmethods` Quantile Mapping (QM) and Quantile Delta Mapping (QDM)
pycmethods_qm = pd.read_csv(f'{path}/performance_python-cmethods_method-quantile_mapping.csv')
pycmethods_qdm = pd.read_csv(f'{path}/performance_python-cmethods_method-quantile_delta_mapping.csv')

# Result from the performance test of `xclim` Quantile Delta Mapping (QDM)
xclim_qdm = pd.read_csv(f'{path}/performance_xclim_method-quantile_delta_mapping.csv')


def plot_multiple_threds(data: pd.DataFrame, method: str) -> None:
    ''' Plots 4 lines, each representing the execution time
        for different resolutions and number of threads
    '''
    plt.figure(figsize=(10,5), dpi=300)

    ax = plt.gca()
    for job in range(4):
        l1 = data.where(data['jobs'] == job).groupby('resolution').quantile(.5)
        l1 = l1.tail(-1).append(l1.head(1))
        l1.plot(y='time (seconds)', ax=ax, label=f'{job} job(s)')

    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.set_xticklabels([f'{x}x{x}' for x in range(0,101,10)])
    plt.grid(alpha=.2)

    plt.ylabel('execution time (seconds)')
    plt.xlabel('spatial grid resolution')
    plt.legend()
    plt.title(f'Mean execution time of BiasAdjustcXX applied 10 times to 30 years of daily test temperature values\n\
        (10950 time steps per grid cell) using {method}');


def plot_1thread_quantile(data: pd.DataFrame, method: str) -> None:
    ''' Plots 3 lines, representing the 0.25, 0.5 and 0.75 Quantile
        of the data while only respecting entries that used 1 thread.        
    '''
    plt.figure(figsize=(10,5), dpi=300)

    ax = plt.gca()

    # get entries where jobs == 1
    data_1job = data.where(data['jobs'] == 1)
    data_q25 = data_1job.groupby('resolution').quantile(.25)
    data_q25 = data_q25.tail(-1).append(data_q25.iloc[0])

    data_q50 = data_1job.groupby('resolution').quantile(.5)
    data_q50 = data_q50.tail(-1).append(data_q50.iloc[0])

    data_q75 = data_1job.groupby('resolution').quantile(.75)
    data_q75 = data_q75.tail(-1).append(data_q75.iloc[0])

    y = 'time (seconds)'
    data_q25[y].plot(ax=ax, color='red', alpha=.5)
    data_q50[y].plot(ax=ax, color='black')
    data_q75[y].plot(ax=ax, color='red', alpha=.5)

    ax.fill_between(np.arange(0,10,1),list(data_q25[y]), list(data_q75[y]), color='red', alpha=.2)
    ax.legend([ '$Q_{0.25}$', '$Q_{0.5}$', '$Q_{0.75}$' ]);

    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.set_xticklabels([f'{x}x{x}' for x in range(0,101,10)])
    plt.grid(alpha=.2)

    plt.ylabel('execution time (seconds)')
    plt.xlabel('spatial grid resolution')
    plt.title(f'BiasAdjustCXX applied 10 times to 30 years of daily test temperature values\n (10950 time steps per grid cell) using {method} (1 thread)');


plot_multiple_threds(data=cxx_dm, method='Delta Method')


plot_1thread_quantile(data=cxx_dm, method='Delta Method')


if True: # local space 
    data_1job = cxx_dm.where(cxx_dm['jobs'] == 1)
    data = data_1job.groupby('resolution').quantile(.25)
    data['$Q_{0.25}$'] = data.tail(-1).append(data.iloc[0])['time (seconds)']
    data['$Q_{0.50}$'] = data_1job.groupby('resolution').quantile(.55).tail(-1).append(data.iloc[0])['time (seconds)']
    data['$Q_{0.75}$'] = data_1job.groupby('resolution').quantile(.75).tail(-1).append(data.iloc[0])['time (seconds)']
    data = data.drop(columns=['time (seconds)'])
    print(data)

            jobs  $Q_{0.25}$  $Q_{0.50}$  $Q_{0.75}$
resolution                                          
100x100      1.0       72.00        72.0       72.00
10x10        1.0        0.00         0.0        0.00
20x20        1.0        2.00         2.0        2.00
30x30        1.0        3.00         3.0        3.00
40x40        1.0        6.25         7.0        7.00
50x50        1.0       13.00        13.0       13.00
60x60        1.0       20.00        20.0       20.00
70x70        1.0       29.00        29.0       29.00
80x80        1.0       39.00        40.0       41.00
90x90        1.0       58.00        60.9       61.75


plot_multiple_threds(data=cxx_ls, method='Linear Scaling')


plot_1thread_quantile(data=cxx_ls, method='Linear Scaling')


plot_multiple_threds(data=cxx_vs, method='Variance Scaling')


plot_1thread_quantile(data=cxx_vs, method='Variance Scaling')


plot_multiple_threds(data=cxx_qm, method='Quantile Mapping')


plot_1thread_quantile(data=cxx_qm, method='Quantile Mapping')


plot_multiple_threds(data=cxx_qdm, method='Quantile Delta Mapping')


plot_1thread_quantile(data=cxx_qdm, method='Quantile Delta Mapping')


plt.figure(figsize=(10,5), dpi=300)

ax = plt.gca()

qdm_25 = xclim_qdm.groupby('resolution').quantile(.25)
qdm_50 = xclim_qdm.groupby('resolution').quantile(.5)
qdm_75 = xclim_qdm.groupby('resolution').quantile(.75)

v = 'time (seconds)'

qdm_25[v].plot(ax=ax, color='red', alpha=.5)
qdm_50[v].plot(ax=ax, color='black')
qdm_75[v].plot(ax=ax, color='red', alpha=.5)

ax.fill_between(np.arange(0,6,1),list(qdm_25[v]), list(qdm_75[v]), color='red', alpha=.2)
ax.legend([ '$Q_{0.25}$', '$Q_{0.5}$', '$Q_{0.75}$' ]);

ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.set_xticklabels([f'{x}x{x}' for x in range(0,101,10)])
plt.grid(alpha=.2)

plt.ylabel('execution time (seconds)')
plt.xlabel('spatial grid resolution')
plt.title('xclim applied 10 times to 30 years of daily test temperature values\n (10950 time steps per grid cell) using QDM (1 thread)');


plt.figure(figsize=(10,5), dpi=300)

ax = plt.gca()
qm_25 = pycmethods_qm.groupby('resolution').quantile(.25)
qm_50 = pycmethods_qm.groupby('resolution').quantile(.5)
qm_75 = pycmethods_qm.groupby('resolution').quantile(.75)

v = 'time (seconds)'

qm_25[v].plot(ax=ax, color='red', alpha=.5)
qm_50[v].plot(ax=ax, color='black')
qm_75[v].plot(ax=ax, color='red', alpha=.5)

ax.fill_between(np.arange(0,6,1),list(qm_25[v]), list(qm_75[v]), color='red', alpha=.2)
ax.legend([ '$Q_{0.25}$', '$Q_{0.5}$', '$Q_{0.75}$' ]);

ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.set_xticklabels([f'{x}x{x}' for x in range(0,101,10)])
plt.grid(alpha=.2)

plt.ylabel('execution time (seconds)')
plt.xlabel('spatial grid resolution')
plt.title('python-cmethods applied 10 times to 30 years of daily test temperature values\n(10950 time steps per grid cell) using QM (1 thread)');


plt.figure(figsize=(10,5), dpi=300)

ax = plt.gca()
qdm_25 = pycmethods_qdm.groupby('resolution').quantile(.25)
qdm_50 = pycmethods_qdm.groupby('resolution').quantile(.5)
qdm_75 = pycmethods_qdm.groupby('resolution').quantile(.75)

v = 'time (seconds)'

qdm_25[v].plot(ax=ax, color='red', alpha=.5)
qdm_50[v].plot(ax=ax, color='black')
qdm_75[v].plot(ax=ax, color='red', alpha=.5)

ax.fill_between(np.arange(0,6,1),list(qdm_25[v]), list(qdm_75[v]), color='red', alpha=.2)
ax.legend([ '$Q_{0.25}$', '$Q_{0.5}$', '$Q_{0.75}$' ]);

ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.set_xticklabels([f'{x}x{x}' for x in range(0,101,10)])
plt.grid(alpha=.2)

plt.ylabel('execution time (seconds)')
plt.xlabel('spatial grid resolution')
plt.title('python-cmethods applied to 30 years of daily test temperature values\n(10950 time steps per grid cell) using QDM (1 thread)');


plt.figure(figsize=(10,5), dpi=300)

ax = plt.gca()
biasadjustcxx_qdm_q05 = cxx_qdm.where(cxx_qdm['jobs'] == 1).groupby('resolution').quantile(.5)
biasadjustcxx_qdm_q05 = biasadjustcxx_qdm_q05.tail(-1).append(biasadjustcxx_qdm_q05.iloc[0]) # 100x100 must be shifted to the end
biasadjustcxx_qdm_q05.plot(y='time (seconds)', ax=ax)

pycmethods_qdm_q05 = pycmethods_qdm.groupby('resolution').quantile(.5)
pycmethods_qdm_q05.plot(ax=ax)

xclim_qdm_q05 = xclim_qdm.groupby('resolution').quantile(.5)
xclim_qdm_q05.plot(ax=ax)

ax.legend([ 'BiasAdjustCXX $Q_{0.5}$', 'python-cmethods $Q_{0.5}$', 'xclim $Q_{0.5}$' ]);
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.set_xticklabels([f'{x}x{x}' for x in range(0,101,10)])
plt.grid(alpha=.2)

ax.set_xlim(0,9)

plt.ylabel('execution time (seconds)')
plt.xlabel('spatial grid resolution')
plt.title('Mean, execution time of QDM using different tools to adjust\n \
    30 years of daily temperature values for different grid resolutions\n \
        (10950 time steps per grid cell; 1 thread)');


plt.figure(figsize=(10,5), dpi=300)

ax = plt.gca()

(pycmethods_qdm_q05['time (seconds)'] - biasadjustcxx_qdm_q05['time (seconds)'].head(6)).plot(ax=ax)

ax.legend(['python-cmethods QDM minus BiasAdjustCXX QDM']);
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.set_xticklabels([f'{x}x{x}' for x in range(0,101,10)])
plt.grid(alpha=.2)

ax.set_xlim(0,6)

plt.ylabel('execution time (seconds)')
plt.xlabel('spatial grid resolution')
plt.title('Difference in execution time of QDM between python-cmethods and BiasAdjustCXX\n \
    30 years of daily temperature values for different grid resolutions\n \
        (10950 time steps per grid cell; 1 thread)');

Performance test of the BiasAdjustCXX v1.8 command-line tool

... and comparison to xclim v0.40.0 and python-cmethods v0.6.1

Introduction¶

Tools¶

Test environment and approach¶

Notes:¶

Requirements¶

Disclaimer¶

Table of contents¶

Abbreviations¶

References¶

1. Download and compile the BiasAdjustCXX tool (v1.8)¶

2. Generate test data sets¶

3. Run the performance tests for `BiasAdjustCXX` v1.8, `xclim` v0.40.0 and `python-cmethods` v0.6.1¶

4. Evaluation of the results¶

4.1 Results of BiasAdjustCXX v1.8 (DM, LS, VS, QM, and QDM)¶

BiasAdjustCXX: Delta Method¶

BiasAdjustCXX: Linear Scaling¶

BiasAdjustCXX: Variance Scaling¶

BiasAdjustCXX: Quantile Mapping¶

BiasAdjustCXX: Quantile Delta Mapping¶

4.2 Results of xclim v0.40.0 (QDM)¶

4.3 Results of python-cmethods v0.6.1 (QM and QDM)¶

4.4 Comparison of the execution time of QDM¶

5. Conclusion¶

Phrase	Definitiion
DM	Delta Method
LS	Linear Scaling
VS	Variance Scaling
QM	Quantile Mapping
QDM	Quantile Delta Mapping

Performance test of the BiasAdjustCXX v1.8 command-line tool

... and comparison to xclim v0.40.0 and python-cmethods v0.6.1

Introduction¶

Tools¶

Test environment and approach¶

Notes:¶

Requirements¶

Disclaimer¶

Table of contents¶

Abbreviations¶

References¶

1. Download and compile the BiasAdjustCXX tool (v1.8)¶

2. Generate test data sets¶

3. Run the performance tests for BiasAdjustCXX v1.8, xclim v0.40.0 and python-cmethods v0.6.1¶

4. Evaluation of the results¶

4.1 Results of BiasAdjustCXX v1.8 (DM, LS, VS, QM, and QDM)¶

BiasAdjustCXX: Delta Method¶

BiasAdjustCXX: Linear Scaling¶

BiasAdjustCXX: Variance Scaling¶

BiasAdjustCXX: Quantile Mapping¶

BiasAdjustCXX: Quantile Delta Mapping¶

4.2 Results of xclim v0.40.0 (QDM)¶

4.3 Results of python-cmethods v0.6.1 (QM and QDM)¶

4.4 Comparison of the execution time of QDM¶

5. Conclusion¶

3. Run the performance tests for `BiasAdjustCXX` v1.8, `xclim` v0.40.0 and `python-cmethods` v0.6.1¶