diff --git a/core/solver/ir.cpp b/core/solver/ir.cpp
index 62243b169af..248ac7c0e80 100644
--- a/core/solver/ir.cpp
+++ b/core/solver/ir.cpp
@@ -63,6 +63,7 @@ void Ir<ValueType>::apply_impl(const LinOp *b, LinOp *x) const
     auto dense_b = as<const Vector>(b);
     auto dense_x = as<Vector>(x);
     auto residual = Vector::create_with_config_of(dense_b);
+    auto inner_solution = Vector::create_with_config_of(dense_b);
 
     bool one_changed{};
     Array<stopping_status> stop_status(exec, dense_b->get_size()[1]);
@@ -91,7 +92,16 @@ void Ir<ValueType>::apply_impl(const LinOp *b, LinOp *x) const
             break;
         }
 
-        solver_->apply(lend(one_op), lend(residual), lend(one_op), dense_x);
+        // Use the inner solver to solve
+        // A * inner_solution = residual
+        // with residual as initial guess.
+        inner_solution->copy_from(residual.get());
+        solver_->apply(lend(residual), lend(inner_solution));
+
+        // x = x + inner_solution
+        dense_x->add_scaled(lend(one_op), lend(inner_solution));
+
+        // residual = b - A * x
         residual->copy_from(dense_b);
         system_matrix_->apply(lend(neg_one_op), dense_x, lend(one_op),
                               lend(residual));
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 8b8c8533868..dabebf283cb 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -10,6 +10,7 @@ add_subdirectory(ginkgo-overhead)
 add_subdirectory(ginkgo-ranges)
 add_subdirectory(ilu-preconditioned-solver)
 add_subdirectory(inverse-iteration)
+add_subdirectory(iterative-refinement)
 add_subdirectory(minimal-cuda-solver)
 add_subdirectory(nine-pt-stencil-solver)
 add_subdirectory(papi-logging)
@@ -20,4 +21,3 @@ add_subdirectory(simple-solver)
 add_subdirectory(simple-solver-logging)
 add_subdirectory(three-pt-stencil-solver)
 add_subdirectory(twentyseven-pt-stencil-solver)
-
diff --git a/examples/iterative-refinement/CMakeLists.txt b/examples/iterative-refinement/CMakeLists.txt
new file mode 100644
index 00000000000..a21b54d2a96
--- /dev/null
+++ b/examples/iterative-refinement/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_executable(iterative-refinement iterative-refinement.cpp)
+target_link_libraries(iterative-refinement ginkgo)
+target_include_directories(iterative-refinement PRIVATE ${PROJECT_SOURCE_DIR})
+configure_file(data/A.mtx data/A.mtx COPYONLY)
diff --git a/examples/iterative-refinement/build.sh b/examples/iterative-refinement/build.sh
new file mode 100644
index 00000000000..06f7d201f1b
--- /dev/null
+++ b/examples/iterative-refinement/build.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# set up script
+if [ $# -ne 1 ]; then
+    echo -e "Usage: $0 GINKGO_BUILD_DIRECTORY"
+    exit 1
+fi
+BUILD_DIR=$1
+THIS_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" &>/dev/null && pwd )
+
+# copy libraries
+LIBRARY_DIRS="core core/device_hooks reference omp cuda hip"
+LIBRARY_NAMES="ginkgo ginkgo_reference ginkgo_omp ginkgo_cuda ginkgo_hip"
+SUFFIXES=".so .dylib .dll d.so d.dylib d.dll"
+for prefix in ${LIBRARY_DIRS}; do
+    for name in ${LIBRARY_NAMES}; do
+        for suffix in ${SUFFIXES}; do
+            cp ${BUILD_DIR}/${prefix}/lib${name}${suffix} \
+                ${THIS_DIR}/lib${name}${suffix} 2>/dev/null
+        done
+    done
+done
+
+# figure out correct compiler flags
+if ls ${THIS_DIR} | grep -F "libginkgo." >/dev/null; then
+    LINK_FLAGS="-lginkgo -lginkgo_omp -lginkgo_cuda -lginkgo_reference -lginkgo_hip"
+else
+    LINK_FLAGS="-lginkgod -lginkgo_ompd -lginkgo_cudad -lginkgo_referenced -lginkgo_hipd"
+fi
+if [ -z "${CXX}" ]; then
+    CXX="c++"
+fi
+
+# build
+${CXX} -std=c++11 -o ${THIS_DIR}/iterative-refinement \
+    ${THIS_DIR}/iterative-refinement.cpp \
+    -I${THIS_DIR}/../../include -I${BUILD_DIR}/include \
+    -L${THIS_DIR} ${LINK_FLAGS}
diff --git a/examples/iterative-refinement/data/A.mtx b/examples/iterative-refinement/data/A.mtx
new file mode 100644
index 00000000000..c67437da567
--- /dev/null
+++ b/examples/iterative-refinement/data/A.mtx
@@ -0,0 +1,114 @@
+%%MatrixMarket matrix coordinate integer symmetric
+%-------------------------------------------------------------------------------
+% UF Sparse Matrix Collection, Tim Davis
+% http://www.cise.ufl.edu/research/sparse/matrices/JGD_Trefethen/Trefethen_20b
+% name: JGD_Trefethen/Trefethen_20b
+% [Diagonal matrices with primes, Nick Trefethen, Oxford Univ.]
+% id: 2203
+% date: 2008
+% author: N. Trefethen
+% ed: J.-G. Dumas
+% fields: name title A id date author ed kind notes
+% kind: combinatorial problem
+%-------------------------------------------------------------------------------
+% notes:
+% Diagonal matrices with primes, Nick Trefethen, Oxford Univ.          
+% From Jean-Guillaume Dumas' Sparse Integer Matrix Collection,         
+% http://ljk.imag.fr/membres/Jean-Guillaume.Dumas/simc.html            
+%                                                                      
+% Problem 7 of the Hundred-dollar, Hundred-digit Challenge Problems,   
+% SIAM News, vol 35, no. 1.                                            
+%                                                                      
+% 7. Let A be the 20,000 x 20,000 matrix whose entries are zero        
+% everywhere except for the primes 2, 3, 5, 7, . . . , 224737 along the
+% main diagonal and the number 1 in all the positions A(i,j) with      
+% |i-j| = 1,2,4,8, . . . ,16384.  What is the (1,1) entry of inv(A)?   
+%                                                                      
+% http://www.siam.org/news/news.php?id=388                             
+%                                                                      
+% Filename in JGD collection: Trefethen/trefethen_20__19_minor.sms     
+%-------------------------------------------------------------------------------
+19 19 83
+1 1 3
+2 1 1
+3 1 1
+5 1 1
+9 1 1
+17 1 1
+2 2 5
+3 2 1
+4 2 1
+6 2 1
+10 2 1
+18 2 1
+3 3 7
+4 3 1
+5 3 1
+7 3 1
+11 3 1
+19 3 1
+4 4 11
+5 4 1
+6 4 1
+8 4 1
+12 4 1
+5 5 13
+6 5 1
+7 5 1
+9 5 1
+13 5 1
+6 6 17
+7 6 1
+8 6 1
+10 6 1
+14 6 1
+7 7 19
+8 7 1
+9 7 1
+11 7 1
+15 7 1
+8 8 23
+9 8 1
+10 8 1
+12 8 1
+16 8 1
+9 9 29
+10 9 1
+11 9 1
+13 9 1
+17 9 1
+10 10 31
+11 10 1
+12 10 1
+14 10 1
+18 10 1
+11 11 37
+12 11 1
+13 11 1
+15 11 1
+19 11 1
+12 12 41
+13 12 1
+14 12 1
+16 12 1
+13 13 43
+14 13 1
+15 13 1
+17 13 1
+14 14 47
+15 14 1
+16 14 1
+18 14 1
+15 15 53
+16 15 1
+17 15 1
+19 15 1
+16 16 59
+17 16 1
+18 16 1
+17 17 61
+18 17 1
+19 17 1
+18 18 67
+19 18 1
+19 19 71
diff --git a/examples/iterative-refinement/doc/builds-on b/examples/iterative-refinement/doc/builds-on
new file mode 100644
index 00000000000..369aa997770
--- /dev/null
+++ b/examples/iterative-refinement/doc/builds-on
@@ -0,0 +1 @@
+simple-solver
diff --git a/examples/iterative-refinement/doc/intro.dox b/examples/iterative-refinement/doc/intro.dox
new file mode 100644
index 00000000000..049c0f24cc7
--- /dev/null
+++ b/examples/iterative-refinement/doc/intro.dox
@@ -0,0 +1,8 @@
+<a name="Iterative Refinement"></a>
+<h1>This example shows how to use the iterative refinement solver.</h1>
+
+<h3> In this example, we first read in a matrix from file, then generate a
+right-hand side and an initial guess. An inaccurate CG solver is used as the
+inner solver to an iterative refinement (IR) method which solves a linear
+system. The example features the iteration count and runtime of the IR solver.
+</h3>
diff --git a/examples/iterative-refinement/doc/kind b/examples/iterative-refinement/doc/kind
new file mode 100644
index 00000000000..15a13db4511
--- /dev/null
+++ b/examples/iterative-refinement/doc/kind
@@ -0,0 +1 @@
+basic
diff --git a/examples/iterative-refinement/doc/results.dox b/examples/iterative-refinement/doc/results.dox
new file mode 100644
index 00000000000..1ee878f6e02
--- /dev/null
+++ b/examples/iterative-refinement/doc/results.dox
@@ -0,0 +1,19 @@
+<h1>Results</h1>
+This is the expected output:
+
+@code{.cpp}
+
+Initial residual norm sqrt(r^T r):
+%%MatrixMarket matrix array real general
+1 1
+194.679
+Final residual norm sqrt(r^T r):
+%%MatrixMarket matrix array real general
+1 1
+4.23821e-11
+IR iteration count:     24
+IR execution time [ms]: 18.0692
+
+@endcode
+
+<h3> Comments about programming and debugging </h3>
diff --git a/examples/iterative-refinement/doc/short-intro b/examples/iterative-refinement/doc/short-intro
new file mode 100644
index 00000000000..a91594e87a5
--- /dev/null
+++ b/examples/iterative-refinement/doc/short-intro
@@ -0,0 +1 @@
+The iterative refinement solver example.
diff --git a/examples/iterative-refinement/doc/tooltip b/examples/iterative-refinement/doc/tooltip
new file mode 100644
index 00000000000..852c8b02e65
--- /dev/null
+++ b/examples/iterative-refinement/doc/tooltip
@@ -0,0 +1 @@
+Use an iterative refinement method in Ginkgo. Solve a linear system.
diff --git a/examples/iterative-refinement/iterative-refinement.cpp b/examples/iterative-refinement/iterative-refinement.cpp
new file mode 100644
index 00000000000..41a4047b771
--- /dev/null
+++ b/examples/iterative-refinement/iterative-refinement.cpp
@@ -0,0 +1,149 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2020, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+
+#include <ginkgo/ginkgo.hpp>
+
+
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <string>
+
+
+int main(int argc, char *argv[])
+{
+    // Some shortcuts
+    using ValueType = double;
+    using IndexType = int;
+    using vec = gko::matrix::Dense<ValueType>;
+    using mtx = gko::matrix::Csr<ValueType, IndexType>;
+    using cg = gko::solver::Cg<ValueType>;
+    using ir = gko::solver::Ir<ValueType>;
+
+    // Print version information
+    std::cout << gko::version_info::get() << std::endl;
+
+    // Figure out where to run the code
+    std::shared_ptr<gko::Executor> exec;
+    if (argc == 1 || std::string(argv[1]) == "reference") {
+        exec = gko::ReferenceExecutor::create();
+    } else if (argc == 2 && std::string(argv[1]) == "omp") {
+        exec = gko::OmpExecutor::create();
+    } else if (argc == 2 && std::string(argv[1]) == "cuda" &&
+               gko::CudaExecutor::get_num_devices() > 0) {
+        exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create());
+    } else if (argc == 2 && std::string(argv[1]) == "hip" &&
+               gko::HipExecutor::get_num_devices() > 0) {
+        exec = gko::HipExecutor::create(0, gko::OmpExecutor::create());
+    } else {
+        std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
+        std::exit(-1);
+    }
+
+    // Read data
+    auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
+    // Create RHS and initial guess as 1
+    gko::size_type size = A->get_size()[0];
+    auto host_x = gko::matrix::Dense<ValueType>::create(exec->get_master(),
+                                                        gko::dim<2>(size, 1));
+    for (auto i = 0; i < size; i++) {
+        host_x->at(i, 0) = 1.;
+    }
+    auto x = gko::matrix::Dense<ValueType>::create(exec);
+    auto b = gko::matrix::Dense<ValueType>::create(exec);
+    x->copy_from(host_x.get());
+    b->copy_from(host_x.get());
+
+    // Calculate initial residual by overwriting b
+    auto one = gko::initialize<vec>({1.0}, exec);
+    auto neg_one = gko::initialize<vec>({-1.0}, exec);
+    auto initres = gko::initialize<vec>({0.0}, exec);
+    A->apply(lend(one), lend(x), lend(neg_one), lend(b));
+    b->compute_norm2(lend(initres));
+
+    // copy b again
+    b->copy_from(host_x.get());
+    gko::size_type max_iters= 10000u; 
+    gko::remove_complex<ValueType> outer_reduction_factor = 1e-12;
+    auto iter_stop =
+        gko::stop::Iteration::build().with_max_iters(max_iters).on(exec);
+    auto tol_stop = gko::stop::ResidualNormReduction<ValueType>::build()
+                        .with_reduction_factor(outer_reduction_factor)
+                        .on(exec);
+
+    std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
+        gko::log::Convergence<ValueType>::create(exec);
+    iter_stop->add_logger(logger);
+    tol_stop->add_logger(logger);
+
+    // Create solver factory
+    gko::remove_complex<ValueType> inner_reduction_factor = 1e-2;
+    auto solver_gen =
+        ir::build()
+            .with_solver(
+                cg::build()
+                    .with_criteria(
+                        gko::stop::ResidualNormReduction<ValueType>::build()
+                            .with_reduction_factor(inner_reduction_factor)
+                            .on(exec))
+                    .on(exec))
+            .with_criteria(gko::share(iter_stop), gko::share(tol_stop))
+            .on(exec);
+    // Create solver
+    auto solver = solver_gen->generate(A);
+
+
+    // Solve system
+    exec->synchronize();
+    std::chrono::nanoseconds time(0);
+    auto tic = std::chrono::steady_clock::now();
+    solver->apply(lend(b), lend(x));
+    auto toc = std::chrono::steady_clock::now();
+    time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
+
+    // Calculate residual
+    auto res = gko::initialize<vec>({0.0}, exec);
+    A->apply(lend(one), lend(x), lend(neg_one), lend(b));
+    b->compute_norm2(lend(res));
+
+    std::cout << "Initial residual norm sqrt(r^T r): \n";
+    write(std::cout, lend(initres));
+    std::cout << "Final residual norm sqrt(r^T r): \n";
+    write(std::cout, lend(res));
+
+    // Print solver statistics
+    std::cout << "IR iteration count:     " << logger->get_num_iterations()
+              << std::endl;
+    std::cout << "IR execution time [ms]: "
+              << static_cast<double>(time.count()) / 1000000.0 << std::endl;
+}