Ginkgo Generated from branch based on main. Ginkgo version 1.10.0
A numerical linear algebra library targeting many-core architectures
Loading...
Searching...
No Matches
The heat-equation program

The heat equation example.

This example depends on simple-solver, three-pt-stencil-solver.

Table of contents
  1. Introduction
  2. The commented program
  1. Results
  2. The plain program

Introduction

This example solves a 2D heat conduction equation

$    u : [0, d]^2 \rightarrow R\\
    \partial_t u = \delta u + f
$

with Dirichlet boundary conditions and given initial condition and constant-in-time source function f.

The partial differential equation (PDE) is solved with a finite difference spatial discretization on an equidistant grid: For n grid points, and grid distance $h = 1/n$ we write

$    u_{i,j}' =
    \alpha\frac{u_{i-1,j}+u_{i+1,j}+u_{i,j-1}+u_{i,j+1}-4u_{i,j}}{h^2}+f_{i,j}
$

We then build an implicit Euler integrator by discretizing with time step $\tau$

$    \frac{u_{i,j}^{k+1} - u_{i,j}^k}{\tau} =
    \alpha\frac{u_{i-1,j}^{k+1}+u_{i+1,j}^{k+1}
               -u_{i,j-1}^{k+1}-u_{i,j+1}^{k+1}+4u_{i,j}^{k+1}}{h^2}
    +f_{i,j}
$

and solve the resulting linear system for $ u_{\cdot}^{k+1}$ using Ginkgo's CG solver preconditioned with an incomplete Cholesky factorization for each time step, occasionally writing the resulting grid values into a video file using OpenCV and a custom color mapping.

The intention of this example is to provide a mini-app showing matrix assembly, vector initialization, solver setup and the use of Ginkgo in a more complex setting.

About the example

The commented program

The intention of this example is to provide a mini-app showing matrix assembly,
vector initialization, solver setup and the use of Ginkgo in a more complex
setting.
*****************************<DESCRIPTION>********************************** /
#include <chrono>
#include <fstream>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>
#include <ginkgo/ginkgo.hpp>

This function implements a simple Ginkgo-themed clamped color mapping for values in the range [0,5].

void set_val(unsigned char* data, double value)
{

RGB values for the 6 colors used for values 0, 1, ..., 5 We will interpolate linearly between these values.

double col_r[] = {255, 221, 129, 201, 249, 255};
double col_g[] = {255, 220, 130, 161, 158, 204};
double col_b[] = {255, 220, 133, 93, 24, 8};
value = std::max(0.0, value);
auto i = std::max(0, std::min(4, int(value)));
auto d = std::max(0.0, std::min(1.0, value - i));

OpenCV uses BGR instead of RGB by default, revert indices

data[2] = static_cast<unsigned char>(col_r[i + 1] * d + col_r[i] * (1 - d));
data[1] = static_cast<unsigned char>(col_g[i + 1] * d + col_g[i] * (1 - d));
data[0] = static_cast<unsigned char>(col_b[i + 1] * d + col_b[i] * (1 - d));
}

Initialize video output with given dimension and FPS (frames per seconds)

std::pair<cv::VideoWriter, cv::Mat> build_output(int n, double fps)
{
cv::Size videosize{n, n};
auto output =
std::make_pair(cv::VideoWriter{}, cv::Mat{videosize, CV_8UC3});
auto fourcc = cv::VideoWriter::fourcc('a', 'v', 'c', '1');
output.first.open("heat.mp4", fourcc, fps, videosize);
return output;
}

Write the current frame to video output using the above color mapping

void output_timestep(std::pair<cv::VideoWriter, cv::Mat>& output, int n,
const double* data)
{
for (int i = 0; i < n; i++) {
auto row = output.second.ptr(i);
for (int j = 0; j < n; j++) {
set_val(&row[3 * j], data[i * n + j]);
}
}
output.first.write(output.second);
}
int main(int argc, char* argv[])
{
using mtx = gko::matrix::Csr<>;
using vec = gko::matrix::Dense<>;
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition csr.hpp:126
Dense is a matrix format which explicitly stores all values of the matrix.
Definition dense.hpp:120

Problem parameters: simulation length

auto t0 = 5.0;

diffusion factor

auto diffusion = 0.0005;

scaling factor for heat source

auto source_scale = 2.5;

Simulation parameters: inner grid points per discretization direction

auto n = 256;

number of simulation steps per second

auto steps_per_sec = 500;

number of video frames per second

auto fps = 25;

number of grid points

auto n2 = n * n;

grid point distance (ignoring boundary points)

auto h = 1.0 / (n + 1);
auto h2 = h * h;

time step size for the simulation

auto tau = 1.0 / steps_per_sec;

create a CUDA executor with an associated OpenMP host executor

static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
Creates a new CudaExecutor.
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Creates a new OmpExecutor.
Definition executor.hpp:1396

load heat source and initial state vectors

std::ifstream initial_stream("data/gko_logo_2d.mtx");
std::ifstream source_stream("data/gko_text_2d.mtx");
auto source = gko::read<vec>(source_stream, exec);
auto in_vector = gko::read<vec>(initial_stream, exec);
std::unique_ptr< MatrixType > read(StreamType &&is, MatrixArgs &&... args)
Reads a matrix stored in matrix market format from an input stream.
Definition mtx_io.hpp:159

create output vector with initial guess for

auto out_vector = in_vector->clone();

create scalar for source update

auto tau_source_scalar = gko::initialize<vec>({source_scale * tau}, exec);
std::unique_ptr< Matrix > initialize(size_type stride, std::initializer_list< typename Matrix::value_type > vals, std::shared_ptr< const Executor > exec, TArgs &&... create_args)
Creates and initializes a column-vector.
Definition dense.hpp:1574

create stencil matrix as shared_ptr for solver

auto stencil_matrix = gko::share(mtx::create(exec));
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition utils_helper.hpp:224

assemble matrix

gko::matrix_data<> mtx_data{gko::dim<2>(n2, n2)};
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
auto c = i * n + j;
auto c_val = diffusion * tau * 4.0 / h2 + 1.0;
auto off_val = -diffusion * tau / h2;
A type representing the dimensions of a multidimensional object.
Definition dim.hpp:26
This structure is used as an intermediate data type to store a sparse matrix.
Definition matrix_data.hpp:126

for each grid point: insert 5 stencil points with Dirichlet boundary conditions, i.e. with zero boundary value

if (i > 0) {
mtx_data.nonzeros.emplace_back(c, c - n, off_val);
}
if (j > 0) {
mtx_data.nonzeros.emplace_back(c, c - 1, off_val);
}
mtx_data.nonzeros.emplace_back(c, c, c_val);
if (j < n - 1) {
mtx_data.nonzeros.emplace_back(c, c + 1, off_val);
}
if (i < n - 1) {
mtx_data.nonzeros.emplace_back(c, c + n, off_val);
}
}
}
stencil_matrix->read(mtx_data);
std::vector< nonzero_type > nonzeros
A vector of tuples storing the non-zeros of the matrix.
Definition matrix_data.hpp:453

prepare video output

auto output = build_output(n, fps);

build CG solver on stencil with incomplete Cholesky preconditioner stopping at 1e-10 relative accuracy

auto solver =
gko::solver::Cg<>::build()
.with_preconditioner(gko::preconditioner::Ic<>::build())
.with_criteria(gko::stop::ResidualNorm<>::build()
.with_baseline(gko::stop::mode::rhs_norm)
.with_reduction_factor(1e-10))
.on(exec)
->generate(stencil_matrix);

time stamp of the last output frame (initialized to a sentinel value)

double last_t = -t0;

execute implicit Euler method: for each timestep, solve stencil system

for (double t = 0; t < t0; t += tau) {

if enough time has passed, output the next video frame

if (t - last_t > 1.0 / fps) {
last_t = t;
std::cout << t << std::endl;
output_timestep(
output, n,
gko::make_temporary_clone(exec->get_master(), in_vector)
->get_const_values());
}
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Creates a temporary_clone.
Definition temporary_clone.hpp:208

add heat source contribution

in_vector->add_scaled(tau_source_scalar, source);

execute Euler step

solver->apply(in_vector, out_vector);

swap input and output

std::swap(in_vector, out_vector);
}
}

Results

The program will generate a video file named heat.mp4 and output the timestamp of each generated frame.

Comments about programming and debugging

The plain program

/*****************************<DESCRIPTION>***********************************
This example solves a 2D heat conduction equation
u : [0, d]^2 \rightarrow R\\
\partial_t u = \delta u + f
with Dirichlet boundary conditions and given initial condition and
constant-in-time source function f.
The partial differential equation (PDE) is solved with a finite difference
spatial discretization on an equidistant grid: For `n` grid points,
and grid distance @f$h = 1/n@f$ we write
u_{i,j}' = \alpha (u_{i-1,j} + u_{i+1,j} + u_{i,j-1} + u_{i,j+1}
- 4 u_{i,j}) / h^2
+ f_{i,j}
We then build an implicit Euler integrator by discretizing with time step @f$\tau@f$
(u_{i,j}^{k+1} - u_{i,j}^k) / \tau =
\alpha (u_{i-1,j}^{k+1} - u_{i+1,j}^{k+1}
+ u_{i,j-1}^{k+1} - u_{i,j+1}^{k+1} - 4 u_{i,j}^{k+1}) / h^2
+ f_{i,j}
and solve the resulting linear system for @f$ u_{\cdot}^{k+1}@f$ using Ginkgo's CG
solver preconditioned with an incomplete Cholesky factorization for each time
step, occasionally writing the resulting grid values into a video file using
OpenCV and a custom color mapping.
The intention of this example is to provide a mini-app showing matrix assembly,
vector initialization, solver setup and the use of Ginkgo in a more complex
setting.
*****************************<DESCRIPTION>**********************************/
#include <chrono>
#include <fstream>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>
#include <ginkgo/ginkgo.hpp>
void set_val(unsigned char* data, double value)
{
double col_r[] = {255, 221, 129, 201, 249, 255};
double col_g[] = {255, 220, 130, 161, 158, 204};
double col_b[] = {255, 220, 133, 93, 24, 8};
value = std::max(0.0, value);
auto i = std::max(0, std::min(4, int(value)));
auto d = std::max(0.0, std::min(1.0, value - i));
data[2] = static_cast<unsigned char>(col_r[i + 1] * d + col_r[i] * (1 - d));
data[1] = static_cast<unsigned char>(col_g[i + 1] * d + col_g[i] * (1 - d));
data[0] = static_cast<unsigned char>(col_b[i + 1] * d + col_b[i] * (1 - d));
}
std::pair<cv::VideoWriter, cv::Mat> build_output(int n, double fps)
{
cv::Size videosize{n, n};
auto output =
std::make_pair(cv::VideoWriter{}, cv::Mat{videosize, CV_8UC3});
auto fourcc = cv::VideoWriter::fourcc('a', 'v', 'c', '1');
output.first.open("heat.mp4", fourcc, fps, videosize);
return output;
}
void output_timestep(std::pair<cv::VideoWriter, cv::Mat>& output, int n,
const double* data)
{
for (int i = 0; i < n; i++) {
auto row = output.second.ptr(i);
for (int j = 0; j < n; j++) {
set_val(&row[3 * j], data[i * n + j]);
}
}
output.first.write(output.second);
}
int main(int argc, char* argv[])
{
using mtx = gko::matrix::Csr<>;
using vec = gko::matrix::Dense<>;
auto t0 = 5.0;
auto diffusion = 0.0005;
auto source_scale = 2.5;
auto n = 256;
auto steps_per_sec = 500;
auto fps = 25;
auto n2 = n * n;
auto h = 1.0 / (n + 1);
auto h2 = h * h;
auto tau = 1.0 / steps_per_sec;
std::ifstream initial_stream("data/gko_logo_2d.mtx");
std::ifstream source_stream("data/gko_text_2d.mtx");
auto source = gko::read<vec>(source_stream, exec);
auto in_vector = gko::read<vec>(initial_stream, exec);
auto out_vector = in_vector->clone();
auto tau_source_scalar = gko::initialize<vec>({source_scale * tau}, exec);
auto stencil_matrix = gko::share(mtx::create(exec));
gko::matrix_data<> mtx_data{gko::dim<2>(n2, n2)};
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
auto c = i * n + j;
auto c_val = diffusion * tau * 4.0 / h2 + 1.0;
auto off_val = -diffusion * tau / h2;
if (i > 0) {
mtx_data.nonzeros.emplace_back(c, c - n, off_val);
}
if (j > 0) {
mtx_data.nonzeros.emplace_back(c, c - 1, off_val);
}
mtx_data.nonzeros.emplace_back(c, c, c_val);
if (j < n - 1) {
mtx_data.nonzeros.emplace_back(c, c + 1, off_val);
}
if (i < n - 1) {
mtx_data.nonzeros.emplace_back(c, c + n, off_val);
}
}
}
stencil_matrix->read(mtx_data);
auto output = build_output(n, fps);
auto solver =
gko::solver::Cg<>::build()
.with_preconditioner(gko::preconditioner::Ic<>::build())
.with_criteria(gko::stop::ResidualNorm<>::build()
.with_baseline(gko::stop::mode::rhs_norm)
.with_reduction_factor(1e-10))
.on(exec)
->generate(stencil_matrix);
double last_t = -t0;
for (double t = 0; t < t0; t += tau) {
if (t - last_t > 1.0 / fps) {
last_t = t;
std::cout << t << std::endl;
output_timestep(
output, n,
gko::make_temporary_clone(exec->get_master(), in_vector)
->get_const_values());
}
in_vector->add_scaled(tau_source_scalar, source);
solver->apply(in_vector, out_vector);
std::swap(in_vector, out_vector);
}
}
@ solver
Solver events.
Definition profiler_hook.hpp:34