8000 Dev openmp by martkir0919 · Pull Request #722 · acados/acados · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Dev openmp #722

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 37 additions & 12 deletions acados/ocp_nlp/ocp_nlp_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -2035,8 +2035,8 @@ void ocp_nlp_approximate_qp_matrices(ocp_nlp_config *config, ocp_nlp_dims *dims,

}

for (i = 0; i <= N; i++)
{
//for (i = 0; i <= N; i++)
//{
// TODO(rien) where should the update happen??? move to qp update ???
// TODO(all): fix and move where appropriate
// if (i<N)
Expand All @@ -2051,7 +2051,7 @@ void ocp_nlp_approximate_qp_matrices(ocp_nlp_config *config, ocp_nlp_dims *dims,
// BLASFEO_DVECEL(nlp_mem->cost_grad+i, j) += work->sim_out[i]->grad[nx+j];
// }
// }
}
//}
}


Expand Down Expand Up @@ -2555,46 +2555,68 @@ void ocp_nlp_res_compute(ocp_nlp_dims *dims, ocp_nlp_in *in, ocp_nlp_out *out, o
int *ni = dims->ni;

double tmp_res;
double inf_norm_res_stat = 0.0;
double inf_norm_res_eq = 0.0;
double inf_norm_res_ineq = 0.0;
double inf_norm_res_comp = 0.0;

// res_stat
res->inf_norm_res_stat = 0.0;
#if defined(ACADOS_WITH_OPENMP)
#pragma omp declare reduction(dmax : double : \
omp_out = (omp_in > omp_out) ? omp_in : omp_out)
#pragma omp parallel
{ // beginning of parallel region
#pragma omp for private(tmp_res) reduction(dmax:inf_norm_res_stat) nowait
#endif
for (int ii = 0; ii <= N; ii++)
{
blasfeo_daxpy(nv[ii], -1.0, mem->ineq_adj + ii, 0, mem->cost_grad + ii, 0, res->res_stat + ii,
0);
blasfeo_daxpy(nu[ii] + nx[ii], -1.0, mem->dyn_adj + ii, 0, res->res_stat + ii, 0,
res->res_stat + ii, 0);
blasfeo_dvecnrm_inf(nv[ii], res->res_stat + ii, 0, &tmp_res);
res->inf_norm_res_stat = tmp_res > res->inf_norm_res_stat ? tmp_res : res->inf_norm_res_stat;
inf_norm_res_stat = tmp_res > inf_norm_res_stat ? tmp_res : inf_norm_res_stat;
}

// res_eq
res->inf_norm_res_eq = 0.0;
#if defined(ACADOS_WITH_OPENMP)
#pragma omp for private(tmp_res) reduction(dmax:inf_norm_res_eq) nowait
#endif
for (int ii = 0; ii < N; ii++)
{
blasfeo_dveccp(nx[ii + 1], mem->dyn_fun + ii, 0, res->res_eq + ii, 0);
blasfeo_dvecnrm_inf(nx[ii + 1], res->res_eq + ii, 0, &tmp_res);
res->inf_norm_res_eq = tmp_res > res->inf_norm_res_eq ? tmp_res : res->inf_norm_res_eq;
inf_norm_res_eq = tmp_res > inf_norm_res_eq ? tmp_res : inf_norm_res_eq;
}

// res_ineq
res->inf_norm_res_ineq = 0.0;
#if defined(ACADOS_WITH_OPENMP)
#pragma omp for private(tmp_res) reduction(dmax:inf_norm_res_ineq) nowait
#endif
for (int ii = 0; ii <= N; ii++)
{
blasfeo_daxpy(2 * ni[ii], 1.0, out->t + ii, 0, mem->ineq_fun + ii, 0, res->res_ineq + ii, 0);
blasfeo_dvecnrm_inf(2 * ni[ii], res->res_ineq + ii, 0, &tmp_res);
res->inf_norm_res_ineq = tmp_res > res->inf_norm_res_ineq ? tmp_res : res->inf_norm_res_ineq;
inf_norm_res_ineq = tmp_res > inf_norm_res_ineq ? tmp_res : inf_norm_res_ineq;
}

// res_comp
res->inf_norm_res_comp = 0.0;
#if defined(ACADOS_WITH_OPENMP)
#pragma omp for private(tmp_res) reduction(dmax:inf_norm_res_comp) nowait
#endif
for (int ii = 0; ii <= N; ii++)
{
blasfeo_dvecmul(2 * ni[ii], out->lam + ii, 0, out->t + ii, 0, res->res_comp + ii, 0);
blasfeo_dvecnrm_inf(2 * ni[ii], res->res_comp + ii, 0, &tmp_res);
res->inf_norm_res_comp = tmp_res > res->inf_norm_res_comp ? tmp_res : res->inf_norm_res_comp;
inf_norm_res_comp = tmp_res > inf_norm_res_comp ? tmp_res : inf_norm_res_comp;
}

#if defined(ACADOS_WITH_OPENMP)
} // end of parallel region
#endif
res->inf_norm_res_stat = inf_norm_res_stat;
res->inf_norm_res_eq = inf_norm_res_eq;
res->inf_norm_res_ineq = inf_norm_res_ineq;
res->inf_norm_res_comp = inf_norm_res_comp;
// printf("computed residuals g: %e, b: %e, d: %e, m: %e\n", res->inf_norm_res_stat, res->inf_norm_res_eq,
// res->inf_norm_res_ineq, res->inf_norm_res_comp);
}
Expand All @@ -2609,6 +2631,9 @@ void ocp_nlp_cost_compute(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in
double* tmp_cost = NULL;
double total_cost = 0.0;

#if defined(ACADOS_WITH_OPENMP)
#pragma omp parallel for private(tmp_cost) reduction(+:total_cost)
#endif
for (int ii = 0; ii <= N; ii++)
{
// set pointers
Expand Down
33 changes: 21 additions & 12 deletions acados/ocp_nlp/ocp_nlp_constraints_bgh.c
Original file line number Diff line number Diff line change
Expand Up @@ -1405,13 +1405,16 @@ void ocp_nlp_constraints_bgh_update_qp_matrices(void *config_, void *dims_, void
blasfeo_daxpy(nb+ng+nh, -1.0, &model->d, nb+ng+nh, &work->tmp_ni, 0, &memory->fun, nb+ng+nh);

// soft
// subtract slacks from softened constraints
// fun_i = fun_i - slack_i for i \in I_slacked
blasfeo_dvecad_sp(ns, -1.0, memory->ux, nu+nx, model->idxs, &memory->fun, 0);
blasfeo_dvecad_sp(ns, -1.0, memory->ux, nu+nx+ns, model->idxs, &memory->fun, nb+ng+nh);
if (ns)
{
// subtract slacks from softened constraints
// fun_i = fun_i - slack_i for i \in I_slacked
blasfeo_dvecad_sp(ns, -1.0, memory->ux, nu+nx, model->idxs, &memory->fun, 0);
blasfeo_dvecad_sp(ns, -1.0, memory->ux, nu+nx+ns, model->idxs, &memory->fun, nb+ng+nh);

// fun[2*ni:end] = - slack + slack_bounds
blasfeo_daxpy(2*ns, -1.0, memory->ux, nu+nx, &model->d, 2*nb+2*ng+2*nh, &memory->fun, 2*nb+2*ng+2*nh);
// fun[2*ni:end] = - slack + slack_bounds
blasfeo_daxpy(2*ns, -1.0, memory->ux, nu+nx, &model->d, 2*nb+2*ng+2*nh, &memory->fun, 2*nb+2*ng+2*nh);
}

// nlp_mem: ineq_adj
if (opts->compute_adj)
Expand All @@ -1421,9 +1424,12 @@ void ocp_nlp_constraints_bgh_update_qp_matrices(void *config_, void *dims_, void
blasfeo_dvecad_sp(nb, 1.0, &work->tmp_ni, 0, model->idxb, &memory->adj, 0);
blasfeo_dgemv_n(nu+nx, ng+nh, 1.0, memory->DCt, 0, 0, &work->tmp_ni, nb, 1.0, &memory->adj, 0, &memory->adj, 0);
// soft
blasfeo_dvecex_sp(ns, 1.0, model->idxs, memory->lam, 0, &memory->adj, nu+nx);
blasfeo_dvecex_sp(ns, 1.0, model->idxs, memory->lam, nb+ng+nh, &memory->adj, nu+nx+ns);
blasfeo_daxpy(2*ns, 1.0, memory->lam, 2*nb+2*ng+2*nh, &memory->adj, nu+nx, &memory->adj, nu+nx);
if (ns)
{
blasfeo_dvecex_sp(ns, 1.0, model->idxs, memory->lam, 0, &memory->adj, nu+nx);
blasfeo_dvecex_sp(ns, 1.0, model->idxs, memory->lam, nb+ng+nh, &memory->adj, nu+nx+ns);
blasfeo_daxpy(2*ns, 1.0, memory->lam, 2*nb+2*ng+2*nh, &memory->adj, nu+nx, &memory->adj, nu+nx);
}
}

return;
Expand Down Expand Up @@ -1515,10 +1521,13 @@ void ocp_nlp_constraints_bgh_compute_fun(void *config_, void *dims_, void *model
blasfeo_daxpy(nb+ng+nh, -1.0, &model->d, nb+ng+nh, &work->tmp_ni, 0, &memory->fun, nb+ng+nh);

// soft
blasfeo_dvecad_sp(ns, -1.0, memory->ux, nu+nx, model->idxs, &memory->fun, 0);
blasfeo_dvecad_sp(ns, -1.0, memory->ux, nu+nx+ns, model->idxs, &memory->fun, nb+ng+nh);
if (ns)
{
blasfeo_dvecad_sp(ns, -1.0, memory->ux, nu+nx, model->idxs, &memory->fun, 0);
blasfeo_dvecad_sp(ns, -1.0, memory->ux, nu+nx+ns, model->idxs, &memory->fun, nb+ng+nh);

blasfeo_daxpy(2*ns, -1.0, memory->ux, nu+nx, &model->d, 2*nb+2*ng+2*nh, &memory->fun, 2*nb+2*ng+2*nh);
blasfeo_daxpy(2*ns, -1.0, memory->ux, nu+nx, &model->d, 2*nb+2*ng+2*nh, &memory->fun, 2*nb+2*ng+2*nh);
}

return;
}
Expand Down
30 changes: 18 additions & 12 deletions acados/ocp_nlp/ocp_nlp_cost_external.c
Original file line number Diff line number Diff line change
Expand Up @@ -639,14 +639,17 @@ void ocp_nlp_cost_external_update_qp_matrices(void *config_, void *dims_, void *
blasfeo_dgead(nx+nu, nx+nu, model->scaling, &work->tmp_nv_nv, 0, 0, memory->RSQrq, 0, 0);
}

// slack update gradient
blasfeo_dveccp(2*ns, &model->z, 0, &memory->grad, nu+nx);
blasfeo_dvecmulacc(2*ns, &model->Z, 0, memory->ux, nu+nx, &memory->grad, nu+nx);

// slack update function value
blasfeo_dveccpsc(2*ns, 2.0, &model->z, 0, &work->tmp_2ns, 0);
blasfeo_dvecmulacc(2*ns, &model->Z, 0, memory->ux, nu+nx, &work->tmp_2ns, 0);
memory->fun += 0.5 * blasfeo_ddot(2*ns, &work->tmp_2ns, 0, memory->ux, nu+nx);
if (ns)
{
// slack update gradient
blasfeo_dveccp(2*ns, &model->z, 0, &memory->grad, nu+nx);
blasfeo_dvecmulacc(2*ns, &model->Z, 0, memory->ux, nu+nx, &memory->grad, nu+nx);

// slack update function value
blasfeo_dveccpsc(2*ns, 2.0, &model->z, 0, &work->tmp_2ns, 0);
blasfeo_dvecmulacc(2*ns, &model->Z, 0, memory->ux, nu+nx, &work->tmp_2ns, 0);
memory->fun += 0.5 * blasfeo_ddot(2*ns, &work->tmp_2ns, 0, memory->ux, nu+nx);
}

// scale
if (model->scaling!=1.0)
Expand Down Expand Up @@ -716,10 +719,13 @@ void ocp_nlp_cost_external_compute_fun(void *config_, void *dims_, void *model_,
model->ext_cost_fun->evaluate(model->ext_cost_fun, ext_fun_type_in, ext_fun_in,
ext_fun_type_out, ext_fun_out);

// slack update function value
blasfeo_dveccpsc(2*ns, 2.0, &model->z, 0, &work->tmp_2ns, 0);
blasfeo_dvecmulacc(2*ns, &model->Z, 0, memory->tmp_ux, nu+nx, &work->tmp_2ns, 0);
memory->fun += 0.5 * blasfeo_ddot(2*ns, &work->tmp_2ns, 0, memory->tmp_ux, nu+nx);
if (ns)
{
// slack update function value
blasfeo_dveccpsc(2*ns, 2.0, &model->z, 0, &work->tmp_2ns, 0);
blasfeo_dvecmulacc(2*ns, &model->Z, 0, memory->tmp_ux, nu+nx, &work->tmp_2ns, 0);
memory->fun += 0.5 * blasfeo_ddot(2*ns, &work->tmp_2ns, 0, memory->tmp_ux, nu+nx);
}

// scale
if(model->scaling!=1.0)
Expand Down
53 changes: 39 additions & 14 deletions acados/ocp_nlp/ocp_nlp_reg_project.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
#include <stdio.h>
#include <string.h>
#include <assert.h>
#if defined(ACADOS_WITH_OPENMP)
#include <omp.h>
#endif

#include "acados/ocp_nlp/ocp_nlp_reg_common.h"
#include "acados/utils/math.h"
Expand Down Expand Up @@ -108,6 +111,10 @@ acados_size_t ocp_nlp_reg_project_memory_calculate_size(void *config_, ocp_nlp_r
int N = dims->N;

int ii;
int numThreadWS = 1;
#if defined(ACADOS_WITH_OPENMP)
numThreadWS = ACADOS_NUM_THREADS;
#endif

int nuxM = nu[0]+nx[0];
for(ii=1; ii<=N; ii++)
Expand All @@ -119,9 +126,9 @@ acados_size_t ocp_nlp_reg_project_memory_calculate_size(void *config_, ocp_nlp_r

size += sizeof(ocp_nlp_reg_project_memory);

size += nuxM*nuxM*sizeof(double); // reg_hess
size += nuxM*nuxM*sizeof(double); // V
size += 2*nuxM*sizeof(double); // d e
size += numThreadWS*nuxM*nuxM*sizeof(double); // reg_hess
size += numThreadWS*nuxM*nuxM*sizeof(double); // V
size += numThreadWS*2*nuxM*sizeof(double); // d e
size += (N+1)*sizeof(struct blasfeo_dmat *); // RSQrq

return size;
Expand All @@ -136,6 +143,10 @@ void *ocp_nlp_reg_project_memory_assign(void *config_, ocp_nlp_reg_dims *dims, v
int N = dims->N;

int ii;
int numThreadWS = 1;
#if defined(ACADOS_WITH_OPENMP)
numThreadWS = ACADOS_NUM_THREADS;
#endif

int nuxM = nu[0]+nx[0];
for(ii=1; ii<=N; ii++)
Expand All @@ -149,16 +160,16 @@ void *ocp_nlp_reg_project_memory_assign(void *config_, ocp_nlp_reg_dims *dims, v
c_ptr += sizeof(ocp_nlp_reg_project_memory);

mem->reg_hess = (double *) c_ptr;
c_ptr += nuxM*nuxM*sizeof(double); // reg_hess
c_ptr += numThreadWS*nuxM*nuxM*sizeof(double); // reg_hess

mem->V = (double *) c_ptr;
c_ptr += nuxM*nuxM*sizeof(double); // V
c_ptr += numThreadWS*nuxM*nuxM*sizeof(double); // V

mem->d = (double *) c_ptr;
c_ptr += nuxM*sizeof(double); // d
c_ptr += numThreadWS*nuxM*sizeof(double); // d

mem->e = (double *) c_ptr;
c_ptr += nuxM*sizeof(double); // e
c_ptr += numThreadWS*nuxM*sizeof(double); // e

mem->RSQrq = (struct blasfeo_dmat **) c_ptr;
c_ptr += (N+1)*sizeof(struct blasfeo_dmat *); // RSQrq
Expand Down Expand Up @@ -275,21 +286,35 @@ void ocp_nlp_reg_project_regularize_hessian(void *config, ocp_nlp_reg_dims *dims
ocp_nlp_reg_project_memory *mem = (ocp_nlp_reg_project_memory *) mem_;
ocp_nlp_reg_project_opts *opts = opts_;

int ii;

int *nx = dims->nx;
int *nu = dims->nu;

for(ii=0; ii<=dims->N; ii++)
#if defined(ACADOS_WITH_OPENMP)
const int nuxM = nu[0]+nx[0];

#pragma omp parallel
{ // beginning of parallel region
const int wsVecStart = omp_get_thread_num() * nuxM;
const int wsMatStart = wsVecStart * nuxM;
#pragma omp for nowait
#else
const int wsVecStart = 0;
const int wsMatStart = 0;
#endif
for(int ii=0; ii<=dims->N; ii++)
{
const int nuxii = nu[ii]+nx[ii];
// make symmetric
blasfeo_dtrtr_l(nu[ii]+nx[ii], mem->RSQrq[ii], 0, 0, mem->RSQrq[ii], 0, 0);
blasfeo_dtrtr_l(nuxii, mem->RSQrq[ii], 0, 0, mem->RSQrq[ii], 0, 0);

// regularize
blasfeo_unpack_dmat(nu[ii]+nx[ii], nu[ii]+nx[ii], mem->RSQrq[ii], 0, 0, mem->reg_hess, nu[ii]+nx[ii]);
acados_project(nu[ii]+nx[ii], mem->reg_hess, mem->V, mem->d, mem->e, opts->epsilon);
blasfeo_pack_dmat(nu[ii]+nx[ii], nu[ii]+nx[ii], mem->reg_hess, nu[ii]+nx[ii], mem->RSQrq[ii], 0, 0);
blasfeo_unpack_dmat(nuxii, nuxii, mem->RSQrq[ii], 0, 0, mem->reg_hess + wsMatStart, nuxii);
acados_project(nuxii, mem->reg_hess + wsMatStart, mem->V + wsMatStart, mem->d + wsVecStart, mem->e + wsVecStart, opts->ep 105BD silon);
blasfeo_pack_dmat(nuxii, nuxii, mem->reg_hess + wsMatStart, nuxii, mem->RSQrq[ii], 0, 0);
}
#if defined(ACADOS_WITH_OPENMP)
} // end of parallel region
#endif
}


Expand Down
8 changes: 4 additions & 4 deletions acados/ocp_nlp/ocp_nlp_sqp.c
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ int ocp_nlp_sqp(void *config_, void *dims_, void *nlp_in_, void *nlp_out_,

// alias to dynamics_memory
#if defined(ACADOS_WITH_OPENMP)
#pragma omp for
#pragma omp for nowait
#endif
for (ii = 0; ii < N; ii++)
{
Expand All @@ -533,7 +533,7 @@ int ocp_nlp_sqp(void *config_, void *dims_, void *nlp_in_, void *nlp_out_,

// alias to cost_memory
#if defined(ACADOS_WITH_OPENMP)
#pragma omp for
#pragma omp for nowait
#endif
for (ii = 0; ii <= N; ii++)
{
Expand All @@ -546,7 +546,7 @@ int ocp_nlp_sqp(void *config_, void *dims_, void *nlp_in_, void *nlp_out_,
}
// alias to constraints_memory
#if defined(ACADOS_WITH_OPENMP)
#pragma omp for
#pragma omp for nowait
#endif
for (ii = 0; ii <= N; ii++)
{
Expand Down Expand Up @@ -576,7 +576,7 @@ int ocp_nlp_sqp(void *config_, void *dims_, void *nlp_in_, void *nlp_out_,

// copy sampling times into dynamics model
#if defined(ACADOS_WITH_OPENMP)
#pragma omp for
#pragma omp for nowait
#endif

// NOTE(oj): this will lead in an error for irk_gnsf, T must be set in precompute;
Expand Down
0