diff --git a/pyextra/acados_template/acados_layout.json b/pyextra/acados_template/acados_layout.json index 59aa13aa87..c9f0b90c73 100644 --- a/pyextra/acados_template/acados_layout.json +++ b/pyextra/acados_template/acados_layout.json @@ -668,6 +668,9 @@ "hessian_approx": [ "str" ], + "hpipm_mode": [ + "str" + ], "regularize_method": [ "str" ], diff --git a/pyextra/acados_template/acados_ocp.py b/pyextra/acados_template/acados_ocp.py index 8ea129936f..80970239eb 100644 --- a/pyextra/acados_template/acados_ocp.py +++ b/pyextra/acados_template/acados_ocp.py @@ -2155,6 +2155,7 @@ class AcadosOcpOptions: self.__globalization_use_SOC = 0 self.__full_step_dual = 0 self.__eps_sufficient_descent = 1e-4 + self.__hpipm_mode = 'BALANCE' @property @@ -2165,6 +2166,21 @@ class AcadosOcpOptions: """ return self.__qp_solver + @property + def hpipm_mode(self): + """ + Mode of HPIPM to be used, + + String in ('BALANCE', 'SPEED_ABS', 'SPEED', 'ROBUST'). + + Default: 'BALANCE'. + + see https://cdn.syscop.de/publications/Frison2020a.pdf + and the HPIPM code: + https://github.com/giaf/hpipm/blob/master/ocp_qp/x_ocp_qp_ipm.c#L69 + """ + return self.__hpipm_mode + @property def hessian_approx(self): """Hessian approximation. @@ -2541,6 +2557,15 @@ class AcadosOcpOptions: raise Exception('Invalid collocation_type value. Possible values are:\n\n' \ + ',\n'.join(collocation_types) + '.\n\nYou have: ' + collocation_type + '.\n\nExiting.') + @hpipm_mode.setter + def hpipm_mode(self, hpipm_mode): + hpipm_modes = ('BALANCE', 'SPEED_ABS', 'SPEED', 'ROBUST') + if hpipm_mode in hpipm_modes: + self.__hpipm_mode = hpipm_mode + else: + raise Exception('Invalid hpipm_mode value. Possible values are:\n\n' \ + + ',\n'.join(hpipm_modes) + '.\n\nYou have: ' + hpipm_mode + '.\n\nExiting.') + @hessian_approx.setter def hessian_approx(self, hessian_approx): hessian_approxs = ('GAUSS_NEWTON', 'EXACT') @@ -2890,10 +2915,10 @@ class AcadosOcp: self.solver_options = AcadosOcpOptions() """Solver Options, type :py:class:`acados_template.acados_ocp.AcadosOcpOptions`""" - self.acados_include_path = f'{acados_path}/include' - """Path to acados include directory, type: string""" - self.acados_lib_path = f'{acados_path}/lib' - """Path to where acados library is located, type: string""" + self.acados_include_path = os.path.join(acados_path, 'include').replace(os.sep, '/') # the replace part is important on Windows for CMake + """Path to acados include directory (set automatically), type: `string`""" + self.acados_lib_path = os.path.join(acados_path, 'lib').replace(os.sep, '/') # the replace part is important on Windows for CMake + """Path to where acados library is located, type: `string`""" import numpy self.cython_include_dirs = numpy.get_include() diff --git a/pyextra/acados_template/acados_ocp_solver.py b/pyextra/acados_template/acados_ocp_solver.py index 6d1ff9869c..beeda2cb0c 100644 --- a/pyextra/acados_template/acados_ocp_solver.py +++ b/pyextra/acados_template/acados_ocp_solver.py @@ -54,6 +54,7 @@ from .acados_model import acados_model_strip_casadi_symbolics from .utils import is_column, is_empty, casadi_length, render_template,\ format_class_dict, ocp_check_against_layout, np_array_to_list, make_model_consistent,\ set_up_imported_gnsf_model, get_ocp_nlp_layout, get_python_interface_path +from .builders import CMakeBuilder def make_ocp_dims_consistent(acados_ocp): @@ -132,6 +133,15 @@ def make_ocp_dims_consistent(acados_ocp): f'\nGot W_0[{cost.W.shape}], yref_0[{cost.yref_0.shape}]\n') dims.ny_0 = ny_0 + elif cost.cost_type_0 == 'EXTERNAL': + if opts.hessian_approx == 'GAUSS_NEWTON' and opts.ext_cost_num_hess == 0 and model.cost_expr_ext_cost_custom_hess_0 is None: + print("\nWARNING: Gauss-Newton Hessian approximation with EXTERNAL cost type not possible!\n" + "got cost_type_0: EXTERNAL, hessian_approx: 'GAUSS_NEWTON.'\n" + "GAUSS_NEWTON hessian is only supported for cost_types [NON]LINEAR_LS.\n" + "If you continue, acados will proceed computing the exact hessian for the cost term.\n" + "Note: There is also the option to use the external cost module with a numerical hessian approximation (see `ext_cost_num_hess`).\n" + "OR the option to provide a symbolic custom hessian approximation (see `cost_expr_ext_cost_custom_hess`).\n") + # path if cost.cost_type == 'LINEAR_LS': ny = cost.W.shape[0] @@ -161,6 +171,15 @@ def make_ocp_dims_consistent(acados_ocp): f'\nGot W[{cost.W.shape}], yref[{cost.yref.shape}]\n') dims.ny = ny + elif cost.cost_type == 'EXTERNAL': + if opts.hessian_approx == 'GAUSS_NEWTON' and opts.ext_cost_num_hess == 0 and model.cost_expr_ext_cost_custom_hess is None: + print("\nWARNING: Gauss-Newton Hessian approximation with EXTERNAL cost type not possible!\n" + "got cost_type: EXTERNAL, hessian_approx: 'GAUSS_NEWTON.'\n" + "GAUSS_NEWTON hessian is only supported for cost_types [NON]LINEAR_LS.\n" + "If you continue, acados will proceed computing the exact hessian for the cost term.\n" + "Note: There is also the option to use the external cost module with a numerical hessian approximation (see `ext_cost_num_hess`).\n" + "OR the option to provide a symbolic custom hessian approximation (see `cost_expr_ext_cost_custom_hess`).\n") + # terminal if cost.cost_type_e == 'LINEAR_LS': ny_e = cost.W_e.shape[0] @@ -183,6 +202,14 @@ def make_ocp_dims_consistent(acados_ocp): raise Exception('inconsistent dimension: regarding W_e, yref_e.') dims.ny_e = ny_e + elif cost.cost_type_e == 'EXTERNAL': + if opts.hessian_approx == 'GAUSS_NEWTON' and opts.ext_cost_num_hess == 0 and model.cost_expr_ext_cost_custom_hess_e is None: + print("\nWARNING: Gauss-Newton Hessian approximation with EXTERNAL cost type not possible!\n" + "got cost_type_e: EXTERNAL, hessian_approx: 'GAUSS_NEWTON.'\n" + "GAUSS_NEWTON hessian is only supported for cost_types [NON]LINEAR_LS.\n" + "If you continue, acados will proceed computing the exact hessian for the cost term.\n" + "Note: There is also the option to use the external cost module with a numerical hessian approximation (see `ext_cost_num_hess`).\n" + "OR the option to provide a symbolic custom hessian approximation (see `cost_expr_ext_cost_custom_hess`).\n") ## constraints # initial @@ -622,15 +649,25 @@ def ocp_generate_external_functions(acados_ocp, model): generate_c_code_external_cost(model, 'terminal', opts) -def ocp_render_templates(acados_ocp, json_file): +def ocp_get_default_cmake_builder() -> CMakeBuilder: + """ + If :py:class:`~acados_template.acados_ocp_solver.AcadosOcpSolver` is used with `CMake` this function returns a good first setting. + :return: default :py:class:`~acados_template.builders.CMakeBuilder` + """ + cmake_builder = CMakeBuilder() + cmake_builder.options_on = ['BUILD_ACADOS_OCP_SOLVER_LIB'] + return cmake_builder + + +def ocp_render_templates(acados_ocp, json_file, cmake_builder=None): name = acados_ocp.model.name # setting up loader and environment - json_path = os.path.join(os.getcwd(), json_file) + json_path = os.path.abspath(json_file) if not os.path.exists(json_path): - raise Exception('{} not found!'.format(json_path)) + raise Exception(f'Path "{json_path}" not found!') code_export_dir = acados_ocp.code_export_directory template_dir = code_export_dir @@ -652,9 +689,14 @@ def ocp_render_templates(acados_ocp, json_file): out_file = f'acados_solver.pxd' render_template(in_file, out_file, template_dir, json_path) - in_file = 'Makefile.in' - out_file = 'Makefile' - render_template(in_file, out_file, template_dir, json_path) + if cmake_builder is not None: + in_file = 'CMakeLists.in.txt' + out_file = 'CMakeLists.txt' + render_template(in_file, out_file, template_dir, json_path) + else: + in_file = 'Makefile.in' + out_file = 'Makefile' + render_template(in_file, out_file, template_dir, json_path) in_file = 'acados_solver_sfun.in.c' out_file = f'acados_solver_sfunction_{name}.c' @@ -764,25 +806,30 @@ class AcadosOcpSolver: """ Class to interact with the acados ocp solver C object. - :param acados_ocp: type AcadosOcp - description of the OCP for acados + :param acados_ocp: type :py:class:`~acados_template.acados_ocp.AcadosOcp` - description of the OCP for acados :param json_file: name for the json file used to render the templated code - default: acados_ocp_nlp.json :param simulink_opts: Options to configure Simulink S-function blocks, mainly to activate possible Inputs and Outputs """ if sys.platform=="win32": from ctypes import wintypes - dlclose = ctypes.WinDLL('kernel32', use_last_error=True).FreeLibrary + from ctypes import WinDLL + dlclose = WinDLL('kernel32', use_last_error=True).FreeLibrary dlclose.argtypes = [wintypes.HMODULE] else: dlclose = CDLL(None).dlclose dlclose.argtypes = [c_void_p] @classmethod - def generate(cls, acados_ocp, json_file='acados_ocp_nlp.json', simulink_opts=None): + def generate(cls, acados_ocp, json_file='acados_ocp_nlp.json', simulink_opts=None, cmake_builder: CMakeBuilder = None): """ Generates the code for an acados OCP solver, given the description in acados_ocp. :param acados_ocp: type AcadosOcp - description of the OCP for acados - :param json_file: name for the json file used to render the templated code - default: acados_ocp_nlp.json - :param simulink_opts: Options to configure Simulink S-function blocks, mainly to activate possible Inputs and Outputs + :param json_file: name for the json file used to render the templated code - default: `acados_ocp_nlp.json` + :param simulink_opts: Options to configure Simulink S-function blocks, mainly to activate possible inputs and + outputs; default: `None` + :param cmake_builder: type :py:class:`~acados_template.builders.CMakeBuilder` generate a `CMakeLists.txt` and use + the `CMake` pipeline instead of a `Makefile` (`CMake` seems to be the better option in conjunction with + `MS Visual Studio`); default: `None` """ model = acados_ocp.model acados_ocp.code_export_directory = os.path.abspath(acados_ocp.code_export_directory) @@ -810,25 +857,32 @@ class AcadosOcpSolver: ocp_formulation_json_dump(acados_ocp, simulink_opts, json_file) # render templates - ocp_render_templates(acados_ocp, json_file) + ocp_render_templates(acados_ocp, json_file, cmake_builder=cmake_builder) acados_ocp.json_file = json_file @classmethod - def build(cls, code_export_dir, with_cython=False): + def build(cls, code_export_dir, with_cython=False, cmake_builder: CMakeBuilder = None): """ Builds the code for an acados OCP solver, that has been generated in code_export_dir :param code_export_dir: directory in which acados OCP solver has been generated, see generate() :param with_cython: option indicating if the cython interface is build, default: False. + :param cmake_builder: type :py:class:`~acados_template.builders.CMakeBuilder` generate a `CMakeLists.txt` and use + the `CMake` pipeline instead of a `Makefile` (`CMake` seems to be the better option in conjunction with + `MS Visual Studio`); default: `None` """ + code_export_dir = os.path.abspath(code_export_dir) cwd=os.getcwd() os.chdir(code_export_dir) if with_cython: os.system('make clean_ocp_cython') os.system('make ocp_cython') else: - os.system('make clean_ocp_shared_lib') - os.system('make ocp_shared_lib') + if cmake_builder is not None: + cmake_builder.exec(code_export_dir) + else: + os.system('make clean_ocp_shared_lib') + os.system('make ocp_shared_lib') os.chdir(cwd) @@ -856,11 +910,11 @@ class AcadosOcpSolver: acados_ocp_json['dims']['N']) - def __init__(self, acados_ocp, json_file='acados_ocp_nlp.json', simulink_opts=None, build=True, generate=True): + def __init__(self, acados_ocp, json_file='acados_ocp_nlp.json', simulink_opts=None, build=True, generate=True, cmake_builder: CMakeBuilder = None): self.solver_created = False if generate: - self.generate(acados_ocp, json_file=json_file, simulink_opts=simulink_opts) + self.generate(acados_ocp, json_file=json_file, simulink_opts=simulink_opts, cmake_builder=cmake_builder) # load json, store options in object with open(json_file, 'r') as f: @@ -873,14 +927,22 @@ class AcadosOcpSolver: code_export_directory = acados_ocp_json['code_export_directory'] if build: - self.build(code_export_directory, with_cython=False) + self.build(code_export_directory, with_cython=False, cmake_builder=cmake_builder) + + # prepare library loading + lib_prefix = 'lib' + lib_ext = '.so' + if os.name == 'nt': + lib_prefix = '' + lib_ext = '' + # ToDo: check for mac # Load acados library to avoid unloading the library. # This is necessary if acados was compiled with OpenMP, since the OpenMP threads can't be destroyed. # Unloading a library which uses OpenMP results in a segfault (on any platform?). # see [https://stackoverflow.com/questions/34439956/vc-crash-when-freeing-a-dll-built-with-openmp] # or [https://python.hotexamples.com/examples/_ctypes/-/dlclose/python-dlclose-function-examples.html] - libacados_name = 'libacados.so' + libacados_name = f'{lib_prefix}acados{lib_ext}' libacados_filepath = os.path.join(acados_lib_path, libacados_name) self.__acados_lib = CDLL(libacados_filepath) # find out if acados was compiled with OpenMP @@ -892,8 +954,8 @@ class AcadosOcpSolver: print('acados was compiled with OpenMP.') else: print('acados was compiled without OpenMP.') - - self.shared_lib_name = f'{code_export_directory}/libacados_ocp_solver_{self.model_name}.so' + libacados_ocp_solver_name = f'{lib_prefix}acados_ocp_solver_{self.model_name}{lib_ext}' + self.shared_lib_name = os.path.join(code_export_directory, libacados_ocp_solver_name) # get shared_lib self.shared_lib = CDLL(self.shared_lib_name) @@ -959,6 +1021,17 @@ class AcadosOcpSolver: return self.status + def reset(self): + """ + Sets current iterate to all zeros. + """ + getattr(self.shared_lib, f"{self.model_name}_acados_reset").argtypes = [c_void_p] + getattr(self.shared_lib, f"{self.model_name}_acados_reset").restype = c_int + getattr(self.shared_lib, f"{self.model_name}_acados_reset")(self.capsule) + + return + + def set_new_time_steps(self, new_time_steps): """ Set new time steps. @@ -1364,12 +1437,12 @@ class AcadosOcpSolver: return out[0] - def get_residuals(self): + def get_residuals(self, recompute=False): """ Returns an array of the form [res_stat, res_eq, res_ineq, res_comp]. """ # compute residuals if RTI - if self.solver_options['nlp_solver_type'] == 'SQP_RTI': + if self.solver_options['nlp_solver_type'] == 'SQP_RTI' or recompute: self.shared_lib.ocp_nlp_eval_residuals.argtypes = [c_void_p, c_void_p, c_void_p] self.shared_lib.ocp_nlp_eval_residuals(self.nlp_solver, self.nlp_in, self.nlp_out) @@ -1403,7 +1476,7 @@ class AcadosOcpSolver: Set numerical data inside the solver. :param stage: integer corresponding to shooting node - :param field: string in ['x', 'u', 'pi', 'lam', 't', 'p'] + :param field: string in ['x', 'u', 'pi', 'lam', 't', 'p', 'xdot_guess', 'z_guess'] .. note:: regarding lam, t: \n the inequalities are internally organized in the following order: \n @@ -1419,7 +1492,7 @@ class AcadosOcpSolver: cost_fields = ['y_ref', 'yref'] constraints_fields = ['lbx', 'ubx', 'lbu', 'ubu'] out_fields = ['x', 'u', 'pi', 'lam', 't', 'z', 'sl', 'su'] - mem_fields = ['xdot_guess'] + mem_fields = ['xdot_guess', 'z_guess'] # cast value_ to avoid conversion issues if isinstance(value_, (float, int)): @@ -1663,11 +1736,21 @@ class AcadosOcpSolver: """ Set options of the solver. - :param field: string, e.g. 'print_level', 'rti_phase', 'initialize_t_slacks', 'step_length', 'alpha_min', 'alpha_reduction', 'qp_warm_start', 'line_search_use_sufficient_descent', 'full_step_dual', 'globalization_use_SOC' - :param value: of type int, float + :param field: string, e.g. 'print_level', 'rti_phase', 'initialize_t_slacks', 'step_length', 'alpha_min', 'alpha_reduction', 'qp_warm_start', 'line_search_use_sufficient_descent', 'full_step_dual', 'globalization_use_SOC', 'qp_tol_stat', 'qp_tol_eq', 'qp_tol_ineq', 'qp_tol_comp', 'qp_tau_min', 'qp_mu0' + + :param value: of type int, float, string + + - qp_tol_stat: QP solver tolerance stationarity + - qp_tol_eq: QP solver tolerance equalities + - qp_tol_ineq: QP solver tolerance inequalities + - qp_tol_comp: QP solver tolerance complementarity + - qp_tau_min: for HPIPM QP solvers: minimum value of barrier parameter in HPIPM + - qp_mu0: for HPIPM QP solvers: initial value for complementarity slackness + - warm_start_first_qp: indicates if first QP in SQP is warm_started """ - int_fields = ['print_level', 'rti_phase', 'initialize_t_slacks', 'qp_warm_start', 'line_search_use_sufficient_descent', 'full_step_dual', 'globalization_use_SOC'] - double_fields = ['step_length', 'tol_eq', 'tol_stat', 'tol_ineq', 'tol_comp', 'alpha_min', 'alpha_reduction', 'eps_sufficient_descent'] + int_fields = ['print_level', 'rti_phase', 'initialize_t_slacks', 'qp_warm_start', 'line_search_use_sufficient_descent', 'full_step_dual', 'globalization_use_SOC', 'warm_start_first_qp'] + double_fields = ['step_length', 'tol_eq', 'tol_stat', 'tol_ineq', 'tol_comp', 'alpha_min', 'alpha_reduction', 'eps_sufficient_descent', + 'qp_tol_stat', 'qp_tol_eq', 'qp_tol_ineq', 'qp_tol_comp', 'qp_tau_min', 'qp_mu0'] string_fields = ['globalization'] # check field availability and type diff --git a/pyextra/acados_template/acados_ocp_solver_pyx.pyx b/pyextra/acados_template/acados_ocp_solver_pyx.pyx index 97d5b2855f..fe7fa8425a 100644 --- a/pyextra/acados_template/acados_ocp_solver_pyx.pyx +++ b/pyextra/acados_template/acados_ocp_solver_pyx.pyx @@ -112,6 +112,13 @@ cdef class AcadosOcpSolverCython: return acados_solver.acados_solve(self.capsule) + def reset(self): + """ + Sets current iterate to all zeros. + """ + return acados_solver.acados_reset(self.capsule) + + def set_new_time_steps(self, new_time_steps): """ Set new time steps. @@ -450,12 +457,12 @@ cdef class AcadosOcpSolverCython: return out - def get_residuals(self): + def get_residuals(self, recompute=False): """ Returns an array of the form [res_stat, res_eq, res_ineq, res_comp]. """ # compute residuals if RTI - if self.nlp_solver_type == 'SQP_RTI': + if self.nlp_solver_type == 'SQP_RTI' or recompute: acados_solver_common.ocp_nlp_eval_residuals(self.nlp_solver, self.nlp_in, self.nlp_out) # create output array @@ -504,7 +511,7 @@ cdef class AcadosOcpSolverCython: cost_fields = ['y_ref', 'yref'] constraints_fields = ['lbx', 'ubx', 'lbu', 'ubu'] out_fields = ['x', 'u', 'pi', 'lam', 't', 'z', 'sl', 'su'] - mem_fields = ['xdot_guess'] + mem_fields = ['xdot_guess', 'z_guess'] field = field_.encode('utf-8') @@ -635,11 +642,21 @@ cdef class AcadosOcpSolverCython: """ Set options of the solver. - :param field: string, e.g. 'print_level', 'rti_phase', 'initialize_t_slacks', 'step_length', 'alpha_min', 'alpha_reduction' - :param value: of type int, float + :param field: string, e.g. 'print_level', 'rti_phase', 'initialize_t_slacks', 'step_length', 'alpha_min', 'alpha_reduction', 'qp_warm_start', 'line_search_use_sufficient_descent', 'full_step_dual', 'globalization_use_SOC', 'qp_tol_stat', 'qp_tol_eq', 'qp_tol_ineq', 'qp_tol_comp', 'qp_tau_min', 'qp_mu0' + + :param value: of type int, float, string + + - qp_tol_stat: QP solver tolerance stationarity + - qp_tol_eq: QP solver tolerance equalities + - qp_tol_ineq: QP solver tolerance inequalities + - qp_tol_comp: QP solver tolerance complementarity + - qp_tau_min: for HPIPM QP solvers: minimum value of barrier parameter in HPIPM + - qp_mu0: for HPIPM QP solvers: initial value for complementarity slackness + - warm_start_first_qp: indicates if first QP in SQP is warm_started """ - int_fields = ['print_level', 'rti_phase', 'initialize_t_slacks', 'qp_warm_start', 'line_search_use_sufficient_descent', 'full_step_dual', 'globalization_use_SOC'] - double_fields = ['step_length', 'tol_eq', 'tol_stat', 'tol_ineq', 'tol_comp', 'alpha_min', 'alpha_reduction', 'eps_sufficient_descent'] + int_fields = ['print_level', 'rti_phase', 'initialize_t_slacks', 'qp_warm_start', 'line_search_use_sufficient_descent', 'full_step_dual', 'globalization_use_SOC', 'warm_start_first_qp'] + double_fields = ['step_length', 'tol_eq', 'tol_stat', 'tol_ineq', 'tol_comp', 'alpha_min', 'alpha_reduction', 'eps_sufficient_descent', + 'qp_tol_stat', 'qp_tol_eq', 'qp_tol_ineq', 'qp_tol_comp', 'qp_tau_min', 'qp_mu0'] string_fields = ['globalization'] # encode diff --git a/pyextra/acados_template/acados_sim.py b/pyextra/acados_template/acados_sim.py index c63cffc316..93d5f298db 100644 --- a/pyextra/acados_template/acados_sim.py +++ b/pyextra/acados_template/acados_sim.py @@ -294,9 +294,9 @@ class AcadosSim: self.solver_options = AcadosSimOpts() """Solver Options, type :py:class:`acados_template.acados_sim.AcadosSimOpts`""" - self.acados_include_path = f'{acados_path}/include' - """Path to acados include directors (set automatically), type: `string`""" - self.acados_lib_path = f'{acados_path}/lib' + self.acados_include_path = os.path.join(acados_path, 'include').replace(os.sep, '/') # the replace part is important on Windows for CMake + """Path to acados include directory (set automatically), type: `string`""" + self.acados_lib_path = os.path.join(acados_path, 'lib').replace(os.sep, '/') # the replace part is important on Windows for CMake """Path to where acados library is located (set automatically), type: `string`""" self.code_export_directory = 'c_generated_code' diff --git a/pyextra/acados_template/acados_sim_solver.py b/pyextra/acados_template/acados_sim_solver.py index a12ecb1765..3588dd38cd 100644 --- a/pyextra/acados_template/acados_sim_solver.py +++ b/pyextra/acados_template/acados_sim_solver.py @@ -47,6 +47,7 @@ from .acados_ocp import AcadosOcp from .acados_model import acados_model_strip_casadi_symbolics from .utils import is_column, render_template, format_class_dict, np_array_to_list,\ make_model_consistent, set_up_imported_gnsf_model, get_python_interface_path +from .builders import CMakeBuilder def make_sim_dims_consistent(acados_sim): @@ -111,7 +112,17 @@ def sim_formulation_json_dump(acados_sim, json_file='acados_sim.json'): json.dump(sim_json, f, default=np_array_to_list, indent=4, sort_keys=True) -def sim_render_templates(json_file, model_name, code_export_dir): +def sim_get_default_cmake_builder() -> CMakeBuilder: + """ + If :py:class:`~acados_template.acados_sim_solver.AcadosSimSolver` is used with `CMake` this function returns a good first setting. + :return: default :py:class:`~acados_template.builders.CMakeBuilder` + """ + cmake_builder = CMakeBuilder() + cmake_builder.options_on = ['BUILD_ACADOS_SIM_SOLVER_LIB'] + return cmake_builder + + +def sim_render_templates(json_file, model_name, code_export_dir, cmake_options: CMakeBuilder = None): # setting up loader and environment json_path = os.path.join(os.getcwd(), json_file) @@ -129,9 +140,15 @@ def sim_render_templates(json_file, model_name, code_export_dir): out_file = f'acados_sim_solver_{model_name}.h' render_template(in_file, out_file, template_dir, json_path) - in_file = 'Makefile.in' - out_file = f'Makefile' - render_template(in_file, out_file, template_dir, json_path) + # Builder + if cmake_options is not None: + in_file = 'CMakeLists.in.txt' + out_file = 'CMakeLists.txt' + render_template(in_file, out_file, template_dir, json_path) + else: + in_file = 'Makefile.in' + out_file = 'Makefile' + render_template(in_file, out_file, template_dir, json_path) in_file = 'main_sim.in.c' out_file = f'main_sim_{model_name}.c' @@ -161,15 +178,19 @@ def sim_generate_casadi_functions(acados_sim): elif integrator_type == 'GNSF': generate_c_code_gnsf(model, opts) + class AcadosSimSolver: """ Class to interact with the acados integrator C object. - :param acados_sim: type :py:class:`acados_template.acados_ocp.AcadosOcp` (takes values to generate an instance :py:class:`acados_template.acados_sim.AcadosSim`) or :py:class:`acados_template.acados_sim.AcadosSim` - :param json_file: Default: 'acados_sim.json' - :param build: Default: True + :param acados_sim: type :py:class:`~acados_template.acados_ocp.AcadosOcp` (takes values to generate an instance :py:class:`~acados_template.acados_sim.AcadosSim`) or :py:class:`~acados_template.acados_sim.AcadosSim` + :param json_file: Default: 'acados_sim.json' + :param build: Default: True + :param cmake_builder: type :py:class:`~acados_template.utils.CMakeBuilder` generate a `CMakeLists.txt` and use + the `CMake` pipeline instead of a `Makefile` (`CMake` seems to be the better option in conjunction with + `MS Visual Studio`); default: `None` """ - def __init__(self, acados_sim_, json_file='acados_sim.json', build=True): + def __init__(self, acados_sim_, json_file='acados_sim.json', build=True, cmake_builder: CMakeBuilder = None): self.solver_created = False @@ -203,12 +224,16 @@ class AcadosSimSolver: code_export_dir = acados_sim.code_export_directory if build: # render templates - sim_render_templates(json_file, model_name, code_export_dir) + sim_render_templates(json_file, model_name, code_export_dir, cmake_builder) - ## Compile solver + # Compile solver cwd = os.getcwd() + code_export_dir = os.path.abspath(code_export_dir) os.chdir(code_export_dir) - os.system('make sim_shared_lib') + if cmake_builder is not None: + cmake_builder.exec(code_export_dir) + else: + os.system('make sim_shared_lib') os.chdir(cwd) self.sim_struct = acados_sim @@ -234,8 +259,15 @@ class AcadosSimSolver: print('acados was compiled without OpenMP.') # Ctypes - shared_lib = f'{code_export_dir}/libacados_sim_solver_{model_name}.so' - self.shared_lib = CDLL(shared_lib) + lib_prefix = 'lib' + lib_ext = '.so' + if os.name == 'nt': + lib_prefix = '' + lib_ext = '' + self.shared_lib_name = os.path.join(code_export_dir, f'{lib_prefix}acados_sim_solver_{model_name}{lib_ext}') + print(f'self.shared_lib_name = "{self.shared_lib_name}"') + + self.shared_lib = CDLL(self.shared_lib_name) # create capsule diff --git a/pyextra/acados_template/builders.py b/pyextra/acados_template/builders.py new file mode 100644 index 0000000000..f595033ceb --- /dev/null +++ b/pyextra/acados_template/builders.py @@ -0,0 +1,116 @@ +# -*- coding: future_fstrings -*- +# +# Copyright 2019 Gianluca Frison, Dimitris Kouzoupis, Robin Verschueren, +# Andrea Zanelli, Niels van Duijkeren, Jonathan Frey, Tommaso Sartor, +# Branimir Novoselnik, Rien Quirynen, Rezart Qelibari, Dang Doan, +# Jonas Koenemann, Yutao Chen, Tobias Schöls, Jonas Schlagenhauf, Moritz Diehl +# +# This file is part of acados. +# +# The 2-Clause BSD License +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE.; +# + +import os +import sys +from subprocess import call + + +class CMakeBuilder: + """ + Class to work with the `CMake` build system. + """ + def __init__(self): + self._source_dir = None # private source directory, this is set to code_export_dir + self.build_dir = 'build' + self._build_dir = None # private build directory, usually rendered to abspath(build_dir) + self.generator = None + """Defines the generator, options can be found via `cmake --help` under 'Generator'. Type: string. Linux default 'Unix Makefiles', Windows 'Visual Studio 15 2017 Win64'; default value: `None`.""" + # set something for Windows + if os.name == 'nt': + self.generator = 'Visual Studio 15 2017 Win64' + self.build_targets = None + """A comma-separated list of the build targets, if `None` then all targets will be build; type: List of strings; default: `None`.""" + self.options_on = None + """List of strings as CMake options which are translated to '-D Opt[0]=ON -D Opt[1]=ON ...'; default: `None`.""" + + # Generate the command string for handling the cmake command. + def get_cmd1_cmake(self): + defines_str = '' + if self.options_on is not None: + defines_arr = [f' -D{opt}=ON' for opt in self.options_on] + defines_str = ' '.join(defines_arr) + generator_str = '' + if self.generator is not None: + generator_str = f' -G"{self.generator}"' + return f'cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="{self._source_dir}"{defines_str}{generator_str} -Wdev -S"{self._source_dir}" -B"{self._build_dir}"' + + # Generate the command string for handling the build. + def get_cmd2_build(self): + import multiprocessing + cmd = f'cmake --build "{self._build_dir}" --config Release -j{multiprocessing.cpu_count()}' + if self.build_targets is not None: + cmd += f' -t {self.build_targets}' + return cmd + + # Generate the command string for handling the install command. + def get_cmd3_install(self): + return f'cmake --install "{self._build_dir}"' + + def exec(self, code_export_directory): + """ + Execute the compilation using `CMake` with the given settings. + :param code_export_directory: must be the absolute path to the directory where the code was exported to + """ + if(os.path.isabs(code_export_directory) is False): + print(f'(W) the code export directory "{code_export_directory}" is not an absolute path!') + self._source_dir = code_export_directory + self._build_dir = os.path.abspath(self.build_dir) + try: + os.mkdir(self._build_dir) + except FileExistsError as e: + pass + + try: + os.chdir(self._build_dir) + cmd_str = self.get_cmd1_cmake() + print(f'call("{cmd_str})"') + retcode = call(cmd_str, shell=True) + if retcode != 0: + raise RuntimeError(f'CMake command "{cmd_str}" was terminated by signal {retcode}') + cmd_str = self.get_cmd2_build() + print(f'call("{cmd_str}")') + retcode = call(cmd_str, shell=True) + if retcode != 0: + raise RuntimeError(f'Build command "{cmd_str}" was terminated by signal {retcode}') + cmd_str = self.get_cmd3_install() + print(f'call("{cmd_str}")') + retcode = call(cmd_str, shell=True) + if retcode != 0: + raise RuntimeError(f'Install command "{cmd_str}" was terminated by signal {retcode}') + except OSError as e: + print("Execution failed:", e, file=sys.stderr) + except Exception as e: + print("Execution failed:", e, file=sys.stderr) + exit(1) diff --git a/pyextra/acados_template/c_templates_tera/CMakeLists.in.txt b/pyextra/acados_template/c_templates_tera/CMakeLists.in.txt new file mode 100644 index 0000000000..3d6483b5d2 --- /dev/null +++ b/pyextra/acados_template/c_templates_tera/CMakeLists.in.txt @@ -0,0 +1,374 @@ +# +# Copyright 2019 Gianluca Frison, Dimitris Kouzoupis, Robin Verschueren, +# Andrea Zanelli, Niels van Duijkeren, Jonathan Frey, Tommaso Sartor, +# Branimir Novoselnik, Rien Quirynen, Rezart Qelibari, Dang Doan, +# Jonas Koenemann, Yutao Chen, Tobias Schöls, Jonas Schlagenhauf, Moritz Diehl +# +# This file is part of acados. +# +# The 2-Clause BSD License +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE.; +# + +{%- if solver_options.qp_solver %} + {%- set qp_solver = solver_options.qp_solver %} +{%- else %} + {%- set qp_solver = "FULL_CONDENSING_HPIPM" %} +{%- endif %} + +{%- if solver_options.hessian_approx %} + {%- set hessian_approx = solver_options.hessian_approx %} +{%- elif solver_options.sens_hess %} + {%- set hessian_approx = "EXACT" %} +{%- else %} + {%- set hessian_approx = "GAUSS_NEWTON" %} +{%- endif %} + +{%- if constraints.constr_type %} + {%- set constr_type = constraints.constr_type %} +{%- else %} + {%- set constr_type = "NONE" %} +{%- endif %} + +{%- if constraints.constr_type_e %} + {%- set constr_type_e = constraints.constr_type_e %} +{%- else %} + {%- set constr_type_e = "NONE" %} +{%- endif %} + +{%- if cost.cost_type %} + {%- set cost_type = cost.cost_type %} +{%- else %} + {%- set cost_type = "NONE" %} +{%- endif %} + +{%- if cost.cost_type_e %} + {%- set cost_type_e = cost.cost_type_e %} +{%- else %} + {%- set cost_type_e = "NONE" %} +{%- endif %} + +{%- if cost.cost_type_0 %} + {%- set cost_type_0 = cost.cost_type_0 %} +{%- else %} + {%- set cost_type_0 = "NONE" %} +{%- endif %} + +{%- if dims.nh %} + {%- set dims_nh = dims.nh %} +{%- else %} + {%- set dims_nh = 0 %} +{%- endif %} + +{%- if dims.nphi %} + {%- set dims_nphi = dims.nphi %} +{%- else %} + {%- set dims_nphi = 0 %} +{%- endif %} + +{%- if dims.nh_e %} + {%- set dims_nh_e = dims.nh_e %} +{%- else %} + {%- set dims_nh_e = 0 %} +{%- endif %} + +{%- if dims.nphi_e %} + {%- set dims_nphi_e = dims.nphi_e %} +{%- else %} + {%- set dims_nphi_e = 0 %} +{%- endif %} + +{%- if solver_options.model_external_shared_lib_dir %} + {%- set model_external_shared_lib_dir = solver_options.model_external_shared_lib_dir %} +{%- endif %} + +{%- if solver_options.model_external_shared_lib_name %} + {%- set model_external_shared_lib_name = solver_options.model_external_shared_lib_name %} +{%- endif %} + +{#- control operator #} +{%- if os and os == "pc" %} + {%- set control = "&" %} +{%- else %} + {%- set control = ";" %} +{%- endif %} + +{%- if acados_link_libs and os and os == "pc" %}{# acados linking libraries and flags #} + {%- set link_libs = acados_link_libs.qpoases ~ " " ~ acados_link_libs.hpmpc ~ " " ~ acados_link_libs.osqp -%} + {%- set openmp_flag = acados_link_libs.openmp %} +{%- else %} + {%- set openmp_flag = " " %} + {%- if qp_solver == "FULL_CONDENSING_QPOASES" %} + {%- set link_libs = "-lqpOASES_e" %} + {%- else %} + {%- set link_libs = "" %} + {%- endif %} +{%- endif %} + +cmake_minimum_required(VERSION 3.10) + +project({{ model.name }}) + +# build options. +option(BUILD_ACADOS_SOLVER_LIB "Should the solver library acados_solver_{{ model.name }} be build?" OFF) +option(BUILD_ACADOS_OCP_SOLVER_LIB "Should the OCP solver library acados_ocp_solver_{{ model.name }} be build?" OFF) +option(BUILD_EXAMPLE "Should the example main_{{ model.name }} be build?" OFF) +{%- if solver_options.integrator_type != "DISCRETE" %} +option(BUILD_SIM_EXAMPLE "Should the simulation example main_sim_{{ model.name }} be build?" OFF) +option(BUILD_ACADOS_SIM_SOLVER_LIB "Should the simulation solver library acados_sim_solver_{{ model.name }} be build?" OFF) +{%- endif %} + +# object target names +set(MODEL_OBJ model_{{ model.name }}) +set(OCP_OBJ ocp_{{ model.name }}) +set(SIM_OBJ sim_{{ model.name }}) + +# model +set(MODEL_SRC +{%- if solver_options.integrator_type == "ERK" %} + {{ model.name }}_model/{{ model.name }}_expl_ode_fun.c + {{ model.name }}_model/{{ model.name }}_expl_vde_forw.c + {%- if hessian_approx == "EXACT" %} + {{ model.name }}_model/{{ model.name }}_expl_ode_hess.c + {%- endif %} +{%- elif solver_options.integrator_type == "IRK" %} + {{ model.name }}_model/{{ model.name }}_impl_dae_fun.c + {{ model.name }}_model/{{ model.name }}_impl_dae_fun_jac_x_xdot_z.c + {{ model.name }}_model/{{ model.name }}_impl_dae_jac_x_xdot_u_z.c + {%- if hessian_approx == "EXACT" %} + {{ model.name }}_model/{{ model.name }}_impl_dae_hess.c + {%- endif %} +{%- elif solver_options.integrator_type == "LIFTED_IRK" %} + {{ model.name }}_model/{{ model.name }}_impl_dae_fun.c + {{ model.name }}_model/{{ model.name }}_impl_dae_fun_jac_x_xdot_u.c + {%- if hessian_approx == "EXACT" %} + {{ model.name }}_model/{{ model.name }}_impl_dae_hess.c + {%- endif %} +{%- elif solver_options.integrator_type == "GNSF" %} + {% if model.gnsf.purely_linear != 1 %} + {{ model.name }}_model/{{ model.name }}_gnsf_phi_fun.c + {{ model.name }}_model/{{ model.name }}_gnsf_phi_fun_jac_y.c + {{ model.name }}_model/{{ model.name }}_gnsf_phi_jac_y_uhat.c + {% if model.gnsf.nontrivial_f_LO == 1 %} + {{ model.name }}_model/{{ model.name }}_gnsf_f_lo_fun_jac_x1k1uz.c + {%- endif %} + {%- endif %} + {{ model.name }}_model/{{ model.name }}_gnsf_get_matrices_fun.c +{%- elif solver_options.integrator_type == "DISCRETE" %} + {%- if model.dyn_ext_fun_type == "casadi" %} + {{ model.name }}_model/{{ model.name }}_dyn_disc_phi_fun.c + {{ model.name }}_model/{{ model.name }}_dyn_disc_phi_fun_jac.c + {%- if hessian_approx == "EXACT" %} + {{ model.name }}_model/{{ model.name }}_dyn_disc_phi_fun_jac_hess.c + {%- endif %} + {%- else %} + {{ model.name }}_model/{{ model.dyn_source_discrete }} + {%- endif %} +{%- endif -%} +) +add_library(${MODEL_OBJ} OBJECT ${MODEL_SRC} ) + +# optimal control problem - mostly CasADi exports +if(${BUILD_ACADOS_SOLVER_LIB} OR ${BUILD_ACADOS_OCP_SOLVER_LIB} OR ${BUILD_EXAMPLE}) + set(OCP_SRC +{%- if constr_type == "BGP" and dims_nphi > 0 %} + {{ model.name }}_constraints/{{ model.name }}_phi_constraint.c +{%- endif %} +{%- if constr_type_e == "BGP" and dims_nphi_e > 0 %} + {{ model.name }}_constraints/{{ model.name }}_phi_e_constraint.c +{%- endif %} + +{%- if constr_type == "BGH" and dims_nh > 0 %} + {{ model.name }}_constraints/{{ model.name }}_constr_h_fun_jac_uxt_zt.c + {{ model.name }}_constraints/{{ model.name }}_constr_h_fun.c + {%- if hessian_approx == "EXACT" %} + {{ model.name }}_constraints/{{ model.name }}_constr_h_fun_jac_uxt_zt_hess.c + {%- endif %} +{%- endif %} + +{%- if constr_type_e == "BGH" and dims_nh_e > 0 %} + {{ model.name }}_constraints/{{ model.name }}_constr_h_e_fun_jac_uxt_zt.c + {{ model.name }}_constraints/{{ model.name }}_constr_h_e_fun.c + {%- if hessian_approx == "EXACT" %} + {{ model.name }}_constraints/{{ model.name }}_constr_h_e_fun_jac_uxt_zt_hess.c + {%- endif %} +{%- endif %} + +{%- if cost_type_0 == "NONLINEAR_LS" %} + {{ model.name }}_cost/{{ model.name }}_cost_y_0_fun.c + {{ model.name }}_cost/{{ model.name }}_cost_y_0_fun_jac_ut_xt.c + {{ model.name }}_cost/{{ model.name }}_cost_y_0_hess.c +{%- elif cost_type_0 == "EXTERNAL" %} + {%- if cost.cost_ext_fun_type_0 == "casadi" %} + {{ model.name }}_cost/{{ model.name }}_cost_ext_cost_0_fun.c + {{ model.name }}_cost/{{ model.name }}_cost_ext_cost_0_fun_jac.c + {{ model.name }}_cost/{{ model.name }}_cost_ext_cost_0_fun_jac_hess.c + {%- else %} + {{ model.name }}_cost/{{ cost.cost_source_ext_cost_0 }} + {%- endif %} +{%- endif %} +{%- if cost_type == "NONLINEAR_LS" %} + {{ model.name }}_cost/{{ model.name }}_cost_y_fun.c + {{ model.name }}_cost/{{ model.name }}_cost_y_fun_jac_ut_xt.c + {{ model.name }}_cost/{{ model.name }}_cost_y_hess.c +{%- elif cost_type == "EXTERNAL" %} + {%- if cost.cost_ext_fun_type == "casadi" %} + {{ model.name }}_cost/{{ model.name }}_cost_ext_cost_fun.c + {{ model.name }}_cost/{{ model.name }}_cost_ext_cost_fun_jac.c + {{ model.name }}_cost/{{ model.name }}_cost_ext_cost_fun_jac_hess.c + {%- elif cost.cost_source_ext_cost != cost.cost_source_ext_cost_0 %} + {{ model.name }}_cost/{{ cost.cost_source_ext_cost }} + {%- endif %} +{%- endif %} +{%- if cost_type_e == "NONLINEAR_LS" %} + {{ model.name }}_cost/{{ model.name }}_cost_y_e_fun.c + {{ model.name }}_cost/{{ model.name }}_cost_y_e_fun_jac_ut_xt.c + {{ model.name }}_cost/{{ model.name }}_cost_y_e_hess.c +{%- elif cost_type_e == "EXTERNAL" %} + {%- if cost.cost_ext_fun_type_e == "casadi" %} + {{ model.name }}_cost/{{ model.name }}_cost_ext_cost_e_fun.c + {{ model.name }}_cost/{{ model.name }}_cost_ext_cost_e_fun_jac.c + {{ model.name }}_cost/{{ model.name }}_cost_ext_cost_e_fun_jac_hess.c + {%- elif cost.cost_source_ext_cost_e != cost.cost_source_ext_cost_0 %} + {{ model.name }}_cost/{{ cost.cost_source_ext_cost_e }} + {%- endif %} +{%- endif %} + acados_solver_{{ model.name }}.c) + add_library(${OCP_OBJ} OBJECT ${OCP_SRC}) +endif() + +{%- if solver_options.integrator_type != "DISCRETE" %} +# for sim solver +if(${BUILD_ACADOS_SOLVER_LIB} OR ${BUILD_EXAMPLE} + {%- if solver_options.integrator_type != "DISCRETE" %} + OR ${BUILD_SIM_EXAMPLE} OR ${BUILD_ACADOS_SIM_SOLVER_LIB} + {%- endif -%} + ) + set(SIM_SRC acados_sim_solver_{{ model.name }}.c) + add_library(${SIM_OBJ} OBJECT ${SIM_SRC}) +endif() +{%- endif %} + +# for target example +set(EX_SRC main_{{ model.name }}.c) +set(EX_EXE main_{{ model.name }}) + +{%- if model_external_shared_lib_dir and model_external_shared_lib_name %} +set(EXTERNAL_DIR {{ model_external_shared_lib_dir }}) +set(EXTERNAL_LIB {{ model_external_shared_lib_name }}) +{%- else %} +set(EXTERNAL_DIR) +set(EXTERNAL_LIB) +{%- endif %} + +# set some search paths for preprocessor and linker +set(ACADOS_INCLUDE_PATH {{ acados_include_path }} CACHE PATH "Define the path which contains the include directory for acados.") +set(ACADOS_LIB_PATH {{ acados_lib_path }} CACHE PATH "Define the path which contains the lib directory for acados.") + +# c-compiler flags for debugging +set(CMAKE_C_FLAGS_DEBUG "-O0 -ggdb") + +set(CMAKE_C_FLAGS " +{%- if qp_solver == "FULL_CONDENSING_QPOASES" -%} + -DACADOS_WITH_QPOASES +{%- endif -%} +{%- if qp_solver == "PARTIAL_CONDENSING_OSQP" -%} + -DACADOS_WITH_OSQP +{%- endif -%} +{%- if qp_solver == "PARTIAL_CONDENSING_QPDUNES" -%} + -DACADOS_WITH_QPDUNES +{%- endif -%} + -fPIC -std=c99 {{ openmp_flag }}") +#-fno-diagnostics-show-line-numbers -g + +include_directories( + ${ACADOS_INCLUDE_PATH} + ${ACADOS_INCLUDE_PATH}/acados + ${ACADOS_INCLUDE_PATH}/blasfeo/include + ${ACADOS_INCLUDE_PATH}/hpipm/include +{%- if qp_solver == "FULL_CONDENSING_QPOASES" %} + ${ACADOS_INCLUDE_PATH}/qpOASES_e/ +{%- endif %} +) + +# linker flags +link_directories(${ACADOS_LIB_PATH}) + +# link to libraries +if(UNIX) + link_libraries(acados hpipm blasfeo m {{ link_libs }}) +else() + link_libraries(acados hpipm blasfeo {{ link_libs }}) +endif() + +# the targets + +# bundled_shared_lib +if(${BUILD_ACADOS_SOLVER_LIB}) + set(LIB_ACADOS_SOLVER acados_solver_{{ model.name }}) + add_library(${LIB_ACADOS_SOLVER} SHARED $ $ + {%- if solver_options.integrator_type != "DISCRETE" %} + $ + {%- endif -%} + ) + install(TARGETS ${LIB_ACADOS_SOLVER} DESTINATION ${CMAKE_INSTALL_PREFIX}) +endif(${BUILD_ACADOS_SOLVER_LIB}) + +# ocp_shared_lib +if(${BUILD_ACADOS_OCP_SOLVER_LIB}) + set(LIB_ACADOS_OCP_SOLVER acados_ocp_solver_{{ model.name }}) + add_library(${LIB_ACADOS_OCP_SOLVER} SHARED $ $) + # Specify libraries or flags to use when linking a given target and/or its dependents. + target_link_libraries(${LIB_ACADOS_OCP_SOLVER} PRIVATE ${EXTERNAL_LIB}) + target_link_directories(${LIB_ACADOS_OCP_SOLVER} PRIVATE ${EXTERNAL_DIR}) + install(TARGETS ${LIB_ACADOS_OCP_SOLVER} DESTINATION ${CMAKE_INSTALL_PREFIX}) +endif(${BUILD_ACADOS_OCP_SOLVER_LIB}) + +# example +if(${BUILD_EXAMPLE}) + add_executable(${EX_EXE} ${EX_SRC} $ $ + {%- if solver_options.integrator_type != "DISCRETE" %} + $ + {%- endif -%} + ) + install(TARGETS ${EX_EXE} DESTINATION ${CMAKE_INSTALL_PREFIX}) +endif(${BUILD_EXAMPLE}) + +{% if solver_options.integrator_type != "DISCRETE" -%} +# example_sim +if(${BUILD_SIM_EXAMPLE}) + set(EX_SIM_SRC main_sim_{{ model.name }}.c) + set(EX_SIM_EXE main_sim_{{ model.name }}) + add_executable(${EX_SIM_EXE} ${EX_SIM_SRC} $ $) + install(TARGETS ${EX_SIM_EXE} DESTINATION ${CMAKE_INSTALL_PREFIX}) +endif(${BUILD_SIM_EXAMPLE}) + +# sim_shared_lib +if(${BUILD_ACADOS_SIM_SOLVER_LIB}) + set(LIB_ACADOS_SIM_SOLVER acados_sim_solver_{{ model.name }}) + add_library(${LIB_ACADOS_SIM_SOLVER} SHARED $ $) + install(TARGETS ${LIB_ACADOS_SIM_SOLVER} DESTINATION ${CMAKE_INSTALL_PREFIX}) +endif(${BUILD_ACADOS_SIM_SOLVER_LIB}) +{%- endif %} + diff --git a/pyextra/acados_template/c_templates_tera/acados_sim_solver.in.h b/pyextra/acados_template/c_templates_tera/acados_sim_solver.in.h index 4da8202d3c..7306491baf 100644 --- a/pyextra/acados_template/c_templates_tera/acados_sim_solver.in.h +++ b/pyextra/acados_template/c_templates_tera/acados_sim_solver.in.h @@ -80,21 +80,21 @@ typedef struct sim_solver_capsule } sim_solver_capsule; -int {{ model.name }}_acados_sim_create(sim_solver_capsule *capsule); -int {{ model.name }}_acados_sim_solve(sim_solver_capsule *capsule); -int {{ model.name }}_acados_sim_free(sim_solver_capsule *capsule); -int {{ model.name }}_acados_sim_update_params(sim_solver_capsule *capsule, double *value, int np); +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_sim_create(sim_solver_capsule *capsule); +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_sim_solve(sim_solver_capsule *capsule); +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_sim_free(sim_solver_capsule *capsule); +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_sim_update_params(sim_solver_capsule *capsule, double *value, int np); -sim_config * {{ model.name }}_acados_get_sim_config(sim_solver_capsule *capsule); -sim_in * {{ model.name }}_acados_get_sim_in(sim_solver_capsule *capsule); -sim_out * {{ model.name }}_acados_get_sim_out(sim_solver_capsule *capsule); -void * {{ model.name }}_acados_get_sim_dims(sim_solver_capsule *capsule); -sim_opts * {{ model.name }}_acados_get_sim_opts(sim_solver_capsule *capsule); -sim_solver * {{ model.name }}_acados_get_sim_solver(sim_solver_capsule *capsule); +ACADOS_SYMBOL_EXPORT sim_config * {{ model.name }}_acados_get_sim_config(sim_solver_capsule *capsule); +ACADOS_SYMBOL_EXPORT sim_in * {{ model.name }}_acados_get_sim_in(sim_solver_capsule *capsule); +ACADOS_SYMBOL_EXPORT sim_out * {{ model.name }}_acados_get_sim_out(sim_solver_capsule *capsule); +ACADOS_SYMBOL_EXPORT void * {{ model.name }}_acados_get_sim_dims(sim_solver_capsule *capsule); +ACADOS_SYMBOL_EXPORT sim_opts * {{ model.name }}_acados_get_sim_opts(sim_solver_capsule *capsule); +ACADOS_SYMBOL_EXPORT sim_solver * {{ model.name }}_acados_get_sim_solver(sim_solver_capsule *capsule); -sim_solver_capsule * {{ model.name }}_acados_sim_solver_create_capsule(void); -int {{ model.name }}_acados_sim_solver_free_capsule(sim_solver_capsule *capsule); +ACADOS_SYMBOL_EXPORT sim_solver_capsule * {{ model.name }}_acados_sim_solver_create_capsule(void); +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_sim_solver_free_capsule(sim_solver_capsule *capsule); #ifdef __cplusplus } diff --git a/pyextra/acados_template/c_templates_tera/acados_solver.in.c b/pyextra/acados_template/c_templates_tera/acados_solver.in.c index a42937ed28..0af8127709 100644 --- a/pyextra/acados_template/c_templates_tera/acados_solver.in.c +++ b/pyextra/acados_template/c_templates_tera/acados_solver.in.c @@ -1862,6 +1862,11 @@ void {{ model.name }}_acados_create_6_set_opts({{ model.name }}_solver_capsule* {%- endif %} +{%- if solver_options.qp_solver is containing("HPIPM") %} + // set HPIPM mode: should be done before setting other QP solver options + ocp_nlp_solver_opts_set(nlp_config, nlp_opts, "qp_hpipm_mode", "{{ solver_options.hpipm_mode }}"); +{%- endif %} + {% if solver_options.nlp_solver_type == "SQP" %} // set SQP specific options double nlp_solver_tol_stat = {{ solver_options.nlp_solver_tol_stat }}; @@ -2071,6 +2076,63 @@ int {{ model.name }}_acados_update_qp_solver_cond_N({{ model.name }}_solver_caps } +int {{ model.name }}_acados_reset({{ model.name }}_solver_capsule* capsule) +{ + + // set initialization to all zeros +{# TODO: use guess values / initial state value from json instead?! #} + const int N = capsule->nlp_solver_plan->N; + ocp_nlp_config* nlp_config = capsule->nlp_config; + ocp_nlp_dims* nlp_dims = capsule->nlp_dims; + ocp_nlp_out* nlp_out = capsule->nlp_out; + ocp_nlp_in* nlp_in = capsule->nlp_in; + ocp_nlp_solver* nlp_solver = capsule->nlp_solver; + + int nx, nu, nv, ns, nz, ni, dim; + + double* buffer = calloc(NX+NU+NZ+2*NS+2*NSN+NBX+NBU+NG+NH+NPHI+NBX0+NBXN+NHN+NPHIN+NGN, sizeof(double)); + + for(int i=0; i reset memory + int qp_status; + ocp_nlp_get(capsule->nlp_config, capsule->nlp_solver, "qp_status", &qp_status); + if (qp_status == 3) + { + // printf("\nin reset qp_status %d -> resetting QP memory\n", qp_status); + ocp_nlp_solver_reset_qp_memory(nlp_solver, nlp_in, nlp_out); + } +{%- endif %} + + free(buffer); + return 0; +} + + + + int {{ model.name }}_acados_update_params({{ model.name }}_solver_capsule* capsule, int stage, double *p, int np) { int solver_status = 0; @@ -2096,7 +2158,7 @@ int {{ model.name }}_acados_update_params({{ model.name }}_solver_capsule* capsu {%- endif %} {% elif solver_options.integrator_type == "LIFTED_IRK" %} capsule->impl_dae_fun[stage].set_param(capsule->impl_dae_fun+stage, p); - capsule->impl_dae_fun_jac_x_xdot_z[stage].set_param(capsule->impl_dae_fun_jac_x_xdot_z+stage, p); + capsule->impl_dae_fun_jac_x_xdot_u[stage].set_param(capsule->impl_dae_fun_jac_x_xdot_u+stage, p); {% elif solver_options.integrator_type == "ERK" %} capsule->forw_vde_casadi[stage].set_param(capsule->forw_vde_casadi+stage, p); capsule->expl_ode_fun[stage].set_param(capsule->expl_ode_fun+stage, p); diff --git a/pyextra/acados_template/c_templates_tera/acados_solver.in.h b/pyextra/acados_template/c_templates_tera/acados_solver.in.h index 2efc9e5157..1c82ef3ba0 100644 --- a/pyextra/acados_template/c_templates_tera/acados_solver.in.h +++ b/pyextra/acados_template/c_templates_tera/acados_solver.in.h @@ -34,6 +34,8 @@ #ifndef ACADOS_SOLVER_{{ model.name }}_H_ #define ACADOS_SOLVER_{{ model.name }}_H_ +#include "acados/utils/types.h" + #include "acados_c/ocp_nlp_interface.h" #include "acados_c/external_function_interface.h" @@ -172,38 +174,41 @@ typedef struct {{ model.name }}_solver_capsule } {{ model.name }}_solver_capsule; -{{ model.name }}_solver_capsule * {{ model.name }}_acados_create_capsule(void); -int {{ model.name }}_acados_free_capsule({{ model.name }}_solver_capsule *capsule); +ACADOS_SYMBOL_EXPORT {{ model.name }}_solver_capsule * {{ model.name }}_acados_create_capsule(void); +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_free_capsule({{ model.name }}_solver_capsule *capsule); + +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_create({{ model.name }}_solver_capsule * capsule); + +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_reset({{ model.name }}_solver_capsule* capsule); -int {{ model.name }}_acados_create({{ model.name }}_solver_capsule * capsule); /** * Generic version of {{ model.name }}_acados_create which allows to use a different number of shooting intervals than * the number used for code generation. If new_time_steps=NULL and n_time_steps matches the number used for code * generation, the time-steps from code generation is used. */ -int {{ model.name }}_acados_create_with_discretization({{ model.name }}_solver_capsule * capsule, int n_time_steps, double* new_time_steps); +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_create_with_discretization({{ model.name }}_solver_capsule * capsule, int n_time_steps, double* new_time_steps); /** * Update the time step vector. Number N must be identical to the currently set number of shooting nodes in the * nlp_solver_plan. Returns 0 if no error occurred and a otherwise a value other than 0. */ -int {{ model.name }}_acados_update_time_steps({{ model.name }}_solver_capsule * capsule, int N, double* new_time_steps); +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_update_time_steps({{ model.name }}_solver_capsule * capsule, int N, double* new_time_steps); /** * This function is used for updating an already initialized solver with a different number of qp_cond_N. */ -int {{ model.name }}_acados_update_qp_solver_cond_N({{ model.name }}_solver_capsule * capsule, int qp_solver_cond_N); -int {{ model.name }}_acados_update_params({{ model.name }}_solver_capsule * capsule, int stage, double *value, int np); -int {{ model.name }}_acados_solve({{ model.name }}_solver_capsule * capsule); -int {{ model.name }}_acados_free({{ model.name }}_solver_capsule * capsule); -void {{ model.name }}_acados_print_stats({{ model.name }}_solver_capsule * capsule); - -ocp_nlp_in *{{ model.name }}_acados_get_nlp_in({{ model.name }}_solver_capsule * capsule); -ocp_nlp_out *{{ model.name }}_acados_get_nlp_out({{ model.name }}_solver_capsule * capsule); -ocp_nlp_out *{{ model.name }}_acados_get_sens_out({{ model.name }}_solver_capsule * capsule); -ocp_nlp_solver *{{ model.name }}_acados_get_nlp_solver({{ model.name }}_solver_capsule * capsule); -ocp_nlp_config *{{ model.name }}_acados_get_nlp_config({{ model.name }}_solver_capsule * capsule); -void *{{ model.name }}_acados_get_nlp_opts({{ model.name }}_solver_capsule * capsule); -ocp_nlp_dims *{{ model.name }}_acados_get_nlp_dims({{ model.name }}_solver_capsule * capsule); -ocp_nlp_plan_t *{{ model.name }}_acados_get_nlp_plan({{ model.name }}_solver_capsule * capsule); +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_update_qp_solver_cond_N({{ model.name }}_solver_capsule * capsule, int qp_solver_cond_N); +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_update_params({{ model.name }}_solver_capsule * capsule, int stage, double *value, int np); +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_solve({{ model.name }}_solver_capsule * capsule); +ACADOS_SYMBOL_EXPORT int {{ model.name }}_acados_free({{ model.name }}_solver_capsule * capsule); +ACADOS_SYMBOL_EXPORT void {{ model.name }}_acados_print_stats({{ model.name }}_solver_capsule * capsule); + +ACADOS_SYMBOL_EXPORT ocp_nlp_in *{{ model.name }}_acados_get_nlp_in({{ model.name }}_solver_capsule * capsule); +ACADOS_SYMBOL_EXPORT ocp_nlp_out *{{ model.name }}_acados_get_nlp_out({{ model.name }}_solver_capsule * capsule); +ACADOS_SYMBOL_EXPORT ocp_nlp_out *{{ model.name }}_acados_get_sens_out({{ model.name }}_solver_capsule * capsule); +ACADOS_SYMBOL_EXPORT ocp_nlp_solver *{{ model.name }}_acados_get_nlp_solver({{ model.name }}_solver_capsule * capsule); +ACADOS_SYMBOL_EXPORT ocp_nlp_config *{{ model.name }}_acados_get_nlp_config({{ model.name }}_solver_capsule * capsule); +ACADOS_SYMBOL_EXPORT void *{{ model.name }}_acados_get_nlp_opts({{ model.name }}_solver_capsule * capsule); +ACADOS_SYMBOL_EXPORT ocp_nlp_dims *{{ model.name }}_acados_get_nlp_dims({{ model.name }}_solver_capsule * capsule); +ACADOS_SYMBOL_EXPORT ocp_nlp_plan_t *{{ model.name }}_acados_get_nlp_plan({{ model.name }}_solver_capsule * capsule); #ifdef __cplusplus } /* extern "C" */ diff --git a/pyextra/acados_template/c_templates_tera/acados_solver.in.pxd b/pyextra/acados_template/c_templates_tera/acados_solver.in.pxd index 2cc5ee3910..09f8755cbe 100644 --- a/pyextra/acados_template/c_templates_tera/acados_solver.in.pxd +++ b/pyextra/acados_template/c_templates_tera/acados_solver.in.pxd @@ -48,6 +48,7 @@ cdef extern from "acados_solver_{{ model.name }}.h": int acados_update_params "{{ model.name }}_acados_update_params"(nlp_solver_capsule * capsule, int stage, double *value, int np_) int acados_solve "{{ model.name }}_acados_solve"(nlp_solver_capsule * capsule) + int acados_reset "{{ model.name }}_acados_reset"(nlp_solver_capsule * capsule) int acados_free "{{ model.name }}_acados_free"(nlp_solver_capsule * capsule) void acados_print_stats "{{ model.name }}_acados_print_stats"(nlp_solver_capsule * capsule) diff --git a/pyextra/acados_template/c_templates_tera/acados_solver_sfun.in.c b/pyextra/acados_template/c_templates_tera/acados_solver_sfun.in.c index a27ba8837f..96e0983de6 100644 --- a/pyextra/acados_template/c_templates_tera/acados_solver_sfun.in.c +++ b/pyextra/acados_template/c_templates_tera/acados_solver_sfun.in.c @@ -126,10 +126,14 @@ static void mdlInitializeSizes (SimStruct *S) {%- if dims.ny_0 > 0 and simulink_opts.inputs.cost_W_0 %} {#- cost_W_0 #} {%- set n_inputs = n_inputs + 1 %} {%- endif -%} - {%- if dims.ny > 0 and simulink_opts.inputs.cost_W -%} {#- cost_W #} + {%- if dims.ny > 0 and simulink_opts.inputs.cost_W %} {#- cost_W #} {%- set n_inputs = n_inputs + 1 %} {%- endif -%} - {%- if dims.ny_e > 0 and simulink_opts.inputs.cost_W_e -%} {#- cost_W_e #} + {%- if dims.ny_e > 0 and simulink_opts.inputs.cost_W_e %} {#- cost_W_e #} + {%- set n_inputs = n_inputs + 1 -%} + {%- endif -%} + + {%- if simulink_opts.inputs.reset_solver -%} {#- reset_solver #} {%- set n_inputs = n_inputs + 1 -%} {%- endif -%} @@ -256,7 +260,7 @@ static void mdlInitializeSizes (SimStruct *S) ssSetInputPortVectorDimension(S, {{ i_input }}, {{ dims.ny_0 * dims.ny_0 }}); {%- endif %} - {%- if dims.ny_0 > 0 and simulink_opts.inputs.cost_W %} {#- cost_W #} + {%- if dims.ny > 0 and simulink_opts.inputs.cost_W %} {#- cost_W #} {%- set i_input = i_input + 1 %} // cost_W ssSetInputPortVectorDimension(S, {{ i_input }}, {{ dims.ny * dims.ny }}); @@ -268,6 +272,12 @@ static void mdlInitializeSizes (SimStruct *S) ssSetInputPortVectorDimension(S, {{ i_input }}, {{ dims.ny_e * dims.ny_e }}); {%- endif %} + {%- if simulink_opts.inputs.reset_solver -%} {#- reset_solver #} + {%- set i_input = i_input + 1 %} + // reset_solver + ssSetInputPortVectorDimension(S, {{ i_input }}, 1); + {%- endif -%} + {%- if simulink_opts.inputs.x_init -%} {#- x_init #} {%- set i_input = i_input + 1 %} // x_init @@ -406,13 +416,13 @@ static void mdlOutputs(SimStruct *S, int_T tid) {%- set buffer_sizes = buffer_sizes | concat(with=(dims.ny_e)) %} {%- endif %} - {%- if dims.ny_0 > 0 and simulink_opts.inputs.cost_W_0 %} {# cost_W_0 #} + {%- if dims.ny_0 > 0 and simulink_opts.inputs.cost_W_0 %} {#- cost_W_0 #} {%- set buffer_sizes = buffer_sizes | concat(with=(dims.ny_0 * dims.ny_0)) %} {%- endif %} - {%- if dims.ny > 0 and simulink_opts.inputs.cost_W %} {# cost_W #} + {%- if dims.ny > 0 and simulink_opts.inputs.cost_W %} {#- cost_W #} {%- set buffer_sizes = buffer_sizes | concat(with=(dims.ny * dims.ny)) %} {%- endif %} - {%- if dims.ny_e > 0 and simulink_opts.inputs.cost_W_e %} {# cost_W_e #} + {%- if dims.ny_e > 0 and simulink_opts.inputs.cost_W_e %} {#- cost_W_e #} {%- set buffer_sizes = buffer_sizes | concat(with=(dims.ny_e * dims.ny_e)) %} {%- endif %} @@ -602,7 +612,7 @@ static void mdlOutputs(SimStruct *S, int_T tid) ocp_nlp_constraints_model_set(nlp_config, nlp_dims, nlp_in, ii, "uh", buffer); {%- endif -%} - {%- if dims.ny_0 > 0 and simulink_opts.inputs.cost_W_0 %} {# cost_W_0 #} + {%- if dims.ny_0 > 0 and simulink_opts.inputs.cost_W_0 %} {#- cost_W_0 #} // cost_W_0 {%- set i_input = i_input + 1 %} in_sign = ssGetInputPortRealSignalPtrs(S, {{ i_input }}); @@ -612,7 +622,7 @@ static void mdlOutputs(SimStruct *S, int_T tid) ocp_nlp_cost_model_set(nlp_config, nlp_dims, nlp_in, 0, "W", buffer); {%- endif %} - {%- if dims.ny > 0 and simulink_opts.inputs.cost_W %} {# cost_W #} + {%- if dims.ny > 0 and simulink_opts.inputs.cost_W %} {#- cost_W #} // cost_W {%- set i_input = i_input + 1 %} in_sign = ssGetInputPortRealSignalPtrs(S, {{ i_input }}); @@ -633,6 +643,17 @@ static void mdlOutputs(SimStruct *S, int_T tid) ocp_nlp_cost_model_set(nlp_config, nlp_dims, nlp_in, {{ dims.N }}, "W", buffer); {%- endif %} + {%- if simulink_opts.inputs.reset_solver %} {#- reset_solver #} + // reset_solver + {%- set i_input = i_input + 1 %} + in_sign = ssGetInputPortRealSignalPtrs(S, {{ i_input }}); + double reset = (double)(*in_sign[0]); + if (reset) + { + {{ model.name }}_acados_reset(capsule); + } + {%- endif %} + {%- if simulink_opts.inputs.x_init %} {#- x_init #} // x_init {%- set i_input = i_input + 1 %} diff --git a/pyextra/acados_template/c_templates_tera/make_sfun.in.m b/pyextra/acados_template/c_templates_tera/make_sfun.in.m index 30339bf5dc..77603a78fa 100644 --- a/pyextra/acados_template/c_templates_tera/make_sfun.in.m +++ b/pyextra/acados_template/c_templates_tera/make_sfun.in.m @@ -264,6 +264,11 @@ input_note = strcat(input_note, num2str(i_in), ') cost_W_e in column-major forma i_in = i_in + 1; {%- endif %} +{%- if simulink_opts.inputs.reset_solver %} {#- reset_solver #} +input_note = strcat(input_note, num2str(i_in), ') reset_solver determines if iterate is set to all zeros before other initializations (x_init, u_init) are set and before solver is called, size [1]\n '); +i_in = i_in + 1; +{%- endif %} + {%- if simulink_opts.inputs.x_init %} {#- x_init #} input_note = strcat(input_note, num2str(i_in), ') initialization of x for all shooting nodes, size [{{ dims.nx * (dims.N+1) }}]\n '); i_in = i_in + 1; diff --git a/pyextra/acados_template/generate_c_code_discrete_dynamics.py b/pyextra/acados_template/generate_c_code_discrete_dynamics.py index 334e18dab7..c6a245ff81 100644 --- a/pyextra/acados_template/generate_c_code_discrete_dynamics.py +++ b/pyextra/acados_template/generate_c_code_discrete_dynamics.py @@ -32,12 +32,12 @@ # import os -from casadi import * +import casadi as ca from .utils import ALLOWED_CASADI_VERSIONS, casadi_length, casadi_version_warning def generate_c_code_discrete_dynamics( model, opts ): - casadi_version = CasadiMeta.version() + casadi_version = ca.CasadiMeta.version() casadi_opts = dict(mex=False, casadi_int='int', casadi_real='double') if casadi_version not in (ALLOWED_CASADI_VERSIONS): @@ -49,13 +49,12 @@ def generate_c_code_discrete_dynamics( model, opts ): p = model.p phi = model.disc_dyn_expr model_name = model.name - nx = x.size()[0] + nx = casadi_length(x) - - if isinstance(phi, casadi.MX): - symbol = MX.sym - elif isinstance(phi, casadi.SX): - symbol = SX.sym + if isinstance(phi, ca.MX): + symbol = ca.MX.sym + elif isinstance(phi, ca.SX): + symbol = ca.SX.sym else: Exception("generate_c_code_disc_dyn: disc_dyn_expr must be a CasADi expression, you have type: {}".format(type(phi))) @@ -63,12 +62,12 @@ def generate_c_code_discrete_dynamics( model, opts ): lam = symbol('lam', nx, 1) # generate jacobians - ux = vertcat(u,x) - jac_ux = jacobian(phi, ux) + ux = ca.vertcat(u,x) + jac_ux = ca.jacobian(phi, ux) # generate adjoint - adj_ux = jtimes(phi, ux, lam, True) + adj_ux = ca.jtimes(phi, ux, lam, True) # generate hessian - hess_ux = jacobian(adj_ux, ux) + hess_ux = ca.jacobian(adj_ux, ux) ## change directory code_export_dir = opts["code_export_directory"] @@ -85,15 +84,15 @@ def generate_c_code_discrete_dynamics( model, opts ): # set up & generate Functions fun_name = model_name + '_dyn_disc_phi_fun' - phi_fun = Function(fun_name, [x, u, p], [phi]) + phi_fun = ca.Function(fun_name, [x, u, p], [phi]) phi_fun.generate(fun_name, casadi_opts) fun_name = model_name + '_dyn_disc_phi_fun_jac' - phi_fun_jac_ut_xt = Function(fun_name, [x, u, p], [phi, jac_ux.T]) + phi_fun_jac_ut_xt = ca.Function(fun_name, [x, u, p], [phi, jac_ux.T]) phi_fun_jac_ut_xt.generate(fun_name, casadi_opts) fun_name = model_name + '_dyn_disc_phi_fun_jac_hess' - phi_fun_jac_ut_xt_hess = Function(fun_name, [x, u, lam, p], [phi, jac_ux.T, hess_ux]) + phi_fun_jac_ut_xt_hess = ca.Function(fun_name, [x, u, lam, p], [phi, jac_ux.T, hess_ux]) phi_fun_jac_ut_xt_hess.generate(fun_name, casadi_opts) os.chdir(cwd) diff --git a/pyextra/acados_template/simulink_default_opts.json b/pyextra/acados_template/simulink_default_opts.json index 258a224cb2..70c939cb88 100644 --- a/pyextra/acados_template/simulink_default_opts.json +++ b/pyextra/acados_template/simulink_default_opts.json @@ -32,6 +32,7 @@ "cost_W_0": 0, "cost_W": 0, "cost_W_e": 0, + "reset_solver": 0, "x_init": 0, "u_init": 0 }, diff --git a/pyextra/acados_template/utils.py b/pyextra/acados_template/utils.py index a60e99fc75..8f44f61e7e 100644 --- a/pyextra/acados_template/utils.py +++ b/pyextra/acados_template/utils.py @@ -1,3 +1,4 @@ +# -*- coding: future_fstrings -*- # # Copyright 2019 Gianluca Frison, Dimitris Kouzoupis, Robin Verschueren, # Andrea Zanelli, Niels van Duijkeren, Jonathan Frey, Tommaso Sartor, @@ -49,7 +50,7 @@ def get_acados_path(): ACADOS_PATH = os.path.realpath(acados_path) msg = 'Warning: Did not find environment variable ACADOS_SOURCE_DIR, ' msg += 'guessed ACADOS_PATH to be {}.\n'.format(ACADOS_PATH) - msg += 'Please export ACADOS_SOURCE_DIR to not avoid this warning.' + msg += 'Please export ACADOS_SOURCE_DIR to avoid this warning.' print(msg) return ACADOS_PATH @@ -74,7 +75,7 @@ def get_tera_exec_path(): platform2tera = { "linux": "linux", "darwin": "osx", - "win32": "window.exe" + "win32": "windows" } @@ -212,16 +213,14 @@ def render_template(in_file, out_file, template_dir, json_path): template_glob = os.path.join(acados_path, 'c_templates_tera', '*') # call tera as system cmd - os_cmd = "{tera_path} '{template_glob}' '{in_file}' '{json_path}' '{out_file}'".format( - tera_path=tera_path, - template_glob=template_glob, - json_path=json_path, - in_file=in_file, - out_file=out_file - ) + os_cmd = f"{tera_path} '{template_glob}' '{in_file}' '{json_path}' '{out_file}'" + # Windows cmd.exe can not cope with '...', so use "..." instead: + if os.name == 'nt': + os_cmd = os_cmd.replace('\'', '\"') + status = os.system(os_cmd) if (status != 0): - raise Exception('Rendering of {} failed!\n\nAttempted to execute OS command:\n{}\n\nExiting.\n'.format(in_file, os_cmd)) + raise Exception(f'Rendering of {in_file} failed!\n\nAttempted to execute OS command:\n{os_cmd}\n\nExiting.\n') os.chdir(cwd) @@ -235,9 +234,7 @@ def np_array_to_list(np_array): elif isinstance(np_array, (DM)): return np_array.full() else: - raise(Exception( - "Cannot convert to list type {}".format(type(np_array)) - )) + raise(Exception(f"Cannot convert to list type {type(np_array)}")) def format_class_dict(d): diff --git a/selfdrive/controls/lib/lateral_mpc_lib/SConscript b/selfdrive/controls/lib/lateral_mpc_lib/SConscript index 64eada95c1..76f7fadce7 100644 --- a/selfdrive/controls/lib/lateral_mpc_lib/SConscript +++ b/selfdrive/controls/lib/lateral_mpc_lib/SConscript @@ -44,11 +44,14 @@ generated_files = [ ] + build_files acados_dir = '#third_party/acados' +acados_templates_dir = '#pyextra/acados_template/c_templates_tera' + source_list = ['lat_mpc.py', f'{acados_dir}/include/acados_c/ocp_nlp_interface.h', f'{acados_dir}/aarch64/lib/libacados.so', f'{acados_dir}/x86_64/lib/libacados.so', f'{acados_dir}/larch64/lib/libacados.so', + f'{acados_templates_dir}/acados_solver.in.c', ] lenv = env.Clone() diff --git a/selfdrive/controls/lib/longitudinal_mpc_lib/SConscript b/selfdrive/controls/lib/longitudinal_mpc_lib/SConscript index cd764114c4..fd0da63a37 100644 --- a/selfdrive/controls/lib/longitudinal_mpc_lib/SConscript +++ b/selfdrive/controls/lib/longitudinal_mpc_lib/SConscript @@ -51,11 +51,14 @@ generated_files = [ ] + build_files acados_dir = '#third_party/acados' +acados_templates_dir = '#pyextra/acados_template/c_templates_tera' + source_list = ['long_mpc.py', f'{acados_dir}/include/acados_c/ocp_nlp_interface.h', f'{acados_dir}/aarch64/lib/libacados.so', f'{acados_dir}/x86_64/lib/libacados.so', f'{acados_dir}/larch64/lib/libacados.so', + f'{acados_templates_dir}/acados_solver.in.c', ] lenv = env.Clone() diff --git a/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py b/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py index 69bf081ee3..74608dc1fe 100644 --- a/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py +++ b/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py @@ -192,11 +192,14 @@ def gen_long_ocp(): class LongitudinalMpc: def __init__(self, e2e=False): self.e2e = e2e + self.solver = AcadosOcpSolverCython(MODEL_NAME, ACADOS_SOLVER_TYPE, N) self.reset() self.source = SOURCES[2] def reset(self): - self.solver = AcadosOcpSolverCython(MODEL_NAME, ACADOS_SOLVER_TYPE, N) + # self.solver = AcadosOcpSolverCython(MODEL_NAME, ACADOS_SOLVER_TYPE, N) + self.solver.reset() + # self.solver.options_set('print_level', 2) self.v_solution = np.zeros(N+1) self.a_solution = np.zeros(N+1) self.prev_a = np.array(self.a_solution) @@ -352,6 +355,8 @@ class LongitudinalMpc: self.run() def run(self): + # t0 = sec_since_boot() + # reset = 0 for i in range(N+1): self.solver.set(i, 'p', self.params[i]) self.solver.constraints_set(0, "lbx", self.x0) @@ -386,6 +391,8 @@ class LongitudinalMpc: self.last_cloudlog_t = t cloudlog.warning(f"Long mpc reset, solution_status: {self.solution_status}") self.reset() + # reset = 1 + # print(f"long_mpc timings: total internal {self.solve_time:.2e}, external: {(sec_since_boot() - t0):.2e} qp {self.time_qp_solution:.2e}, lin {self.time_linearization:.2e} qp_iter {qp_iter}, reset {reset}") if __name__ == "__main__": diff --git a/third_party/acados/aarch64/lib/libacados.so b/third_party/acados/aarch64/lib/libacados.so index 640de2c2ac..ef102d116a 100755 Binary files a/third_party/acados/aarch64/lib/libacados.so and b/third_party/acados/aarch64/lib/libacados.so differ diff --git a/third_party/acados/aarch64/lib/libblasfeo.so b/third_party/acados/aarch64/lib/libblasfeo.so index fae141e9fc..5e3d2b1333 100755 Binary files a/third_party/acados/aarch64/lib/libblasfeo.so and b/third_party/acados/aarch64/lib/libblasfeo.so differ diff --git a/third_party/acados/build.sh b/third_party/acados/build.sh index 96aa23419b..e0f603e682 100755 --- a/third_party/acados/build.sh +++ b/third_party/acados/build.sh @@ -17,8 +17,8 @@ if [ ! -d acados_repo/ ]; then # git clone https://github.com/commaai/acados.git $DIR/acados_repo fi cd acados_repo -git fetch -git checkout 105e06df87f06ea02df4af825867c946b31defdd +git fetch --all +git checkout 8ea8827fafb1b23b4c7da1c4cf650de1cbd73584 git submodule update --recursive --init # build diff --git a/third_party/acados/include/acados/dense_qp/dense_qp_qpoases.h b/third_party/acados/include/acados/dense_qp/dense_qp_qpoases.h index 8fca17dd17..9f0bb1af68 100644 --- a/third_party/acados/include/acados/dense_qp/dense_qp_qpoases.h +++ b/third_party/acados/include/acados/dense_qp/dense_qp_qpoases.h @@ -118,6 +118,8 @@ int dense_qp_qpoases(void *config, dense_qp_in *qp_in, dense_qp_out *qp_out, voi // void dense_qp_qpoases_eval_sens(void *config_, void *qp_in, void *qp_out, void *opts_, void *mem_, void *work_); // +void dense_qp_qpoases_memory_reset(void *config_, void *qp_in, void *qp_out, void *opts_, void *mem_, void *work_); +// void dense_qp_qpoases_config_initialize_default(void *config_); #ifdef __cplusplus diff --git a/third_party/acados/include/acados/ocp_nlp/ocp_nlp_common.h b/third_party/acados/include/acados/ocp_nlp/ocp_nlp_common.h index 4a7f19a07e..60d835fc58 100644 --- a/third_party/acados/include/acados/ocp_nlp/ocp_nlp_common.h +++ b/third_party/acados/include/acados/ocp_nlp/ocp_nlp_common.h @@ -93,6 +93,7 @@ typedef struct ocp_nlp_config char *field, int stage, int index, void *sens_nlp_out); // prepare memory int (*precompute)(void *config, void *dims, void *nlp_in, void *nlp_out, void *opts_, void *mem, void *work); + void (*memory_reset_qp_solver)(void *config, void *dims, void *nlp_in, void *nlp_out, void *opts_, void *mem, void *work); // initalize this struct with default values void (*config_initialize_default)(void *config); // general getter @@ -306,6 +307,7 @@ typedef struct ocp_nlp_res struct blasfeo_dvec *res_eq; // dynamics struct blasfeo_dvec *res_ineq; // inequality constraints struct blasfeo_dvec *res_comp; // complementarity + struct blasfeo_dvec tmp; // tmp double inf_norm_res_stat; double inf_norm_res_eq; double inf_norm_res_ineq; @@ -402,7 +404,7 @@ ocp_nlp_workspace *ocp_nlp_workspace_assign(ocp_nlp_config *config, ocp_nlp_dims void ocp_nlp_alias_memory_to_submodules(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, ocp_nlp_out *out, ocp_nlp_opts *opts, ocp_nlp_memory *mem, ocp_nlp_workspace *work); // -void ocp_nlp_initialize_qp(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, +void ocp_nlp_initialize_submodules(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, ocp_nlp_out *out, ocp_nlp_opts *opts, ocp_nlp_memory *mem, ocp_nlp_workspace *work); // void ocp_nlp_approximate_qp_matrices(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, diff --git a/third_party/acados/include/acados/ocp_nlp/ocp_nlp_sqp.h b/third_party/acados/include/acados/ocp_nlp/ocp_nlp_sqp.h index 5e4fc810d9..74cfb042e3 100644 --- a/third_party/acados/include/acados/ocp_nlp/ocp_nlp_sqp.h +++ b/third_party/acados/include/acados/ocp_nlp/ocp_nlp_sqp.h @@ -122,7 +122,9 @@ typedef struct acados_size_t ocp_nlp_sqp_memory_calculate_size(void *config, void *dims, void *opts_); // void *ocp_nlp_sqp_memory_assign(void *config, void *dims, void *opts_, void *raw_memory); - +// +void ocp_nlp_sqp_memory_reset_qp_solver(void *config_, void *dims_, void *nlp_in_, void *nlp_out_, + void *opts_, void *mem_, void *work_); /************************************************ diff --git a/third_party/acados/include/acados/ocp_qp/ocp_qp_common.h b/third_party/acados/include/acados/ocp_qp/ocp_qp_common.h index bba55f8f75..caf6caf2f0 100644 --- a/third_party/acados/include/acados/ocp_qp/ocp_qp_common.h +++ b/third_party/acados/include/acados/ocp_qp/ocp_qp_common.h @@ -72,6 +72,7 @@ typedef struct void (*memory_get)(void *config_, void *mem_, const char *field, void* value); acados_size_t (*workspace_calculate_size)(void *config, void *dims, void *opts); int (*evaluate)(void *config, void *qp_in, void *qp_out, void *opts, void *mem, void *work); + void (*memory_reset)(void *config, void *qp_in, void *qp_out, void *opts, void *mem, void *work); void (*eval_sens)(void *config, void *qp_in, void *qp_out, void *opts, void *mem, void *work); } qp_solver_config; #endif diff --git a/third_party/acados/include/acados/ocp_qp/ocp_qp_hpipm.h b/third_party/acados/include/acados/ocp_qp/ocp_qp_hpipm.h index 84e874863f..91690e458d 100644 --- a/third_party/acados/include/acados/ocp_qp/ocp_qp_hpipm.h +++ b/third_party/acados/include/acados/ocp_qp/ocp_qp_hpipm.h @@ -63,6 +63,7 @@ typedef struct ocp_qp_hpipm_memory_ struct d_ocp_qp_ipm_ws *hpipm_workspace; double time_qp_solver_call; int iter; + int status; } ocp_qp_hpipm_memory; @@ -87,6 +88,8 @@ acados_size_t ocp_qp_hpipm_workspace_calculate_size(void *config, void *dims, vo // int ocp_qp_hpipm(void *config, void *qp_in, void *qp_out, void *opts_, void *mem_, void *work_); // +void ocp_qp_hpipm_memory_reset(void *config_, void *qp_in_, void *qp_out_, void *opts_, void *mem_, void *work_); +// void ocp_qp_hpipm_eval_sens(void *config, void *qp_in, void *qp_out, void *opts_, void *mem_, void *work_); // void ocp_qp_hpipm_config_initialize_default(void *config); diff --git a/third_party/acados/include/acados/ocp_qp/ocp_qp_hpmpc.h b/third_party/acados/include/acados/ocp_qp/ocp_qp_hpmpc.h index 37d1c0a337..de6ce501b1 100644 --- a/third_party/acados/include/acados/ocp_qp/ocp_qp_hpmpc.h +++ b/third_party/acados/include/acados/ocp_qp/ocp_qp_hpmpc.h @@ -99,6 +99,7 @@ typedef struct ocp_qp_hpmpc_memory_ double time_qp_solver_call; int iter; + int status; } ocp_qp_hpmpc_memory; diff --git a/third_party/acados/include/acados/ocp_qp/ocp_qp_ooqp.h b/third_party/acados/include/acados/ocp_qp/ocp_qp_ooqp.h index e1b789b96a..1a3b7b2fa2 100644 --- a/third_party/acados/include/acados/ocp_qp/ocp_qp_ooqp.h +++ b/third_party/acados/include/acados/ocp_qp/ocp_qp_ooqp.h @@ -112,6 +112,7 @@ typedef struct ocp_qp_ooqp_memory_ int nnz; // max(nnzQ, nnzA, nnzC) double time_qp_solver_call; int iter; + int status; } ocp_qp_ooqp_memory; diff --git a/third_party/acados/include/acados/ocp_qp/ocp_qp_osqp.h b/third_party/acados/include/acados/ocp_qp/ocp_qp_osqp.h index 55321a46b2..4ae391a9ff 100644 --- a/third_party/acados/include/acados/ocp_qp/ocp_qp_osqp.h +++ b/third_party/acados/include/acados/ocp_qp/ocp_qp_osqp.h @@ -75,6 +75,7 @@ typedef struct ocp_qp_osqp_memory_ double time_qp_solver_call; int iter; + int status; } ocp_qp_osqp_memory; diff --git a/third_party/acados/include/acados/ocp_qp/ocp_qp_qpdunes.h b/third_party/acados/include/acados/ocp_qp/ocp_qp_qpdunes.h index 348627c676..ad4d094516 100644 --- a/third_party/acados/include/acados/ocp_qp/ocp_qp_qpdunes.h +++ b/third_party/acados/include/acados/ocp_qp/ocp_qp_qpdunes.h @@ -71,6 +71,7 @@ typedef struct ocp_qp_qpdunes_memory_ qpData_t qpData; double time_qp_solver_call; int iter; + int status; } ocp_qp_qpdunes_memory; diff --git a/third_party/acados/include/acados/ocp_qp/ocp_qp_xcond_solver.h b/third_party/acados/include/acados/ocp_qp/ocp_qp_xcond_solver.h index 9d4f309bd9..d4b30e007b 100644 --- a/third_party/acados/include/acados/ocp_qp/ocp_qp_xcond_solver.h +++ b/third_party/acados/include/acados/ocp_qp/ocp_qp_xcond_solver.h @@ -92,6 +92,7 @@ typedef struct acados_size_t (*memory_calculate_size)(void *config, ocp_qp_xcond_solver_dims *dims, void *opts); void *(*memory_assign)(void *config, ocp_qp_xcond_solver_dims *dims, void *opts, void *raw_memory); void (*memory_get)(void *config_, void *mem_, const char *field, void* value); + void (*memory_reset)(void *config, ocp_qp_xcond_solver_dims *dims, ocp_qp_in *qp_in, ocp_qp_out *qp_out, void *opts, void *mem, void *work); acados_size_t (*workspace_calculate_size)(void *config, ocp_qp_xcond_solver_dims *dims, void *opts); int (*evaluate)(void *config, ocp_qp_xcond_solver_dims *dims, ocp_qp_in *qp_in, ocp_qp_out *qp_out, void *opts, void *mem, void *work); void (*eval_sens)(void *config, ocp_qp_xcond_solver_dims *dims, ocp_qp_in *param_qp_in, ocp_qp_out *sens_qp_out, void *opts, void *mem, void *work); diff --git a/third_party/acados/include/acados/sim/sim_lifted_irk_integrator.h b/third_party/acados/include/acados/sim/sim_lifted_irk_integrator.h index 8e476b0e27..9ec2d97bed 100644 --- a/third_party/acados/include/acados/sim/sim_lifted_irk_integrator.h +++ b/third_party/acados/include/acados/sim/sim_lifted_irk_integrator.h @@ -96,6 +96,7 @@ typedef struct struct blasfeo_dvec *u; // controls (nu) -- for expansion step int update_sens; + // int init_K; double time_sim; double time_ad; diff --git a/third_party/acados/include/acados/utils/types.h b/third_party/acados/include/acados/utils/types.h index 99ad375fea..d3da0a86b7 100644 --- a/third_party/acados/include/acados/utils/types.h +++ b/third_party/acados/include/acados/utils/types.h @@ -35,6 +35,21 @@ #ifndef ACADOS_UTILS_TYPES_H_ #define ACADOS_UTILS_TYPES_H_ +/* Symbol visibility in DLLs */ +#ifndef ACADOS_SYMBOL_EXPORT + #if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) + #if defined(STATIC_LINKED) + #define ACADOS_SYMBOL_EXPORT + #else + #define ACADOS_SYMBOL_EXPORT __declspec(dllexport) + #endif + #elif defined(__GNUC__) && ((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) + #define ACADOS_SYMBOL_EXPORT __attribute__ ((visibility ("default"))) + #else + #define ACADOS_SYMBOL_EXPORT + #endif +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/third_party/acados/include/acados_c/ocp_nlp_interface.h b/third_party/acados/include/acados_c/ocp_nlp_interface.h index 0ec82a437a..50bc0cf1af 100644 --- a/third_party/acados/include/acados_c/ocp_nlp_interface.h +++ b/third_party/acados/include/acados_c/ocp_nlp_interface.h @@ -46,6 +46,7 @@ extern "C" { #include "acados/sim/sim_irk_integrator.h" #include "acados/sim/sim_lifted_irk_integrator.h" #include "acados/sim/sim_gnsf.h" +#include "acados/utils/types.h" // acados_c #include "acados_c/ocp_qp_interface.h" #include "acados_c/sim_interface.h" @@ -151,46 +152,46 @@ typedef struct ocp_nlp_solver /// default/invalid state. /// /// \param N Horizon length -ocp_nlp_plan_t *ocp_nlp_plan_create(int N); +ACADOS_SYMBOL_EXPORT ocp_nlp_plan_t *ocp_nlp_plan_create(int N); /// Destructor for plan struct, frees memory. /// /// \param plan_ The plan struct to destroy. -void ocp_nlp_plan_destroy(void* plan_); +ACADOS_SYMBOL_EXPORT void ocp_nlp_plan_destroy(void* plan_); /// Constructs an nlp configuration struct from a plan. /// /// \param plan The plan (user nlp configuration). -ocp_nlp_config *ocp_nlp_config_create(ocp_nlp_plan_t plan); +ACADOS_SYMBOL_EXPORT ocp_nlp_config *ocp_nlp_config_create(ocp_nlp_plan_t plan); /// Desctructor of the nlp configuration. /// /// \param config_ The configuration struct. -void ocp_nlp_config_destroy(void *config_); +ACADOS_SYMBOL_EXPORT void ocp_nlp_config_destroy(void *config_); /// Constructs an struct that contains the dimensions of the variables. /// /// \param config_ The configuration struct. -ocp_nlp_dims *ocp_nlp_dims_create(void *config_); +ACADOS_SYMBOL_EXPORT ocp_nlp_dims *ocp_nlp_dims_create(void *config_); /// Destructor of The dimension struct. /// /// \param dims_ The dimension struct. -void ocp_nlp_dims_destroy(void *dims_); +ACADOS_SYMBOL_EXPORT void ocp_nlp_dims_destroy(void *dims_); /// Constructs an input struct for a non-linear programs. /// /// \param config The configuration struct. /// \param dims The dimension struct. -ocp_nlp_in *ocp_nlp_in_create(ocp_nlp_config *config, ocp_nlp_dims *dims); +ACADOS_SYMBOL_EXPORT ocp_nlp_in *ocp_nlp_in_create(ocp_nlp_config *config, ocp_nlp_dims *dims); /// Destructor of the inputs struct. /// /// \param in The inputs struct. -void ocp_nlp_in_destroy(void *in); +ACADOS_SYMBOL_EXPORT void ocp_nlp_in_destroy(void *in); /// Sets the sampling times for the given stage. @@ -201,9 +202,12 @@ void ocp_nlp_in_destroy(void *in); /// \param stage Stage number. /// \param field Has to be "Ts" (TBC other options). /// \param value The sampling times (floating point). -void ocp_nlp_in_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, int stage, +ACADOS_SYMBOL_EXPORT void ocp_nlp_in_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, int stage, const char *field, void *value); +/// +// void ocp_nlp_in_get(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, int stage, +// const char *field, void *value); /// Sets the function pointers to the dynamics functions for the given stage. /// @@ -214,7 +218,7 @@ void ocp_nlp_in_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, /// \param fun_type The name of the function type, either impl_ode_fun, /// impl_ode_fun_jac_x_xdot, impl_ode_jac_x_xdot_u (TBC) /// \param fun_ptr Function pointer to the dynamics function. -int ocp_nlp_dynamics_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, +ACADOS_SYMBOL_EXPORT int ocp_nlp_dynamics_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, int stage, const char *fun_type, void *fun_ptr); @@ -227,7 +231,7 @@ int ocp_nlp_dynamics_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_n /// \param field The name of the field, either nls_res_jac, /// y_ref, W (others TBC) /// \param value Cost values. -int ocp_nlp_cost_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, +ACADOS_SYMBOL_EXPORT int ocp_nlp_cost_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, int stage, const char *field, void *value); @@ -239,7 +243,7 @@ int ocp_nlp_cost_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_i /// \param stage Stage number. /// \param field The name of the field, either lb, ub (others TBC) /// \param value Constraints function or values. -int ocp_nlp_constraints_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, +ACADOS_SYMBOL_EXPORT int ocp_nlp_constraints_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, int stage, const char *field, void *value); /* out */ @@ -248,12 +252,12 @@ int ocp_nlp_constraints_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, /// /// \param config The configuration struct. /// \param dims The dimension struct. -ocp_nlp_out *ocp_nlp_out_create(ocp_nlp_config *config, ocp_nlp_dims *dims); +ACADOS_SYMBOL_EXPORT ocp_nlp_out *ocp_nlp_out_create(ocp_nlp_config *config, ocp_nlp_dims *dims); /// Destructor of the output struct. /// /// \param out The output struct. -void ocp_nlp_out_destroy(void *out); +ACADOS_SYMBOL_EXPORT void ocp_nlp_out_destroy(void *out); /// Sets fields in the output struct of an nlp solver, used to initialize the solver. @@ -264,7 +268,7 @@ void ocp_nlp_out_destroy(void *out); /// \param stage Stage number. /// \param field The name of the field, either x, u, pi. /// \param value Initialization values. -void ocp_nlp_out_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, +ACADOS_SYMBOL_EXPORT void ocp_nlp_out_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, int stage, const char *field, void *value); @@ -276,21 +280,21 @@ void ocp_nlp_out_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *ou /// \param stage Stage number. /// \param field The name of the field, either x, u, z, pi. /// \param value Pointer to the output memory. -void ocp_nlp_out_get(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, +ACADOS_SYMBOL_EXPORT void ocp_nlp_out_get(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, int stage, const char *field, void *value); // -void ocp_nlp_get_at_stage(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_solver *solver, +ACADOS_SYMBOL_EXPORT void ocp_nlp_get_at_stage(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_solver *solver, int stage, const char *field, void *value); // TODO(andrea): remove this once/if the MATLAB interface uses the new setters below? -int ocp_nlp_dims_get_from_attr(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, +ACADOS_SYMBOL_EXPORT int ocp_nlp_dims_get_from_attr(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, int stage, const char *field); -void ocp_nlp_constraint_dims_get_from_attr(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, +ACADOS_SYMBOL_EXPORT void ocp_nlp_constraint_dims_get_from_attr(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, int stage, const char *field, int *dims_out); -void ocp_nlp_cost_dims_get_from_attr(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, +ACADOS_SYMBOL_EXPORT void ocp_nlp_cost_dims_get_from_attr(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, int stage, const char *field, int *dims_out); void ocp_nlp_dynamics_dims_get_from_attr(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, @@ -302,12 +306,12 @@ void ocp_nlp_dynamics_dims_get_from_attr(ocp_nlp_config *config, ocp_nlp_dims *d /// /// \param config The configuration struct. /// \param dims The dimension struct. -void *ocp_nlp_solver_opts_create(ocp_nlp_config *config, ocp_nlp_dims *dims); +ACADOS_SYMBOL_EXPORT void *ocp_nlp_solver_opts_create(ocp_nlp_config *config, ocp_nlp_dims *dims); /// Destructor of the options. /// /// \param opts The options struct. -void ocp_nlp_solver_opts_destroy(void *opts); +ACADOS_SYMBOL_EXPORT void ocp_nlp_solver_opts_destroy(void *opts); /// Sets an option. /// @@ -315,10 +319,13 @@ void ocp_nlp_solver_opts_destroy(void *opts); /// \param opts_ The options struct. /// \param field Name of the option. /// \param value Value of the option. -void ocp_nlp_solver_opts_set(ocp_nlp_config *config, void *opts_, const char *field, void* value); +ACADOS_SYMBOL_EXPORT void ocp_nlp_solver_opts_set(ocp_nlp_config *config, void *opts_, const char *field, void* value); + + +// ACADOS_SYMBOL_EXPORT void ocp_nlp_solver_opts_get(ocp_nlp_config *config, void *opts_, const char *field, void* value); -void ocp_nlp_solver_opts_set_at_stage(ocp_nlp_config *config, void *opts_, int stage, const char *field, void* value); +ACADOS_SYMBOL_EXPORT void ocp_nlp_solver_opts_set_at_stage(ocp_nlp_config *config, void *opts_, int stage, const char *field, void* value); /// TBC @@ -327,7 +334,7 @@ void ocp_nlp_solver_opts_set_at_stage(ocp_nlp_config *config, void *opts_, int s /// \param config The configuration struct. /// \param dims The dimension struct. /// \param opts_ The options struct. -void ocp_nlp_solver_opts_update(ocp_nlp_config *config, ocp_nlp_dims *dims, void *opts_); +ACADOS_SYMBOL_EXPORT void ocp_nlp_solver_opts_update(ocp_nlp_config *config, ocp_nlp_dims *dims, void *opts_); /* solver */ @@ -338,12 +345,12 @@ void ocp_nlp_solver_opts_update(ocp_nlp_config *config, ocp_nlp_dims *dims, void /// \param dims The dimension struct. /// \param opts_ The options struct. /// \return The solver. -ocp_nlp_solver *ocp_nlp_solver_create(ocp_nlp_config *config, ocp_nlp_dims *dims, void *opts_); +ACADOS_SYMBOL_EXPORT ocp_nlp_solver *ocp_nlp_solver_create(ocp_nlp_config *config, ocp_nlp_dims *dims, void *opts_); /// Destructor of the solver. /// /// \param solver The solver struct. -void ocp_nlp_solver_destroy(void *solver); +ACADOS_SYMBOL_EXPORT void ocp_nlp_solver_destroy(void *solver); /// Solves the optimal control problem. Call ocp_nlp_precompute before /// calling this functions (TBC). @@ -351,7 +358,17 @@ void ocp_nlp_solver_destroy(void *solver); /// \param solver The solver struct. /// \param nlp_in The inputs struct. /// \param nlp_out The output struct. -int ocp_nlp_solve(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_out); +ACADOS_SYMBOL_EXPORT int ocp_nlp_solve(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_out); + + + +/// Resets the memory of the QP solver +/// +/// \param solver The solver struct. +/// \param nlp_in The inputs struct. +/// \param nlp_out The output struct. +ACADOS_SYMBOL_EXPORT void ocp_nlp_solver_reset_qp_memory(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_out); + /// Performs precomputations for the solver. Needs to be called before /// ocl_nlp_solve (TBC). @@ -359,7 +376,7 @@ int ocp_nlp_solve(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_o /// \param solver The solver struct. /// \param nlp_in The inputs struct. /// \param nlp_out The output struct. -int ocp_nlp_precompute(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_out); +ACADOS_SYMBOL_EXPORT int ocp_nlp_precompute(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_out); /// Computes cost function value. @@ -367,7 +384,7 @@ int ocp_nlp_precompute(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out * /// \param solver The solver struct. /// \param nlp_in The inputs struct. /// \param nlp_out The output struct. -void ocp_nlp_eval_cost(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_out); +ACADOS_SYMBOL_EXPORT void ocp_nlp_eval_cost(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_out); // void ocp_nlp_eval_residuals(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_out); @@ -377,18 +394,18 @@ void ocp_nlp_eval_residuals(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_ /// \param solver The solver struct. /// \param nlp_in The inputs struct. /// \param nlp_out The output struct. -void ocp_nlp_eval_residuals(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_out); +ACADOS_SYMBOL_EXPORT void ocp_nlp_eval_residuals(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_out); // -void ocp_nlp_eval_param_sens(ocp_nlp_solver *solver, char *field, int stage, int index, ocp_nlp_out *sens_nlp_out); +ACADOS_SYMBOL_EXPORT void ocp_nlp_eval_param_sens(ocp_nlp_solver *solver, char *field, int stage, int index, ocp_nlp_out *sens_nlp_out); /* get */ /// \param config The configuration struct. /// \param solver The solver struct. /// \param field Supports "sqp_iter", "status", "nlp_res", "time_tot", ... /// \param return_value_ Pointer to the output memory. -void ocp_nlp_get(ocp_nlp_config *config, ocp_nlp_solver *solver, +ACADOS_SYMBOL_EXPORT void ocp_nlp_get(ocp_nlp_config *config, ocp_nlp_solver *solver, const char *field, void *return_value_); /* set */ @@ -399,7 +416,7 @@ void ocp_nlp_get(ocp_nlp_config *config, ocp_nlp_solver *solver, /// \param stage Stage number. /// \param field Supports "z_guess", "xdot_guess" (IRK), "phi_guess" (GNSF-IRK) /// \param value The initial guess for the algebraic variables in the integrator (if continuous model is used). -void ocp_nlp_set(ocp_nlp_config *config, ocp_nlp_solver *solver, +ACADOS_SYMBOL_EXPORT void ocp_nlp_set(ocp_nlp_config *config, ocp_nlp_solver *solver, int stage, const char *field, void *value); diff --git a/third_party/acados/include/acados_c/sim_interface.h b/third_party/acados/include/acados_c/sim_interface.h index 92969e8017..5dce6f153a 100644 --- a/third_party/acados/include/acados_c/sim_interface.h +++ b/third_party/acados/include/acados_c/sim_interface.h @@ -74,64 +74,64 @@ typedef struct /* config */ // -sim_config *sim_config_create(sim_solver_plan_t plan); +ACADOS_SYMBOL_EXPORT sim_config *sim_config_create(sim_solver_plan_t plan); // -void sim_config_destroy(void *config); +ACADOS_SYMBOL_EXPORT void sim_config_destroy(void *config); /* dims */ // -void *sim_dims_create(void *config_); +ACADOS_SYMBOL_EXPORT void *sim_dims_create(void *config_); // -void sim_dims_destroy(void *dims); +ACADOS_SYMBOL_EXPORT void sim_dims_destroy(void *dims); // -void sim_dims_set(sim_config *config, void *dims, const char *field, const int* value); +ACADOS_SYMBOL_EXPORT void sim_dims_set(sim_config *config, void *dims, const char *field, const int* value); // -void sim_dims_get(sim_config *config, void *dims, const char *field, int* value); +ACADOS_SYMBOL_EXPORT void sim_dims_get(sim_config *config, void *dims, const char *field, int* value); // -void sim_dims_get_from_attr(sim_config *config, void *dims, const char *field, int *dims_out); +ACADOS_SYMBOL_EXPORT void sim_dims_get_from_attr(sim_config *config, void *dims, const char *field, int *dims_out); /* in */ // -sim_in *sim_in_create(sim_config *config, void *dims); +ACADOS_SYMBOL_EXPORT sim_in *sim_in_create(sim_config *config, void *dims); // -void sim_in_destroy(void *out); +ACADOS_SYMBOL_EXPORT void sim_in_destroy(void *out); // -int sim_in_set(void *config_, void *dims_, sim_in *in, const char *field, void *value); +ACADOS_SYMBOL_EXPORT int sim_in_set(void *config_, void *dims_, sim_in *in, const char *field, void *value); /* out */ // -sim_out *sim_out_create(sim_config *config, void *dims); +ACADOS_SYMBOL_EXPORT sim_out *sim_out_create(sim_config *config, void *dims); // -void sim_out_destroy(void *out); +ACADOS_SYMBOL_EXPORT void sim_out_destroy(void *out); // -int sim_out_get(void *config, void *dims, sim_out *out, const char *field, void *value); +ACADOS_SYMBOL_EXPORT int sim_out_get(void *config, void *dims, sim_out *out, const char *field, void *value); /* opts */ // -void *sim_opts_create(sim_config *config, void *dims); +ACADOS_SYMBOL_EXPORT void *sim_opts_create(sim_config *config, void *dims); // -void sim_opts_destroy(void *opts); +ACADOS_SYMBOL_EXPORT void sim_opts_destroy(void *opts); // -void sim_opts_set(sim_config *config, void *opts, const char *field, void *value); +ACADOS_SYMBOL_EXPORT void sim_opts_set(sim_config *config, void *opts, const char *field, void *value); // -void sim_opts_get(sim_config *config, void *opts, const char *field, void *value); +ACADOS_SYMBOL_EXPORT void sim_opts_get(sim_config *config, void *opts, const char *field, void *value); /* solver */ // -acados_size_t sim_calculate_size(sim_config *config, void *dims, void *opts_); +ACADOS_SYMBOL_EXPORT acados_size_t sim_calculate_size(sim_config *config, void *dims, void *opts_); // -sim_solver *sim_assign(sim_config *config, void *dims, void *opts_, void *raw_memory); +ACADOS_SYMBOL_EXPORT sim_solver *sim_assign(sim_config *config, void *dims, void *opts_, void *raw_memory); // -sim_solver *sim_solver_create(sim_config *config, void *dims, void *opts_); +ACADOS_SYMBOL_EXPORT sim_solver *sim_solver_create(sim_config *config, void *dims, void *opts_); // -void sim_solver_destroy(void *solver); +ACADOS_SYMBOL_EXPORT void sim_solver_destroy(void *solver); // -int sim_solve(sim_solver *solver, sim_in *in, sim_out *out); +ACADOS_SYMBOL_EXPORT int sim_solve(sim_solver *solver, sim_in *in, sim_out *out); // -int sim_precompute(sim_solver *solver, sim_in *in, sim_out *out); +ACADOS_SYMBOL_EXPORT int sim_precompute(sim_solver *solver, sim_in *in, sim_out *out); // -int sim_solver_set(sim_solver *solver, const char *field, void *value); +ACADOS_SYMBOL_EXPORT int sim_solver_set(sim_solver *solver, const char *field, void *value); #ifdef __cplusplus } /* extern "C" */ diff --git a/third_party/acados/include/blasfeo/include/blasfeo_block_size.h b/third_party/acados/include/blasfeo/include/blasfeo_block_size.h index 0ccd4f1bba..5d0907a61c 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_block_size.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_block_size.h @@ -53,6 +53,7 @@ #define D_PS 8 // panel size #define D_PLD 8 // 4 // GCD of panel length #define D_M_KERNEL 24 // max kernel size +#define D_N_KERNEL 8 // max kernel size #define D_KC 128 //256 // 192 #define D_NC 144 //72 //96 //72 // 120 // 512 #define D_MC 2400 // 6000 @@ -60,6 +61,7 @@ #define S_PS 16 // panel size #define S_PLD 4 // GCD of panel length TODO probably 16 when writing assebly #define S_M_KERNEL 32 // max kernel size +#define S_N_KERNEL 8 // max kernel size #define S_KC 128 //256 #define S_NC 128 //144 #define S_MC 3000 @@ -74,13 +76,15 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 12 // max kernel size +#define D_N_KERNEL 8 // max kernel size #define D_KC 256 // 192 -#define D_NC 72 //96 //72 // 120 // 512 -#define D_MC 1500 // 6000 +#define D_NC 64 //96 //72 // 120 // 512 +#define D_MC 1500 // single #define S_PS 8 // panel size #define S_PLD 4 // 2 // GCD of panel length #define S_M_KERNEL 24 // max kernel size +#define S_N_KERNEL 8 // max kernel size #define S_KC 256 #define S_NC 144 #define S_MC 3000 @@ -95,6 +99,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 8 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 256 //320 //256 //320 #define D_NC 72 //64 //72 //60 // 120 #define D_MC 1000 // 800 @@ -102,6 +107,7 @@ #define S_PS 8 // panel size #define S_PLD 4 // 2 // GCD of panel length #define S_M_KERNEL 16 // max kernel size +#define S_N_KERNEL 8 // max kernel size #define S_KC 256 #define S_NC 144 #define S_MC 2000 @@ -114,6 +120,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 4 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 256 #define D_NC 128 // TODO these are just dummy #define D_MC 3000 // TODO these are just dummy @@ -121,6 +128,7 @@ #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 4 // max kernel size +#define S_N_KERNEL 4 // max kernel size #define S_KC 256 #define S_NC 128 // TODO these are just dummy #define S_MC 3000 // TODO these are just dummy @@ -133,6 +141,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 4 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 256 #define D_NC 128 // TODO these are just dummy #define D_MC 3000 // TODO these are just dummy @@ -140,6 +149,7 @@ #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 4 // max kernel size +#define S_N_KERNEL 4 // max kernel size #define S_KC 256 #define S_NC 128 // TODO these are just dummy #define S_MC 3000 // TODO these are just dummy @@ -153,6 +163,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 4 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 256 #define D_NC 128 // TODO these are just dummy #define D_MC 3000 // TODO these are just dummy @@ -160,6 +171,7 @@ #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 4 // max kernel size +#define S_N_KERNEL 4 // max kernel size #define S_KC 256 #define S_NC 128 // TODO these are just dummy #define S_MC 3000 // TODO these are just dummy @@ -173,6 +185,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 4 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 256 #define D_NC 128 // TODO these are just dummy #define D_MC 3000 // TODO these are just dummy @@ -180,11 +193,35 @@ #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 4 // max kernel size +#define S_N_KERNEL 4 // max kernel size #define S_KC 256 #define S_NC 128 // TODO these are just dummy #define S_MC 3000 // TODO these are just dummy +#elif defined(TARGET_ARMV8A_APPLE_M1) +// common +#define CACHE_LINE_SIZE 64 +#define L1_CACHE_SIZE (128*1024) // L1 data cache size (big cores): 64 kB, ?-way ; DTLB1 ? +#define LLC_CACHE_SIZE (12*1024*1024) // LLC (L2) cache size (big cores): 12 MB +// double +#define D_PS 4 // panel size +#define D_PLD 4 // 2 // GCD of panel length +#define D_M_KERNEL 8 // max kernel size +#define D_N_KERNEL 4 // max kernel size +#define D_KC 512 //256 +#define D_NC 128 //256 +#define D_MC 6000 +// single +#define S_PS 4 +#define S_PLD 4 //2 +#define S_M_KERNEL 8 // max kernel size +#define S_N_KERNEL 8 // max kernel size +#define S_KC 512 +#define S_NC 256 +#define S_MC 6000 + + #elif defined(TARGET_ARMV8A_ARM_CORTEX_A76) // common #define CACHE_LINE_SIZE 64 @@ -194,6 +231,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 8 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 512 //256 #define D_NC 128 //256 #define D_MC 6000 @@ -201,6 +239,7 @@ #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 8 // max kernel size +#define S_N_KERNEL 8 // max kernel size #define S_KC 512 #define S_NC 256 #define S_MC 6000 @@ -215,6 +254,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 8 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 320 #define D_NC 256 #define D_MC 6000 @@ -222,6 +262,7 @@ #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 8 // max kernel size +#define S_N_KERNEL 8 // max kernel size #define S_KC 256 #define S_NC 128 // TODO these are just dummy #define S_MC 3000 // TODO these are just dummy @@ -236,13 +277,15 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 8 // max kernel size -#define D_KC 224 //256 //192 -#define D_NC 40 //36 //48 -#define D_MC 512 //488 //600 +#define D_N_KERNEL 4 // max kernel size +#define D_KC 128 //224 //256 //192 +#define D_NC 72 //40 //36 //48 +#define D_MC (4*192) //512 //488 //600 // single #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 8 // max kernel size +#define S_N_KERNEL 8 // max kernel size #define S_KC 256 #define S_NC 128 // TODO these are just dummy #define S_MC 3000 // TODO these are just dummy @@ -257,6 +300,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 12 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 224 #define D_NC 160 #define D_MC 6000 @@ -264,6 +308,7 @@ #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 8 // max kernel size +#define S_N_KERNEL 8 // max kernel size #define S_KC 256 #define S_NC 128 // TODO these are just dummy #define S_MC 3000 // TODO these are just dummy @@ -278,6 +323,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 12 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 160 #define D_NC 128 #define D_MC 6000 @@ -285,6 +331,7 @@ #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 8 // max kernel size +#define S_N_KERNEL 8 // max kernel size #define S_KC 256 #define S_NC 128 // TODO these are just dummy #define S_MC 3000 // TODO these are just dummy @@ -298,6 +345,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 4 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 256 #define D_NC 128 // TODO these are just dummy #define D_MC 3000 // TODO these are just dummy @@ -305,6 +353,7 @@ #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 4 // max kernel size +#define S_N_KERNEL 4 // max kernel size #define S_KC 256 #define S_NC 128 // TODO these are just dummy #define S_MC 3000 // TODO these are just dummy @@ -318,6 +367,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 4 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 256 #define D_NC 128 // TODO these are just dummy #define D_MC 3000 // TODO these are just dummy @@ -325,6 +375,7 @@ #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 4 // max kernel size +#define S_N_KERNEL 4 // max kernel size #define S_KC 256 #define S_NC 128 // TODO these are just dummy #define S_MC 3000 // TODO these are just dummy @@ -338,6 +389,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 4 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 256 #define D_NC 128 // TODO these are just dummy #define D_MC 3000 // TODO these are just dummy @@ -345,6 +397,7 @@ #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 4 // max kernel size +#define S_N_KERNEL 4 // max kernel size #define S_KC 256 #define S_NC 128 // TODO these are just dummy #define S_MC 3000 // TODO these are just dummy @@ -358,6 +411,7 @@ #define D_PS 4 // panel size #define D_PLD 4 // 2 // GCD of panel length #define D_M_KERNEL 4 // max kernel size +#define D_N_KERNEL 4 // max kernel size #define D_KC 256 #define D_NC 128 // TODO these are just dummy #define D_MC 3000 // TODO these are just dummy @@ -366,6 +420,7 @@ #define S_PS 4 #define S_PLD 4 //2 #define S_M_KERNEL 4 // max kernel size +#define S_N_KERNEL 4 // max kernel size #define S_KC 256 #define S_NC 128 // TODO these are just dummy #define S_MC 3000 // TODO these are just dummy diff --git a/third_party/acados/include/blasfeo/include/blasfeo_common.h b/third_party/acados/include/blasfeo/include/blasfeo_common.h index a648142ba0..03b6c36d84 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_common.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_common.h @@ -198,6 +198,24 @@ struct blasfeo_pm_smat int memsize; // size of needed memory }; +struct blasfeo_pm_dvec + { + double *mem; // pointer to passed chunk of memory + double *pa; // pointer to a pm array of doubles, the first is aligned to cache line size + int m; // size + int pm; // packed size + int memsize; // size of needed memory + }; + +struct blasfeo_pm_svec + { + float *mem; // pointer to passed chunk of memory + float *pa; // pointer to a pm array of floats, the first is aligned to cache line size + int m; // size + int pm; // packed size + int memsize; // size of needed memory + }; + // Explicitly column-major matrix structure struct blasfeo_cm_dmat { @@ -221,11 +239,31 @@ struct blasfeo_cm_smat int memsize; // size of needed memory }; +struct blasfeo_cm_dvec + { + double *mem; // pointer to passed chunk of memory + double *pa; // pointer to a m array of doubles, the first is aligned to cache line size + int m; // size + int memsize; // size of needed memory + }; + +struct blasfeo_cm_svec + { + float *mem; // pointer to passed chunk of memory + float *pa; // pointer to a m array of floats, the first is aligned to cache line size + int m; // size + int memsize; // size of needed memory + }; + #define BLASFEO_PM_DMATEL(sA,ai,aj) ((sA)->pA[((ai)-((ai)&((sA)->ps-1)))*(sA)->cn+(aj)*((sA)->ps)+((ai)&((sA)->ps-1))]) #define BLASFEO_PM_SMATEL(sA,ai,aj) ((sA)->pA[((ai)-((ai)&((sA)->ps-1)))*(sA)->cn+(aj)*((sA)->ps)+((ai)&((sA)->ps-1))]) +#define BLASFEO_PM_DVECEL(sa,ai) ((sa)->pa[ai]) +#define BLASFEO_PM_SVECEL(sa,ai) ((sa)->pa[ai]) #define BLASFEO_CM_DMATEL(sA,ai,aj) ((sA)->pA[(ai)+(aj)*(sA)->m]) #define BLASFEO_CM_SMATEL(sA,ai,aj) ((sA)->pA[(ai)+(aj)*(sA)->m]) +#define BLASFEO_CM_DVECEL(sa,ai) ((sa)->pa[ai]) +#define BLASFEO_CM_SVECEL(sa,ai) ((sa)->pa[ai]) diff --git a/third_party/acados/include/blasfeo/include/blasfeo_d_aux.h b/third_party/acados/include/blasfeo/include/blasfeo_d_aux.h index d4d805c3e4..a6d6119c6c 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_d_aux.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_d_aux.h @@ -237,6 +237,17 @@ void blasfeo_cm_create_dmat(int m, int n, struct blasfeo_pm_dmat *sA, void *memo +// +// BLAS API helper functions +// + +#if ( defined(BLAS_API) & defined(MF_PANELMAJ) ) +// aux +void blasfeo_cm_dgetr(int m, int n, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj); +#endif + + + #ifdef __cplusplus } #endif diff --git a/third_party/acados/include/blasfeo/include/blasfeo_d_blas_api.h b/third_party/acados/include/blasfeo/include/blasfeo_d_blas_api.h index 64dd43f677..ff9e9d4c16 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_d_blas_api.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_d_blas_api.h @@ -44,6 +44,30 @@ +#ifdef BLAS_API +#ifdef CBLAS_API +#ifndef BLASFEO_CBLAS_ENUM +#define BLASFEO_CBLAS_ENUM +#ifdef FORTRAN_BLAS_API +#ifndef CBLAS_H +enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; +enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; +enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; +enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; +enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; +#endif // CBLAS_H +#else // FORTRAN_BLAS_API +enum BLASFEO_CBLAS_ORDER {BlasfeoCblasRowMajor=101, BlasfeoCblasColMajor=102}; +enum BLASFEO_CBLAS_TRANSPOSE {BlasfeoCblasNoTrans=111, BlasfeoCblasTrans=112, BlasfeoCblasConjTrans=113}; +enum BLASFEO_CBLAS_UPLO {BlasfeoCblasUpper=121, BlasfeoCblasLower=122}; +enum BLASFEO_CBLAS_DIAG {BlasfeoCblasNonUnit=131, BlasfeoCblasUnit=132}; +enum BLASFEO_CBLAS_SIDE {BlasfeoCblasLeft=141, BlasfeoCblasRight=142}; +#endif // FORTRAN_BLAS_API +#endif // BLASFEO_CBLAS_ENUM +#endif // CBLAS_API +#endif // BLAS_API + + #ifdef __cplusplus extern "C" { #endif @@ -66,6 +90,14 @@ void dcopy_(int *n, double *x, int *incx, double *y, int *incy); // double ddot_(int *n, double *x, int *incx, double *y, int *incy); +// BLAS 2 +// +void dgemv_(char *tran, int *m, int *n, double *alpha, double *A, int *lda, double *x, int *incx, double *beta, double *y, int *incy); +// +void dsymv_(char *uplo, int *n, double *alpha, double *A, int *lda, double *x, int *incx, double *beta, double *y, int *incy); +// +void dger_(int *m, int *n, double *alpha, double *x, int *incx, double *y, int *incy, double *A, int *lda); + // BLAS 3 // void dgemm_(char *ta, char *tb, int *m, int *n, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc); @@ -75,6 +107,8 @@ void dsyrk_(char *uplo, char *ta, int *m, int *k, double *alpha, double *A, int void dtrmm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb); // void dtrsm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb); +// +void dsyr2k_(char *uplo, char *ta, int *m, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc); @@ -100,49 +134,125 @@ void dtrtrs_(char *uplo, char *trans, char *diag, int *m, int *n, double *A, int +// aux +void dgetr_(int *m, int *n, double *A, int *lda, double *B, int *ldb); + + + +#ifdef CBLAS_API + + + +// CBLAS 1 +// +void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY); +// +void cblas_dswap(const int N, double *X, const int incX, double *Y, const int incY); +// +void cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY); + +// CBLAS 3 +// +void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc); +// +void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc); +// +void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb); +// +void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb); + + + +#endif // CBLAS_API + + + #else // BLASFEO_API // BLAS 1 // -void blas_daxpy(int *n, double *alpha, double *x, int *incx, double *y, int *incy); +void blasfeo_blas_daxpy(int *n, double *alpha, double *x, int *incx, double *y, int *incy); // -double blas_ddot(int *n, double *x, int *incx, double *y, int *incy); +double blasfeo_blas_ddot(int *n, double *x, int *incx, double *y, int *incy); // -void blas_dcopy(int *n, double *x, int *incx, double *y, int *incy); +void blasfeo_blas_dcopy(int *n, double *x, int *incx, double *y, int *incy); + +// BLAS 2 +// +void blasfeo_blas_dgemv(char *trans, int *m, int *n, double *alpha, double *A, int *lda, double *x, int *incx, double *beta, double *y, int *incy); +// +void blasfeo_blas_dsymv(char *uplo, int *n, double *alpha, double *A, int *lda, double *x, int *incx, double *beta, double *y, int *incy); +// +void blasfeo_blas_dger(int *m, int *n, double *alpha, double *x, int *incx, double *y, int *incy, double *A, int *lda); // BLAS 3 // -void blas_dgemm(char *ta, char *tb, int *m, int *n, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc); +void blasfeo_blas_dgemm(char *ta, char *tb, int *m, int *n, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc); +// +void blasfeo_blas_dsyrk(char *uplo, char *ta, int *m, int *k, double *alpha, double *A, int *lda, double *beta, double *C, int *ldc); // -void blas_dsyrk(char *uplo, char *ta, int *m, int *k, double *alpha, double *A, int *lda, double *beta, double *C, int *ldc); +void blasfeo_blas_dtrmm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb); // -void blas_dtrmm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb); +void blasfeo_blas_dtrsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb); // -void blas_dtrsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb); +void blasfeo_blas_dsyr2k(char *uplo, char *ta, int *m, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc); // LAPACK // -void blas_dgesv(int *m, int *n, double *A, int *lda, int *ipiv, double *B, int *ldb, int *info); +void blasfeo_lapack_dgesv(int *m, int *n, double *A, int *lda, int *ipiv, double *B, int *ldb, int *info); // -void blas_dgetrf(int *m, int *n, double *A, int *lda, int *ipiv, int *info); +void blasfeo_lapack_dgetrf(int *m, int *n, double *A, int *lda, int *ipiv, int *info); // -void blas_dgetrf_np(int *m, int *n, double *A, int *lda, int *info); +void blasfeo_lapack_dgetrf_np(int *m, int *n, double *A, int *lda, int *info); // -void blas_dgetrs(char *trans, int *m, int *n, double *A, int *lda, int *ipiv, double *B, int *ldb, int *info); +void blasfeo_lapack_dgetrs(char *trans, int *m, int *n, double *A, int *lda, int *ipiv, double *B, int *ldb, int *info); // -void blas_dlaswp(int *n, double *A, int *lda, int *k1, int *k2, int *ipiv, int *incx); +void blasfeo_lapack_dlaswp(int *n, double *A, int *lda, int *k1, int *k2, int *ipiv, int *incx); // -void blas_dposv(char *uplo, int *m, int *n, double *A, int *lda, double *B, int *ldb, int *info); +void blasfeo_lapack_dposv(char *uplo, int *m, int *n, double *A, int *lda, double *B, int *ldb, int *info); // -void blas_dpotrf(char *uplo, int *m, double *A, int *lda, int *info); +void blasfeo_lapack_dpotrf(char *uplo, int *m, double *A, int *lda, int *info); // -void blas_dpotrs(char *uplo, int *m, int *n, double *A, int *lda, double *B, int *ldb, int *info); +void blasfeo_lapack_dpotrs(char *uplo, int *m, int *n, double *A, int *lda, double *B, int *ldb, int *info); // -void blas_dtrtrs(char *uplo, char *trans, char *diag, int *m, int *n, double *A, int *lda, double *B, int *ldb, int *info); +void blasfeo_lapack_dtrtrs(char *uplo, char *trans, char *diag, int *m, int *n, double *A, int *lda, double *B, int *ldb, int *info); + + + +// aux +void blasfeo_blas_dgetr(int *m, int *n, double *A, int *lda, double *B, int *ldb); + + + +#ifdef CBLAS_API + + + +// CBLAS 1 +// +void blasfeo_cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY); +// +void blasfeo_cblas_dswap(const int N, double *X, const int incX, double *Y, const int incY); +// +void blasfeo_cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY); + +// CBLAS 3 +// +void blasfeo_cblas_dgemm(const enum BLASFEO_CBLAS_ORDER Order, const enum BLASFEO_CBLAS_TRANSPOSE TransA, const enum BLASFEO_CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc); +// +void blasfeo_cblas_dsyrk(const enum BLASFEO_CBLAS_ORDER Order, const enum BLASFEO_CBLAS_UPLO Uplo, const enum BLASFEO_CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc); +// +void blasfeo_cblas_dtrmm(const enum BLASFEO_CBLAS_ORDER Order, const enum BLASFEO_CBLAS_SIDE Side, const enum BLASFEO_CBLAS_UPLO Uplo, const enum BLASFEO_CBLAS_TRANSPOSE TransA, const enum BLASFEO_CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb); +// +void blasfeo_cblas_dtrsm(const enum BLASFEO_CBLAS_ORDER Order, const enum BLASFEO_CBLAS_SIDE Side, const enum BLASFEO_CBLAS_UPLO Uplo, const enum BLASFEO_CBLAS_TRANSPOSE TransA, const enum BLASFEO_CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb); + + + +#endif // CBLAS_API diff --git a/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_api.h b/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_api.h index 15915f436f..88bb3fce22 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_api.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_api.h @@ -119,6 +119,10 @@ void blasfeo_dgemv_nt(int m, int n, double alpha_n, double alpha_t, struct blasf // z <= beta * y + alpha * A * x, where A is symmetric and only the lower triangular patr of A is accessed void blasfeo_dsymv_l(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi); void blasfeo_dsymv_l_mn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi); +// z <= beta * y + alpha * A * x, where A is symmetric and only the upper triangular patr of A is accessed +void blasfeo_dsymv_u(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi); +// D = C + alpha * x * y^T +void blasfeo_dger(int m, int n, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); // diagonal @@ -227,6 +231,14 @@ void blasfeo_dtrsm_runu(int m, int n, double alpha, struct blasfeo_dmat *sA, int void blasfeo_dtrsm_rutn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj); // D <= alpha * B * A^{-T} , with A upper triangular with unit diagonal void blasfeo_dtrsm_rutu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj); +// D <= beta * C + alpha * A * B^T + alpha * B * A^T; C, D lower triangular +void blasfeo_dsyr2k_ln(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); +// D <= beta * C + alpha * A^T * B + alpha * B^T * A; C, D lower triangular +void blasfeo_dsyr2k_lt(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); +// D <= beta * C + alpha * A * B^T + alpha * B * A^T; C, D upper triangular +void blasfeo_dsyr2k_un(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); +// D <= beta * C + alpha * A^T * B + alpha * B^T * A; C, D upper triangular +void blasfeo_dsyr2k_ut(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); // diagonal @@ -327,6 +339,16 @@ void blasfeo_cm_dtrmm_runn(int m, int n, double alpha, struct blasfeo_cm_dmat *s void blasfeo_cm_dtrmm_runu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj); void blasfeo_cm_dtrmm_rutn(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj); void blasfeo_cm_dtrmm_rutu(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, struct blasfeo_cm_dmat *sD, int di, int dj); +void blasfeo_cm_dsyr2k_ln(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj); +void blasfeo_cm_dsyr2k_lt(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj); +void blasfeo_cm_dsyr2k_un(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj); +void blasfeo_cm_dsyr2k_ut(int m, int k, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dmat *sB, int bi, int bj, double beta, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj); +// BLAS 2 +void blasfeo_cm_dgemv_n(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dvec *sx, int xi, double beta, struct blasfeo_cm_dvec *sy, int yi, struct blasfeo_cm_dvec *sz, int zi); +void blasfeo_cm_dgemv_t(int m, int n, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dvec *sx, int xi, double beta, struct blasfeo_cm_dvec *sy, int yi, struct blasfeo_cm_dvec *sz, int zi); +void blasfeo_cm_dsymv_l(int m, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dvec *sx, int xi, double beta, struct blasfeo_cm_dvec *sy, int yi, struct blasfeo_cm_dvec *sz, int zi); +void blasfeo_cm_dsymv_u(int m, double alpha, struct blasfeo_cm_dmat *sA, int ai, int aj, struct blasfeo_cm_dvec *sx, int xi, double beta, struct blasfeo_cm_dvec *sy, int yi, struct blasfeo_cm_dvec *sz, int zi); +void blasfeo_cm_dger(int m, int n, double alpha, struct blasfeo_cm_dvec *sx, int xi, struct blasfeo_cm_dvec *sy, int yi, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj); // LAPACK void blasfeo_cm_dpotrf_l(int m, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj); void blasfeo_cm_dpotrf_u(int m, struct blasfeo_cm_dmat *sC, int ci, int cj, struct blasfeo_cm_dmat *sD, int di, int dj); diff --git a/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_hp_api.h b/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_hp_api.h new file mode 100644 index 0000000000..405733f13e --- /dev/null +++ b/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_hp_api.h @@ -0,0 +1,84 @@ +/************************************************************************************************** +* * +* This file is part of BLASFEO. * +* * +* BLASFEO -- BLAS For Embedded Optimization. * +* Copyright (C) 2019 by Gianluca Frison. * +* Developed at IMTEK (University of Freiburg) under the supervision of Moritz Diehl. * +* All rights reserved. * +* * +* The 2-Clause BSD License * +* * +* Redistribution and use in source and binary forms, with or without * +* modification, are permitted provided that the following conditions are met: * +* * +* 1. Redistributions of source code must retain the above copyright notice, this * +* list of conditions and the following disclaimer. * +* 2. Redistributions in binary form must reproduce the above copyright notice, * +* this list of conditions and the following disclaimer in the documentation * +* and/or other materials provided with the distribution. * +* * +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * +* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * +* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * +* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * +* * +* Author: Gianluca Frison, gianluca.frison (at) imtek.uni-freiburg.de * +* * +**************************************************************************************************/ + +#ifndef BLASFEO_D_BLASFEO_HP_API_H_ +#define BLASFEO_D_BLASFEO_HP_API_H_ + + + +#include "blasfeo_common.h" + + + +#ifdef __cplusplus +extern "C" { +#endif + + + +// +// level 3 BLAS +// + +// dense + + +// D <= beta * C + alpha * A^T * B +void blasfeo_hp_dgemm_tn(int m, int n, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); +// D <= beta * C + alpha * A * B^T; C, D lower triangular +void blasfeo_hp_dsyrk_ln(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); +// D <= beta * C + alpha * A * A^T ; C, D lower triangular +void blasfeo_hp_dsyrk3_ln(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); +// D <= alpha * B * A^{-T} , with A lower triangular +void blasfeo_hp_dtrsm_rltn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj); + + +// +// level 2 BLAS +// + +// dense + +// z <= beta * y + alpha * A * x +void blasfeo_hp_dgemv_n(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi); + + + +#ifdef __cplusplus +} +#endif + +#endif // BLASFEO_D_BLASFEO_HP_API_H_ + diff --git a/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_ref_api.h b/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_ref_api.h index 5773715cfa..1a8b22c67e 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_ref_api.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_ref_api.h @@ -119,6 +119,10 @@ void blasfeo_ref_dgemv_nt(int m, int n, double alpha_n, double alpha_t, struct b // z <= beta * y + alpha * A * x, where A is symmetric and only the lower triangular patr of A is accessed void blasfeo_ref_dsymv_l(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi); void blasfeo_ref_dsymv_l_mn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi); +// z <= beta * y + alpha * A * x, where A is symmetric and only the upper triangular patr of A is accessed +void blasfeo_ref_dsymv_u(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi); +// D = C + alpha * x * y^T +void blasfeo_ref_dger(int m, int n, double alpha, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sy, int yi, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); // diagonal @@ -214,6 +218,14 @@ void blasfeo_ref_dtrsm_runu(int m, int n, double alpha, struct blasfeo_dmat *sA, void blasfeo_ref_dtrsm_rutn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj); // D <= alpha * B * A^{-T} , with A upper triangular with unit diagonal void blasfeo_ref_dtrsm_rutu(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, struct blasfeo_dmat *sD, int di, int dj); +// D <= beta * C + alpha * A * B^T + alpha * B * A^T ; C, D lower triangular +void blasfeo_ref_dsyr2k_ln(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); +// D <= beta * C + alpha * A^T * B + alpha * B^T * A ; C, D lower triangular +void blasfeo_ref_dsyr2k_lt(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); +// D <= beta * C + alpha * A * B^T + alpha * B * A^T ; C, D upper triangular +void blasfeo_ref_dsyr2k_un(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); +// D <= beta * C + alpha * A^T * B + alpha * B^T * A ; C, D upper triangular +void blasfeo_ref_dsyr2k_ut(int m, int k, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dmat *sB, int bi, int bj, double beta, struct blasfeo_dmat *sC, int ci, int cj, struct blasfeo_dmat *sD, int di, int dj); // diagonal diff --git a/third_party/acados/include/blasfeo/include/blasfeo_d_kernel.h b/third_party/acados/include/blasfeo/include/blasfeo_d_kernel.h index 7094857697..d20ac38bbd 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_d_kernel.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_d_kernel.h @@ -728,14 +728,22 @@ void kernel_dsyrk_nt_l_12x4_lib44cc(int kmax, double *alpha, double *A, int sda, void kernel_dsyrk_nt_l_12x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dsyrk_nt_u_12x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dsyrk_nt_u_12x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dger2k_nt_12x4_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dger2k_nt_12x4_vs_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dsyr2k_nt_l_12x4_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dsyr2k_nt_l_12x4_vs_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_rl_12x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nt_rl_12x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_rl_one_12x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nt_rl_one_12x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dtrmm_nt_ru_12x4_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd); +void kernel_dtrmm_nt_ru_12x4_vs_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_ru_12x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nt_ru_12x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_ru_one_12x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nt_ru_one_12x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dtrsm_nt_ll_inv_12x4_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E); +void kernel_dtrsm_nt_ll_inv_12x4_vs_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E, int km, int kn); void kernel_dtrsm_nt_rl_inv_12x4_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *dE); void kernel_dtrsm_nt_rl_inv_12x4_vs_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *dE, int m1, int n1); void kernel_dtrsm_nt_rl_inv_12x4_lib44ccc(int kmax, double *A, int sda, double *B, double *C, int ldc, double *D, int ldd, double *E, int lde, double *dE); @@ -751,6 +759,7 @@ void kernel_dpotrf_nt_l_12x4_vs_lib44cc(int kmax, double *A, int sda, double *B, // 4x12 void kernel_dgemm_nt_4x12_lib44cc(int kmax, double *alpha, double *A, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dgemm_nt_4x12_vs_lib44cc(int kmax, double *alpha, double *A, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dgemm_nt_4x12_p0_vs_lib44cc(int kmax, double *alpha, double *A, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1, double *A_p, double *B_p); void kernel_dtrmm_nt_rl_4x12_tran_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd); void kernel_dtrmm_nt_rl_4x12_tran_vs_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_rl_one_4x12_tran_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd); @@ -764,6 +773,10 @@ void kernel_dsyrk_nt_l_8x8_lib44cc(int kmax, double *alpha, double *A, int sda, void kernel_dsyrk_nt_l_8x8_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dsyrk_nt_u_8x8_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dsyrk_nt_u_8x8_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dger2k_nt_8x8_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, int sdb0, double *A1, int sda1, double *B1, int sdb1, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dger2k_nt_8x8_vs_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, int sdb0, double *A1, int sda1, double *B1, int sdb1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dsyr2k_nt_l_8x8_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, int sdb0, double *A1, int sda1, double *B1, int sdb1, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dsyr2k_nt_l_8x8_vs_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, int sdb0, double *A1, int sda1, double *B1, int sdb1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dpotrf_nt_l_8x8_lib44cc(int kmax, double *A, int sda, double *B, int sdb, double *C, int ldc, double *D, int ldd, double *dD); void kernel_dpotrf_nt_l_8x8_vs_lib44cc(int kmax, double *A, int sda, double *B, int sdb, double *C, int ldc, double *D, int ldd, double *dD, int m1, int n1); // 8x4 @@ -775,14 +788,22 @@ void kernel_dsyrk_nt_l_8x4_lib44cc(int kmax, double *alpha, double *A, int sda, void kernel_dsyrk_nt_l_8x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dsyrk_nt_u_8x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dsyrk_nt_u_8x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dger2k_nt_8x4_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dger2k_nt_8x4_vs_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dsyr2k_nt_l_8x4_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dsyr2k_nt_l_8x4_vs_lib44cc(int kmax, double *alpha, double *A0, int sda0, double *B0, double *A1, int sda1, double *B1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_rl_8x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nt_rl_8x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_rl_one_8x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nt_rl_one_8x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dtrmm_nt_ru_8x4_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd); +void kernel_dtrmm_nt_ru_8x4_vs_lib444c(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_ru_8x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nt_ru_8x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_ru_one_8x4_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nt_ru_one_8x4_vs_lib44cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dtrsm_nt_ll_inv_8x4_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E); +void kernel_dtrsm_nt_ll_inv_8x4_vs_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E, int km, int kn); void kernel_dtrsm_nt_rl_inv_8x4_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *dE); void kernel_dtrsm_nt_rl_inv_8x4_vs_lib44cc4(int kmax, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *dE, int m1, int n1); void kernel_dtrsm_nt_rl_inv_8x4_lib44ccc(int kmax, double *A, int sda, double *B, double *C, int ldc, double *D, int ldd, double *E, int lde, double *dE); @@ -813,6 +834,10 @@ void kernel_dsyrk_nt_l_4x4_lib44cc(int kmax, double *alpha, double *A, double *B void kernel_dsyrk_nt_l_4x4_vs_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dsyrk_nt_u_4x4_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dsyrk_nt_u_4x4_vs_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dger2k_nt_4x4_lib44cc(int kmax, double *alpha, double *A0, double *B0, double *A1, double *B1, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dger2k_nt_4x4_vs_lib44cc(int kmax, double *alpha, double *A0, double *B0, double *A1, double *B1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dsyr2k_nt_l_4x4_lib44cc(int kmax, double *alpha, double *A0, double *B0, double *A1, double *B1, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dsyr2k_nt_l_4x4_vs_lib44cc(int kmax, double *alpha, double *A0, double *B0, double *A1, double *B1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_rl_4x4_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nt_rl_4x4_vs_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_rl_4x4_tran_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd); @@ -821,6 +846,8 @@ void kernel_dtrmm_nt_rl_one_4x4_lib44cc(int kmax, double *alpha, double *A, doub void kernel_dtrmm_nt_rl_one_4x4_vs_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_rl_one_4x4_tran_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd); void kernel_dtrmm_nt_rl_one_4x4_tran_vs_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd, int m1, int n1); +void kernel_dtrmm_nt_ru_4x4_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd); +void kernel_dtrmm_nt_ru_4x4_vs_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_ru_4x4_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nt_ru_4x4_vs_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_ru_4x4_tran_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd); @@ -829,6 +856,8 @@ void kernel_dtrmm_nt_ru_one_4x4_lib44cc(int kmax, double *alpha, double *A, doub void kernel_dtrmm_nt_ru_one_4x4_vs_lib44cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_ru_one_4x4_tran_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd); void kernel_dtrmm_nt_ru_one_4x4_tran_vs_lib444c(int kmax, double *alpha, double *A, double *B, double *beta, double *C, double *D, int ldd, int m1, int n1); +void kernel_dtrsm_nt_ll_inv_4x4_lib44cc4(int kmax, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *inv_diag_E); +void kernel_dtrsm_nt_ll_inv_4x4_vs_lib44cc4(int kmax, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *inv_diag_E, int km, int kn); void kernel_dtrsm_nt_rl_inv_4x4_lib44cc4(int kmax, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *dE); void kernel_dtrsm_nt_rl_inv_4x4_vs_lib44cc4(int kmax, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *dE, int m1, int n1); void kernel_dtrsm_nt_rl_inv_4x4_lib44ccc(int kmax, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, double *dE); @@ -852,6 +881,8 @@ void kernel_dgemm_nt_12x4_lib4ccc(int kmax, double *alpha, double *A, int sda, d void kernel_dgemm_nt_12x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dsyrk_nt_l_12x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dsyrk_nt_l_12x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dger2k_nt_12x4_lib4ccc(int kmax, double *alpha, double *A0, int sda0, double *B0, int ldb0, double *A1, int sda1, double *B1, int ldb1, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dger2k_nt_12x4_vs_lib4ccc(int kmax, double *alpha, double *A0, int sda0, double *B0, int ldb0, double *A1, int sda1, double *B1, int ldb1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nn_rl_12x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nn_rl_12x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nn_rl_one_12x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); @@ -868,6 +899,8 @@ void kernel_dtrmm_nt_ru_12x4_lib4ccc(int kmax, double *alpha, double *A, int sda void kernel_dtrmm_nt_ru_12x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_ru_one_12x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nt_ru_one_12x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dtrsm_nn_ll_inv_12x4_lib4ccc4(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E); +void kernel_dtrsm_nn_ll_inv_12x4_vs_lib4ccc4(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E, int km, int kn); void kernel_dtrsm_nn_ll_one_12x4_lib4cccc(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde); void kernel_dtrsm_nn_ll_one_12x4_vs_lib4cccc(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, int m1, int n1); void kernel_dtrsm_nn_rl_inv_12x4_lib4cccc(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, double *dE); @@ -920,6 +953,8 @@ void kernel_dgemm_nt_8x4_lib4ccc(int kmax, double *alpha, double *A, int sda, do void kernel_dgemm_nt_8x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dsyrk_nt_l_8x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dsyrk_nt_l_8x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dger2k_nt_8x4_lib4ccc(int kmax, double *alpha, double *A0, int sda0, double *B0, int ldb0, double *A1, int sda1, double *B1, int ldb1, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dger2k_nt_8x4_vs_lib4ccc(int kmax, double *alpha, double *A0, int sda0, double *B0, int ldb0, double *A1, int sda1, double *B1, int ldb1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nn_rl_8x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nn_rl_8x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nn_rl_one_8x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); @@ -936,6 +971,8 @@ void kernel_dtrmm_nt_ru_8x4_lib4ccc(int kmax, double *alpha, double *A, int sda, void kernel_dtrmm_nt_ru_8x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_ru_one_8x4_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nt_ru_one_8x4_vs_lib4ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dtrsm_nn_ll_inv_8x4_lib4ccc4(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E); +void kernel_dtrsm_nn_ll_inv_8x4_vs_lib4ccc4(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int sde, double *inv_diag_E, int km, int kn); void kernel_dtrsm_nn_ll_one_8x4_lib4cccc(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde); void kernel_dtrsm_nn_ll_one_8x4_vs_lib4cccc(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, int m1, int n1); void kernel_dtrsm_nn_rl_inv_8x4_lib4cccc(int kmax, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, double *dE); @@ -986,6 +1023,8 @@ void kernel_dgemm_nt_4x4_lib4ccc(int kmax, double *alpha, double *A, double *B, void kernel_dgemm_nt_4x4_vs_lib4ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dsyrk_nt_l_4x4_lib4ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dsyrk_nt_l_4x4_vs_lib4ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dger2k_nt_4x4_lib4ccc(int kmax, double *alpha, double *A0, double *B0, int ldb0, double *A1, double *B1, int ldb1, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dger2k_nt_4x4_vs_lib4ccc(int kmax, double *alpha, double *A0, double *B0, int ldb0, double *A1, double *B1, int ldb1, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nn_rl_4x4_lib4ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); void kernel_dtrmm_nn_rl_4x4_vs_lib4ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nn_rl_4x4_tran_lib4c4c(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, double *D, int ldd); @@ -1018,6 +1057,8 @@ void kernel_dtrmm_nt_ru_one_4x4_lib4ccc(int kmax, double *alpha, double *A, doub void kernel_dtrmm_nt_ru_one_4x4_vs_lib4ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); void kernel_dtrmm_nt_ru_one_4x4_tran_lib4c4c(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, double *D, int ldd); void kernel_dtrmm_nt_ru_one_4x4_tran_vs_lib4c4c(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, double *D, int ldd, int m1, int n1); +void kernel_dtrsm_nn_ll_inv_4x4_lib4ccc4(int kmax, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *inv_diag_E); +void kernel_dtrsm_nn_ll_inv_4x4_vs_lib4ccc4(int kmax, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, double *inv_diag_E, int km, int kn); void kernel_dtrsm_nn_ll_one_4x4_lib4cccc(int kmax, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde); void kernel_dtrsm_nn_ll_one_4x4_vs_lib4cccc(int kmax, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, int m1, int n1); void kernel_dtrsm_nn_rl_inv_4x4_lib4cccc(int kmax, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, double *E, int lde, double *dE); @@ -1181,6 +1222,7 @@ void kernel_drowsw_lib(int kmax, double *pA, int lda, double *pC, int ldc); // 12 void kernel_dgemm_nt_12xn_p0_lib44cc(int n, int k, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, double *A_p, double *B_p); void kernel_dgemm_nt_12xn_pl_lib44cc(int n, int k, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, double *A_p, double *B_p); +void kernel_dgemm_nt_mx12_p0_lib44cc(int m, int k, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, double *A_p, double *B_p); // 8 void kernel_dgemm_nt_8xn_p0_lib44cc(int n, int k, double *alpha, double *A, int sda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, double *A_p, double *B_p); @@ -1232,11 +1274,43 @@ void kernel_dgemm_tt_8x8_libc8cc(int kmax, double *alpha, double *A, int lda, do void kernel_dgemm_tt_8x8_vs_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +// level 2 BLAS +void kernel_dgemv_n_4_libc(int kmax, double *alpha, double *A, int lda, double *x, double *z); +void kernel_dgemv_n_4_vs_libc(int kmax, double *alpha, double *A, int lda, double *x, double *z, int km); +void kernel_dgemv_t_4_libc(int kmax, double *alpha, double *A, int lda, double *x, double *beta, double *y, double *z); +void kernel_dgemv_t_4_vs_libc(int kmax, double *alpha, double *A, int lda, double *x, double *beta, double *y, double *z, int km); +void kernel_dsymv_l_4_libc(int kmax, double *alpha, double *A, int lda, double *x, double *z); +void kernel_dsymv_l_4_vs_libc(int kmax, double *alpha, double *A, int lda, double *x, double *z, int km); +void kernel_dsymv_u_4_libc(int kmax, double *alpha, double *A, int lda, double *x, double *z); +void kernel_dsymv_u_4_vs_libc(int kmax, double *alpha, double *A, int lda, double *x, double *z, int km); +void kernel_dger_4_libc(int kmax, double *alpha, double *x, double *y, double *C, int ldc, double *D, int ldd); +void kernel_dger_4_vs_libc(int kmax, double *alpha, double *x, double *y, double *C, int ldc, double *D, int ldd, int km); + + // aux void kernel_dvecld_inc1(int kmax, double *x); void kernel_dveccp_inc1(int kmax, double *x, double *y); +//void kernel_dgetr_nt_8_p0_lib(int kmax, double *A, int lda, double *C, int ldc, double *Ap, double *Bp); +//void kernel_dgetr_nt_8_lib(int kmax, double *A, int lda, double *C, int ldc); +//void kernel_dgetr_nt_4_lib(int kmax, double *A, int lda, double *C, int ldc); +void kernel_dgetr_tn_8_p0_lib(int kmax, double *A, int lda, double *C, int ldc, double *Ap, double *Bp); +void kernel_dgetr_tn_8_lib(int kmax, double *A, int lda, double *C, int ldc); +void kernel_dgetr_tn_4_lib(int kmax, double *A, int lda, double *C, int ldc); +void kernel_dgetr_tn_4_vs_lib(int kmax, double *A, int lda, double *C, int ldc, int m1); + + +// building blocks for blocked algorithms +// +void blasfeo_hp_dgemm_nt_m2(int m, int n, int k, double alpha, double *pA, int sda, double *pB, int sdb, double beta, double *C, int ldc, double *D, int ldd); +void blasfeo_hp_dgemm_nt_n2(int m, int n, int k, double alpha, double *pA, int sda, double *pB, int sdb, double beta, double *C, int ldc, double *D, int ldd); +// +void kernel_dpack_buffer_fn(int m, int n, double *A, int lda, double *pA, int sda); +void kernel_dpack_buffer_ft(int m, int n, double *A, int lda, double *pA, int sda); +void kernel_dpack_buffer_ln(int m, double *A, int lda, double *pA, int sda); +void kernel_dpack_buffer_lt(int m, double *A, int lda, double *pA, int sda); +void kernel_dpack_buffer_ut(int m, double *A, int lda, double *pA, int sda); diff --git a/third_party/acados/include/blasfeo/include/blasfeo_processor_features.h b/third_party/acados/include/blasfeo/include/blasfeo_processor_features.h index 65a0c7e80a..67a0a4da04 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_processor_features.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_processor_features.h @@ -39,20 +39,20 @@ /** * Flags to indicate the different processor features */ -enum -{ - // x86-64 CPU features - BLASFEO_PROCESSOR_FEATURE_AVX = 0x0001, /// AVX instruction set - BLASFEO_PROCESSOR_FEATURE_AVX2 = 0x0002, /// AVX2 instruction set - BLASFEO_PROCESSOR_FEATURE_FMA = 0x0004, /// FMA instruction set - BLASFEO_PROCESSOR_FEATURE_SSE3 = 0x0008, /// SSE3 instruction set - - // ARM CPU features - BLASFEO_PROCESSOR_FEATURE_VFPv3 = 0x0100, /// VFPv3 instruction set - BLASFEO_PROCESSOR_FEATURE_NEON = 0x0100, /// NEON instruction set - BLASFEO_PROCESSOR_FEATURE_VFPv4 = 0x0100, /// VFPv4 instruction set - BLASFEO_PROCESSOR_FEATURE_NEONv2 = 0x0100, /// NEONv2 instruction set -} BLASFEO_PROCESSOR_FEATURES; +//enum +//{ +// // x86-64 CPU features +// BLASFEO_PROCESSOR_FEATURE_AVX = 0x0001, /// AVX instruction set +// BLASFEO_PROCESSOR_FEATURE_AVX2 = 0x0002, /// AVX2 instruction set +// BLASFEO_PROCESSOR_FEATURE_FMA = 0x0004, /// FMA instruction set +// BLASFEO_PROCESSOR_FEATURE_SSE3 = 0x0008, /// SSE3 instruction set +// +// // ARM CPU features +// BLASFEO_PROCESSOR_FEATURE_VFPv3 = 0x0100, /// VFPv3 instruction set +// BLASFEO_PROCESSOR_FEATURE_NEON = 0x0100, /// NEON instruction set +// BLASFEO_PROCESSOR_FEATURE_VFPv4 = 0x0100, /// VFPv4 instruction set +// BLASFEO_PROCESSOR_FEATURE_NEONv2 = 0x0100, /// NEONv2 instruction set +//} BLASFEO_PROCESSOR_FEATURES; /** * Test the features that this processor provides against what the library was compiled with. diff --git a/third_party/acados/include/blasfeo/include/blasfeo_s_blas_api.h b/third_party/acados/include/blasfeo/include/blasfeo_s_blas_api.h index d0fa2245c5..cf2b3e0a2d 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_s_blas_api.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_s_blas_api.h @@ -44,6 +44,31 @@ +#ifdef BLAS_API +#ifdef CBLAS_API +#ifndef BLASFEO_CBLAS_ENUM +#define BLASFEO_CBLAS_ENUM +#ifdef FORTRAN_BLAS_API +#ifndef CBLAS_H +enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; +enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; +enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; +enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; +enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; +#endif // CBLAS_H +#else // FORTRAN_BLAS_API +enum BLASFEO_CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; +enum BLASFEO_CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; +enum BLASFEO_CBLAS_UPLO {CblasUpper=121, CblasLower=122}; +enum BLASFEO_CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; +enum BLASFEO_CBLAS_SIDE {CblasLeft=141, CblasRight=142}; +#endif // FORTRAN_BLAS_API +#endif // BLASFEO_CBLAS_ENUM +#endif // CBLAS_API +#endif // BLAS_API + + + #ifdef __cplusplus extern "C" { #endif @@ -78,27 +103,67 @@ void spotrf_(char *uplo, int *m, float *A, int *lda, int *info); +#ifdef CBLAS_API + + + +// CBLAS 1 +// +void cblas_saxpy(const int N, const float alpha, const float *X, const int incX, float *Y, const int incY); + +// CBLAS 3 +// +void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc); +// +void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb); + + + +#endif // CBLAS_API + + + #else // BLASFEO_API // BLAS 1 // -void blas_saxpy(int *n, float *alpha, float *x, int *incx, float *y, int *incy); +void blasfeo_blas_saxpy(int *n, float *alpha, float *x, int *incx, float *y, int *incy); // -float blas_sdot(int *n, float *x, int *incx, float *y, int *incy); +float blasfeo_blas_sdot(int *n, float *x, int *incx, float *y, int *incy); // BLAS 3 // -void blas_sgemm(char *ta, char *tb, int *m, int *n, int *k, float *alpha, float *A, int *lda, float *B, int *ldb, float *beta, float *C, int *ldc); +void blasfeo_blas_sgemm(char *ta, char *tb, int *m, int *n, int *k, float *alpha, float *A, int *lda, float *B, int *ldb, float *beta, float *C, int *ldc); // -void blas_strsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *A, int *lda, float *B, int *ldb); +void blasfeo_blas_strsm(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *A, int *lda, float *B, int *ldb); // LAPACK // -void blas_spotrf(char *uplo, int *m, float *A, int *lda, int *info); +void blasfeo_lapack_spotrf(char *uplo, int *m, float *A, int *lda, int *info); + + + +#ifdef CBLAS_API + + + +// CBLAS 1 +// +void blasfeo_cblas_saxpy(const int N, const float alpha, const float *X, const int incX, float *Y, const int incY); + +// CBLAS 3 +// +void blasfeo_cblas_sgemm(const enum BLASFEO_CBLAS_ORDER Order, const enum BLASFEO_CBLAS_TRANSPOSE TransA, const enum BLASFEO_CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc); +// +void blasfeo_cblas_strsm(const enum BLASFEO_CBLAS_ORDER Order, const enum BLASFEO_CBLAS_SIDE Side, const enum BLASFEO_CBLAS_UPLO Uplo, const enum BLASFEO_CBLAS_TRANSPOSE TransA, const enum BLASFEO_CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb); + + + +#endif // CBLAS_API diff --git a/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_api.h b/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_api.h index 24b21ead25..8b98104735 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_api.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_api.h @@ -115,6 +115,10 @@ void blasfeo_sgemv_nt(int m, int n, float alpha_n, float alpha_t, struct blasfeo // z <= beta * y + alpha * A * x, where A is symmetric and only the lower triangular patr of A is accessed void blasfeo_ssymv_l(int m, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi); void blasfeo_ssymv_l_mn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi); +// z <= beta * y + alpha * A * x, where A is symmetric and only the upper triangular patr of A is accessed +void blasfeo_ssymv_u(int m, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi); +// D = C + alpha * x * y^T +void blasfeo_sger(int m, int n, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj); // diagonal @@ -182,6 +186,14 @@ void blasfeo_strsm_runu(int m, int n, float alpha, struct blasfeo_smat *sA, int void blasfeo_strsm_rutn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj); // D <= alpha * B * A^{-T} , with A upper triangular with unit diagonal void blasfeo_strsm_rutu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj); +// D <= beta * C + alpha * A * B^T + alpha * B * A^T; C, D lower triangular +void blasfeo_ssyr2k_ln(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj); +// D <= beta * C + alpha * A^T * B + alpha * B^T * A; C, D lower triangular +void blasfeo_ssyr2k_lt(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj); +// D <= beta * C + alpha * A * B^T + alpha * B * A^T; C, D upper triangular +void blasfeo_ssyr2k_un(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj); +// D <= beta * C + alpha * A^T * B + alpha * B^T * A; C, D upper triangular +void blasfeo_ssyr2k_ut(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj); // diagonal diff --git a/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_ref_api.h b/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_ref_api.h index 805914cc05..17cb179695 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_ref_api.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_ref_api.h @@ -115,6 +115,10 @@ void blasfeo_ref_sgemv_nt(int m, int n, float alpha_n, float alpha_t, struct bla // z <= beta * y + alpha * A * x, where A is symmetric and only the lower triangular patr of A is accessed void blasfeo_ref_ssymv_l(int m, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi); void blasfeo_ref_ssymv_l_mn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi); +// z <= beta * y + alpha * A * x, where A is symmetric and only the upper triangular patr of A is accessed +void blasfeo_ref_ssymv_u(int m, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi); +// D = C + alpha * x * y^T +void blasfeo_ref_sger(int m, int n, float alpha, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sy, int yi, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj); // diagonal @@ -182,6 +186,14 @@ void blasfeo_ref_strsm_runu(int m, int n, float alpha, struct blasfeo_smat *sA, void blasfeo_ref_strsm_rutn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj); // D <= alpha * B * A^{-T} , with A upper triangular with unit diagonal void blasfeo_ref_strsm_rutu(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, struct blasfeo_smat *sD, int di, int dj); +// D <= beta * C + alpha * A * B^T + alpha * B * A^T ; C, D lower triangular +void blasfeo_ref_ssyr2k_ln(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj); +// D <= beta * C + alpha * A^T * B + alpha * B^T * A ; C, D lower triangular +void blasfeo_ref_ssyr2k_lt(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj); +// D <= beta * C + alpha * A * B^T + alpha * B * A^T ; C, D upper triangular +void blasfeo_ref_ssyr2k_un(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj); +// D <= beta * C + alpha * A^T * B + alpha * B^T * A ; C, D upper triangular +void blasfeo_ref_ssyr2k_ut(int m, int k, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_smat *sB, int bi, int bj, float beta, struct blasfeo_smat *sC, int ci, int cj, struct blasfeo_smat *sD, int di, int dj); // diagonal diff --git a/third_party/acados/include/blasfeo/include/blasfeo_target.h b/third_party/acados/include/blasfeo/include/blasfeo_target.h index 51f617a649..4ff4f307b3 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_target.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_target.h @@ -1,13 +1,13 @@ -#ifndef TARGET_X64_INTEL_HASWELL -#define TARGET_X64_INTEL_HASWELL +#ifndef TARGET_ARMV8A_ARM_CORTEX_A57 +#define TARGET_ARMV8A_ARM_CORTEX_A57 #endif #ifndef TARGET_NEED_FEATURE_AVX2 -#define TARGET_NEED_FEATURE_AVX2 1 +/* #undef TARGET_NEED_FEATURE_AVX2 */ #endif #ifndef TARGET_NEED_FEATURE_FMA -#define TARGET_NEED_FEATURE_FMA 1 +/* #undef TARGET_NEED_FEATURE_FMA */ #endif #ifndef TARGET_NEED_FEATURE_SSE3 @@ -27,11 +27,11 @@ #endif #ifndef TARGET_NEED_FEATURE_VFPv4 -/* #undef TARGET_NEED_FEATURE_VFPv4 */ +#define TARGET_NEED_FEATURE_VFPv4 1 #endif #ifndef TARGET_NEED_FEATURE_NEONv2 -/* #undef TARGET_NEED_FEATURE_NEONv2 */ +#define TARGET_NEED_FEATURE_NEONv2 1 #endif #ifndef LA_HIGH_PERFORMANCE diff --git a/third_party/acados/include/blasfeo/include/d_blas.h b/third_party/acados/include/blasfeo/include/d_blas.h index d6f8786721..ffd2578da3 100644 --- a/third_party/acados/include/blasfeo/include/d_blas.h +++ b/third_party/acados/include/blasfeo/include/d_blas.h @@ -61,6 +61,7 @@ void dgemm_(char *ta, char *tb, int *m, int *n, int *k, double *alpha, double *A void dsyrk_(char *uplo, char *trans, int *n, int *k, double *alpha, double *A, int *lda, double *beta, double *C, int *ldc); void dtrmm_(char *side, char *uplo, char *trans, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb); void dtrsm_(char *side, char *uplo, char *trans, char *diag, int *m, int *n, double *alpha, double *A, int *lda, double *B, int *ldb); +void dsyr2k_(char *uplo, char *trans, int *n, int *k, double *alpha, double *A, int *lda, double *B, int *ldb, double *beta, double *C, int *ldc); // lapack void dpotrf_(char *uplo, int *m, double *A, int *lda, int *info); diff --git a/third_party/acados/include/blasfeo/include/s_blas.h b/third_party/acados/include/blasfeo/include/s_blas.h index 58b2bb0a1a..3b299ce64c 100644 --- a/third_party/acados/include/blasfeo/include/s_blas.h +++ b/third_party/acados/include/blasfeo/include/s_blas.h @@ -61,6 +61,7 @@ void sgemm_(char *ta, char *tb, int *m, int *n, int *k, float *alpha, float *A, void ssyrk_(char *uplo, char *trans, int *n, int *k, float *alpha, float *A, int *lda, float *beta, float *C, int *ldc); void strmm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *A, int *lda, float *B, int *ldb); void strsm_(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *A, int *lda, float *B, int *ldb); +void ssyr2k_(char *uplo, char *trans, int *n, int *k, float *alpha, float *A, int *lda, float *B, int *ldb, float *beta, float *C, int *ldc); // lapack void spotrf_(char *uplo, int *m, float *A, int *lda, int *info); diff --git a/third_party/acados/larch64/lib/libacados.so b/third_party/acados/larch64/lib/libacados.so index c9448d9632..35f83d4159 100644 Binary files a/third_party/acados/larch64/lib/libacados.so and b/third_party/acados/larch64/lib/libacados.so differ diff --git a/third_party/acados/larch64/lib/libblasfeo.so b/third_party/acados/larch64/lib/libblasfeo.so index 583769e982..6b0e26de79 100644 Binary files a/third_party/acados/larch64/lib/libblasfeo.so and b/third_party/acados/larch64/lib/libblasfeo.so differ diff --git a/third_party/acados/x86_64/lib/libacados.so b/third_party/acados/x86_64/lib/libacados.so index 5c3203b553..b8a6280d68 100644 Binary files a/third_party/acados/x86_64/lib/libacados.so and b/third_party/acados/x86_64/lib/libacados.so differ diff --git a/third_party/acados/x86_64/lib/libblasfeo.so b/third_party/acados/x86_64/lib/libblasfeo.so index 262cf9f12a..e4a19caf13 100644 Binary files a/third_party/acados/x86_64/lib/libblasfeo.so and b/third_party/acados/x86_64/lib/libblasfeo.so differ diff --git a/third_party/acados/x86_64/lib/libhpipm.so b/third_party/acados/x86_64/lib/libhpipm.so index 48421183fd..428c76a117 100644 Binary files a/third_party/acados/x86_64/lib/libhpipm.so and b/third_party/acados/x86_64/lib/libhpipm.so differ diff --git a/third_party/acados/x86_64/lib/libqpOASES_e.so.3.1 b/third_party/acados/x86_64/lib/libqpOASES_e.so.3.1 index 6aa7b4f8b5..cf3604688c 100644 Binary files a/third_party/acados/x86_64/lib/libqpOASES_e.so.3.1 and b/third_party/acados/x86_64/lib/libqpOASES_e.so.3.1 differ diff --git a/third_party/acados/x86_64/t_renderer b/third_party/acados/x86_64/t_renderer index 5e7871bf8a..5ced89c28d 100755 Binary files a/third_party/acados/x86_64/t_renderer and b/third_party/acados/x86_64/t_renderer differ