diff --git a/SConstruct b/SConstruct index 5a111b5615..73c66690ad 100644 --- a/SConstruct +++ b/SConstruct @@ -68,6 +68,7 @@ lenv = { "PYTHONPATH": Dir("#").abspath + ":" + Dir("#pyextra/").abspath, "ACADOS_SOURCE_DIR": Dir("#third_party/acados/acados").abspath, + "ACADOS_PYTHON_INTERFACE_PATH": Dir("#pyextra/acados_template").abspath, "TERA_PATH": Dir("#").abspath + f"/third_party/acados/{arch}/t_renderer", } diff --git a/pyextra/acados_template/.gitignore b/pyextra/acados_template/.gitignore index c6b3a6b5f5..c14a7dfe70 100644 --- a/pyextra/acados_template/.gitignore +++ b/pyextra/acados_template/.gitignore @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32dae3052f331ee34d628ef535709b301259a45df7c7522c4d35dcf49873f00b -size 13 +oid sha256:477f73573a50b1ae2740849e1aed4f8d353ead59116f95b897e8f620e8dbf31b +size 62 diff --git a/pyextra/acados_template/acados_layout.json b/pyextra/acados_template/acados_layout.json index abc89748dd..ad46d45bb3 100644 --- a/pyextra/acados_template/acados_layout.json +++ b/pyextra/acados_template/acados_layout.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23cbba2db6b7c99e303802e65d4714a24f7b943d32aae0feb80f596b821ea13b -size 13664 +oid sha256:81426176ea04cb7ed7b4b17c2941f49dcc8c2d576c2fc78ff158a6e9ec2839e5 +size 13723 diff --git a/pyextra/acados_template/acados_model.py b/pyextra/acados_template/acados_model.py index 973872b66d..168d37885c 100644 --- a/pyextra/acados_template/acados_model.py +++ b/pyextra/acados_template/acados_model.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b8238f6510a691d9b686eeba5612ac3c20fcc434f19f88f6c1039b821621fd2 -size 7254 +oid sha256:a27c58335c2ffae67468f2cbac2ecfd59300d8af5a97b15cc29273dff20a0aef +size 8020 diff --git a/pyextra/acados_template/acados_ocp.py b/pyextra/acados_template/acados_ocp.py index a1cb4a6510..36e50a2ae1 100644 --- a/pyextra/acados_template/acados_ocp.py +++ b/pyextra/acados_template/acados_ocp.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d02c555bdde7266d683d3c2d6fa9812a86b7e3024f841211158197fca94d1f08 -size 95132 +oid sha256:7bbb8ea1f67d4b2247be1eb7e4e6c2d5cc89fac823d0ca4337804a8c0f8de387 +size 95897 diff --git a/pyextra/acados_template/acados_ocp_solver.py b/pyextra/acados_template/acados_ocp_solver.py index ba690ffdd0..1468840b1c 100644 --- a/pyextra/acados_template/acados_ocp_solver.py +++ b/pyextra/acados_template/acados_ocp_solver.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b406ef368856f2e70e02a56b42361d7c5937383529654489318e5831a1d23926 -size 60258 +oid sha256:4463ca9b30ad759498497085bfb94e4d11994dfb88e915aa209c124da13254c1 +size 70093 diff --git a/pyextra/acados_template/acados_ocp_solver_pyx.pyx b/pyextra/acados_template/acados_ocp_solver_pyx.pyx new file mode 100644 index 0000000000..68365f8d21 --- /dev/null +++ b/pyextra/acados_template/acados_ocp_solver_pyx.pyx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec5899c033238f4815409cabdb109b5e7b833db9018a54345e927885bf12d1a +size 17627 diff --git a/pyextra/acados_template/acados_sim.py b/pyextra/acados_template/acados_sim.py index 28106c2ce9..c4834da1e0 100644 --- a/pyextra/acados_template/acados_sim.py +++ b/pyextra/acados_template/acados_sim.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1ab310b29ad2be1b0216b01db7d340e501d29add43cf786a2ef0917f95d6dff7 -size 10392 +oid sha256:534e0fcc2b75f6217fb748e9c8a1c365ad2a3b201bcba895484995fb5ed78133 +size 11803 diff --git a/pyextra/acados_template/acados_sim_layout.json b/pyextra/acados_template/acados_sim_layout.json index 4b8e9b2749..145927689a 100644 --- a/pyextra/acados_template/acados_sim_layout.json +++ b/pyextra/acados_template/acados_sim_layout.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f56b25cddcdd2043d91a089e3f0ddf670f46a24cd5badd7ac03cc59ffe85caa2 -size 700 +oid sha256:7e21d44ed88590aaa52541839e2e8a8d008ceb5d82c7e2af9736df7b0f4eb5e9 +size 759 diff --git a/pyextra/acados_template/acados_sim_solver.py b/pyextra/acados_template/acados_sim_solver.py index 436d589809..6f5bb1d7f5 100644 --- a/pyextra/acados_template/acados_sim_solver.py +++ b/pyextra/acados_template/acados_sim_solver.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e784ba1029a75e244a5671a57b8b28ffd569099a61ad707ac86cea03fa637396 -size 16349 +oid sha256:8d118afbf4993842e202870ff43675d6837befa3a552ddace49b334e6715bb72 +size 16342 diff --git a/pyextra/acados_template/acados_solver_common.pxd b/pyextra/acados_template/acados_solver_common.pxd new file mode 100644 index 0000000000..146c35da5d --- /dev/null +++ b/pyextra/acados_template/acados_solver_common.pxd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd53981de52f81cccaf7b35054bde06dba358c3425f792877fa837e9e74ecf1a +size 4216 diff --git a/pyextra/acados_template/c_templates_tera/Makefile.in b/pyextra/acados_template/c_templates_tera/Makefile.in index c08835b180..f2b46b808a 100644 --- a/pyextra/acados_template/c_templates_tera/Makefile.in +++ b/pyextra/acados_template/c_templates_tera/Makefile.in @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22950565d08e2f1c34dd622800279d21af70266de45ecb5f07909d87caf636d1 -size 18458 +oid sha256:ae30257d087cf8a0e41a9c9d6c41bb9bc1700059678bc7e82cf899f32f26c71a +size 19209 diff --git a/pyextra/acados_template/c_templates_tera/acados_mex_create.in.c b/pyextra/acados_template/c_templates_tera/acados_mex_create.in.c index 8671a78077..8e1e21d931 100644 --- a/pyextra/acados_template/c_templates_tera/acados_mex_create.in.c +++ b/pyextra/acados_template/c_templates_tera/acados_mex_create.in.c @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aba81942a0b7cf9a2b14f8b18fbde557dc974358de5989ff4fd3c8725d429056 -size 17368 +oid sha256:b69ab4513e92ed696e708028a1862fae9f6f38f6c0c3de284f198e5e189fb9b0 +size 17381 diff --git a/pyextra/acados_template/c_templates_tera/acados_mex_free.in.c b/pyextra/acados_template/c_templates_tera/acados_mex_free.in.c index 45cbaec397..a0483d0c86 100644 --- a/pyextra/acados_template/c_templates_tera/acados_mex_free.in.c +++ b/pyextra/acados_template/c_templates_tera/acados_mex_free.in.c @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01c5708e3cad58a5448ea429000b0110c1957d9d254f822d9e0ecc058533eda5 -size 2529 +oid sha256:cfab0ed0fac52bea3bb4dae81dd45f18764330f56d08dfed5c80d11923ad6b44 +size 2555 diff --git a/pyextra/acados_template/c_templates_tera/acados_mex_set.in.c b/pyextra/acados_template/c_templates_tera/acados_mex_set.in.c index 44cc09173b..05ed5e3254 100644 --- a/pyextra/acados_template/c_templates_tera/acados_mex_set.in.c +++ b/pyextra/acados_template/c_templates_tera/acados_mex_set.in.c @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6bdee4b81723dd296c353d1ea4b2cf39d5247912f9d06fc98c075831cb060b06 -size 19886 +oid sha256:7cfae123bb078e0637621b339a14d0b776f9c451ba50939ed9ec341a8daa030e +size 19912 diff --git a/pyextra/acados_template/c_templates_tera/acados_mex_solve.in.c b/pyextra/acados_template/c_templates_tera/acados_mex_solve.in.c index 55dc272e12..7ed976336c 100644 --- a/pyextra/acados_template/c_templates_tera/acados_mex_solve.in.c +++ b/pyextra/acados_template/c_templates_tera/acados_mex_solve.in.c @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:65a7ed464643a436c1baf426458674fdd49ef77952b1a63da221b1b08c72ac4c -size 2180 +oid sha256:1b5a8e5e2f2bf2456962c53842de00d236d67000b98cb61e48c968fe760eb6a5 +size 2206 diff --git a/pyextra/acados_template/c_templates_tera/acados_sim_solver.in.c b/pyextra/acados_template/c_templates_tera/acados_sim_solver.in.c index b92d00d12b..00ffa564ed 100644 --- a/pyextra/acados_template/c_templates_tera/acados_sim_solver.in.c +++ b/pyextra/acados_template/c_templates_tera/acados_sim_solver.in.c @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:504de8ac91b2dc75dcb01aa6894b8435e57db03cf387c1e1f48d7d03b2f737d6 -size 26492 +oid sha256:d1ed4a97c47e85468b4a19bb0b0dae365f5b13bff7054a58e547a2e530570ee0 +size 25463 diff --git a/pyextra/acados_template/c_templates_tera/acados_sim_solver.in.h b/pyextra/acados_template/c_templates_tera/acados_sim_solver.in.h index 934002c2a7..6565d79dd8 100644 --- a/pyextra/acados_template/c_templates_tera/acados_sim_solver.in.h +++ b/pyextra/acados_template/c_templates_tera/acados_sim_solver.in.h @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:690484ab973bf37151fd437c65e3f708fba8c6a4fe6680054acc5ff3e4aa36c5 -size 3976 +oid sha256:6d12f0cbf3624a4dbf05cd50fadcbf895507d84f869c741bc1c2876ea7bdf18f +size 4193 diff --git a/pyextra/acados_template/c_templates_tera/acados_solver.in.c b/pyextra/acados_template/c_templates_tera/acados_solver.in.c index 7f873748ea..8c8783e431 100644 --- a/pyextra/acados_template/c_templates_tera/acados_solver.in.c +++ b/pyextra/acados_template/c_templates_tera/acados_solver.in.c @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ddeef3b42e38682017002ed2e176dc03edba8292464ae1ba44c7cfdd59897f6a -size 112677 +oid sha256:46068c324d8a79f92eb0b758067e68708829fcd31b6f6106fdd57ddaa3ba4e00 +size 115538 diff --git a/pyextra/acados_template/c_templates_tera/acados_solver.in.h b/pyextra/acados_template/c_templates_tera/acados_solver.in.h index 47e2fa1a03..09eb033a80 100644 --- a/pyextra/acados_template/c_templates_tera/acados_solver.in.h +++ b/pyextra/acados_template/c_templates_tera/acados_solver.in.h @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bce8b99e1661834d1e20777634844ecc31c6953a1bd58b52ad9dbe67ef23a656 -size 6135 +oid sha256:4d0202112aabfba5b642e7bab59c4608d3227981ba22509166f9ce3c55a3a39b +size 9834 diff --git a/pyextra/acados_template/c_templates_tera/acados_solver.in.pxd b/pyextra/acados_template/c_templates_tera/acados_solver.in.pxd new file mode 100644 index 0000000000..bc399ba5ca --- /dev/null +++ b/pyextra/acados_template/c_templates_tera/acados_solver.in.pxd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d92fa9835ac4149aa8ad7a4a29a8b4c1a6234c22d43c31df313f94e2602f9e9 +size 1708 diff --git a/pyextra/acados_template/c_templates_tera/acados_solver_sfun.in.c b/pyextra/acados_template/c_templates_tera/acados_solver_sfun.in.c index 0c56068e13..a021dba42a 100644 --- a/pyextra/acados_template/c_templates_tera/acados_solver_sfun.in.c +++ b/pyextra/acados_template/c_templates_tera/acados_solver_sfun.in.c @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ad7b08956c4980fa57f4386ddf4ded811049adc2690933984cc0c80b842b9aa -size 28517 +oid sha256:6395761be4c3e202197dd6d1953d52299260f2ff6a9fd13bf122a752fdc2467b +size 28556 diff --git a/pyextra/acados_template/c_templates_tera/main.in.c b/pyextra/acados_template/c_templates_tera/main.in.c index d1d49ddf19..f29ba57fb2 100644 --- a/pyextra/acados_template/c_templates_tera/main.in.c +++ b/pyextra/acados_template/c_templates_tera/main.in.c @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1539ee98928959ec0a4b50ee33a25f5a18028a903be0e5bd09e4247e2cadc7f4 -size 6639 +oid sha256:c30036c8497a66d000b05c7dcf8e7f37a3fed477885cf5d9d4600eacbc6cf96d +size 7983 diff --git a/pyextra/acados_template/c_templates_tera/main_sim.in.c b/pyextra/acados_template/c_templates_tera/main_sim.in.c index 46fa5fd7c4..0b1f617791 100644 --- a/pyextra/acados_template/c_templates_tera/main_sim.in.c +++ b/pyextra/acados_template/c_templates_tera/main_sim.in.c @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef55fa190b17ab806cb233c53ab041f0f6b4e99d535bcffd0e416946cd1bcad2 -size 4557 +oid sha256:659efc1476d5b27bb6ade88223762fe65d2aa1e96bae0c1fdd7dec6e167fe02a +size 4677 diff --git a/pyextra/acados_template/generate_c_code_constraint.py b/pyextra/acados_template/generate_c_code_constraint.py index 1bd61a2b37..673139a871 100644 --- a/pyextra/acados_template/generate_c_code_constraint.py +++ b/pyextra/acados_template/generate_c_code_constraint.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e97ba33997f96d57bc70970548f79fc83fee147668cd75e4cefd097d37eda117 -size 6716 +oid sha256:e3ca280cc474c3fa50496ca7e21d3b7c918843c6c4f3737627538eb3a8847148 +size 6728 diff --git a/pyextra/acados_template/generate_c_code_discrete_dynamics.py b/pyextra/acados_template/generate_c_code_discrete_dynamics.py index 87368625a7..5380254e87 100644 --- a/pyextra/acados_template/generate_c_code_discrete_dynamics.py +++ b/pyextra/acados_template/generate_c_code_discrete_dynamics.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba36522a9299fe913e9dcc4890adc6fd74e9c33b6234c4b5468ebedccc1a8096 -size 3665 +oid sha256:25cbd49d4188265253ff16f537bb13b64b2d20957416009b8bf09704952dd318 +size 3677 diff --git a/pyextra/acados_template/generate_c_code_explicit_ode.py b/pyextra/acados_template/generate_c_code_explicit_ode.py index 7e46616c78..624645dbea 100644 --- a/pyextra/acados_template/generate_c_code_explicit_ode.py +++ b/pyextra/acados_template/generate_c_code_explicit_ode.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7523392004ef1f5f710ded1ccd415d9e5b6d0601e125beedd7968e333141a3a2 -size 4406 +oid sha256:148abc2a03f578abaea318edd1042c01d3bb04fc6d9e33b7cab0bab48d9ca53f +size 4418 diff --git a/pyextra/acados_template/generate_c_code_external_cost.py b/pyextra/acados_template/generate_c_code_external_cost.py index d480d4dbdd..d2989262d1 100644 --- a/pyextra/acados_template/generate_c_code_external_cost.py +++ b/pyextra/acados_template/generate_c_code_external_cost.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a1c794693e86d1b5f70674c4fd78575409ee34a33520ca9ab295cbfe9d0818c3 -size 4091 +oid sha256:df61d4a3b7c0e04bb02dab5408be15db88e293b481301febb129aedd12cd0cab +size 4337 diff --git a/pyextra/acados_template/generate_c_code_gnsf.py b/pyextra/acados_template/generate_c_code_gnsf.py index 1619ca76dc..83140efc14 100644 --- a/pyextra/acados_template/generate_c_code_gnsf.py +++ b/pyextra/acados_template/generate_c_code_gnsf.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68abbd3ba7391d5bc617eef0a6a47c5f9b579de77307cc3b5ec40de01d85a08d -size 4923 +oid sha256:8ad9558c20c7bf4a526874c51204a86cc41b884d071ff55fab9c778a6f2ef999 +size 4935 diff --git a/pyextra/acados_template/generate_c_code_implicit_ode.py b/pyextra/acados_template/generate_c_code_implicit_ode.py index 6599f6fdb1..a80aeebb7b 100644 --- a/pyextra/acados_template/generate_c_code_implicit_ode.py +++ b/pyextra/acados_template/generate_c_code_implicit_ode.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8c7bc6cc1f33a6c5c04fe5012284486126f59e3b28e8f518ea3691fc1f68ff60 -size 5189 +oid sha256:14938e8888f7fc72cbf0d1c8614d39952be8909aeb707975afb9b03e2aa817eb +size 5201 diff --git a/pyextra/acados_template/generate_c_code_nls_cost.py b/pyextra/acados_template/generate_c_code_nls_cost.py index fa558c804c..f708e01aa6 100644 --- a/pyextra/acados_template/generate_c_code_nls_cost.py +++ b/pyextra/acados_template/generate_c_code_nls_cost.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a2fd8843020aee038722f9028ccbfb8b7147cec8f23f4a4249f6d262b741e09 -size 3852 +oid sha256:b76eeb34b5ff74e072d53cbd08e464ebab8361e805aa6063b23de821ea400287 +size 3864 diff --git a/pyextra/acados_template/utils.py b/pyextra/acados_template/utils.py index 3b2f3c1bed..5262d962d7 100644 --- a/pyextra/acados_template/utils.py +++ b/pyextra/acados_template/utils.py @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de3a26bc1112e1fff37bebb2b0736fb88bc08b9aab182307acaa209161fd174d -size 15426 +oid sha256:3172aa8e2130b7d411bc330e0b7b3038ee0896db22bcdee9ef045392f03ea6ce +size 15924 diff --git a/selfdrive/controls/lib/lateral_mpc_lib/SConscript b/selfdrive/controls/lib/lateral_mpc_lib/SConscript index 4ebb5cd4e0..148e4e123d 100644 --- a/selfdrive/controls/lib/lateral_mpc_lib/SConscript +++ b/selfdrive/controls/lib/lateral_mpc_lib/SConscript @@ -1,4 +1,4 @@ -Import('env', 'arch') +Import('env', 'envCython', 'arch', 'common') gen = "c_generated_code" @@ -33,6 +33,7 @@ generated_files = [ f'{gen}/main_lat.c', f'{gen}/acados_solver_lat.h', + f'{gen}/acados_solver.pxd', f'{gen}/lat_model/lat_expl_vde_adj.c', @@ -53,6 +54,24 @@ lenv["CFLAGS"].append("-DACADOS_WITH_QPOASES") lenv["CXXFLAGS"].append("-DACADOS_WITH_QPOASES") lenv["CCFLAGS"].append("-Wno-unused") lenv["LINKFLAGS"].append("-Wl,--disable-new-dtags") -lenv.SharedLibrary(f"{gen}/acados_ocp_solver_lat", - build_files, - LIBS=['m', 'acados', 'hpipm', 'blasfeo', 'qpOASES_e']) +lib_solver = lenv.SharedLibrary(f"{gen}/acados_ocp_solver_lat", + build_files, + LIBS=['m', 'acados', 'hpipm', 'blasfeo', 'qpOASES_e']) + +# generate cython stuff +acados_ocp_solver_pyx = File("#pyextra/acados_template/acados_ocp_solver_pyx.pyx") +acados_ocp_solver_common = File("#pyextra/acados_template/acados_solver_common.pxd") +libacados_ocp_solver_pxd = File(f'{gen}/acados_solver.pxd') +libacados_ocp_solver_c = File(f'{gen}/acados_ocp_solver_pyx.c') + +lenv2 = envCython.Clone() +lenv2["LINKFLAGS"] += [lib_solver[0].get_labspath()] +lenv2.Command(libacados_ocp_solver_c, + [acados_ocp_solver_pyx, acados_ocp_solver_common, libacados_ocp_solver_pxd], + f'cython' + \ + f' -o {libacados_ocp_solver_c.get_labspath()}' + \ + f' -I {libacados_ocp_solver_pxd.get_dir().get_labspath()}' + \ + f' -I {acados_ocp_solver_common.get_dir().get_labspath()}' + \ + f' {acados_ocp_solver_pyx.get_labspath()}') +lib_cython = lenv2.Program(f'{gen}/acados_ocp_solver_pyx.so', [libacados_ocp_solver_c]) +lenv2.Depends(lib_cython, lib_solver) diff --git a/selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py b/selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py index 42cef8b890..6162cf68a5 100755 --- a/selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py +++ b/selfdrive/controls/lib/lateral_mpc_lib/lat_mpc.py @@ -5,8 +5,12 @@ import numpy as np from casadi import SX, vertcat, sin, cos from selfdrive.controls.lib.drive_helpers import LAT_MPC_N as N from selfdrive.controls.lib.drive_helpers import T_IDXS -from pyextra.acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver +if __name__ == '__main__': # generating code + from pyextra.acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver +else: + # from pyextra.acados_template import AcadosOcpSolverFast + from selfdrive.controls.lib.lateral_mpc_lib.c_generated_code.acados_ocp_solver_pyx import AcadosOcpSolverFast # pylint: disable=no-name-in-module, import-error LAT_MPC_DIR = os.path.dirname(os.path.abspath(__file__)) EXPORT_DIR = os.path.join(LAT_MPC_DIR, "c_generated_code") @@ -110,17 +114,16 @@ def gen_lat_mpc_solver(): class LateralMpc(): def __init__(self, x0=np.zeros(X_DIM)): - self.solver = AcadosOcpSolver('lat', N, EXPORT_DIR) + self.solver = AcadosOcpSolverFast('lat', N, EXPORT_DIR) self.reset(x0) def reset(self, x0=np.zeros(X_DIM)): self.x_sol = np.zeros((N+1, X_DIM)) self.u_sol = np.zeros((N, 1)) self.yref = np.zeros((N+1, 3)) - self.solver.cost_set_slice(0, N, "yref", self.yref[:N]) + for i in range(N): + self.solver.cost_set(i, "yref", self.yref[i]) self.solver.cost_set(N, "yref", self.yref[N][:2]) - W = np.eye(3) - self.Ws = np.tile(W[None], reps=(N,1,1)) # Somehow needed for stable init for i in range(N+1): @@ -132,12 +135,11 @@ class LateralMpc(): self.cost = 0 def set_weights(self, path_weight, heading_weight, steer_rate_weight): - self.Ws[:,0,0] = path_weight - self.Ws[:,1,1] = heading_weight - self.Ws[:,2,2] = steer_rate_weight - self.solver.cost_set_slice(0, N, 'W', self.Ws, api='old') + W = np.asfortranarray(np.diag([path_weight, heading_weight, steer_rate_weight])) + for i in range(N): + self.solver.cost_set(i, 'W', W) #TODO hacky weights to keep behavior the same - self.solver.cost_set(N, 'W', (3/20.)*self.Ws[0,:2,:2]) + self.solver.cost_set(N, 'W', (3/20.)*W[:2,:2]) def run(self, x0, v_ego, car_rotation_radius, y_pts, heading_pts): x0_cp = np.copy(x0) @@ -145,12 +147,15 @@ class LateralMpc(): self.solver.constraints_set(0, "ubx", x0_cp) self.yref[:,0] = y_pts self.yref[:,1] = heading_pts*(v_ego+5.0) - self.solver.cost_set_slice(0, N, "yref", self.yref[:N]) + for i in range(N): + self.solver.cost_set(i, "yref", self.yref[i]) self.solver.cost_set(N, "yref", self.yref[N][:2]) self.solution_status = self.solver.solve() - self.solver.fill_in_slice(0, N+1, 'x', self.x_sol) - self.solver.fill_in_slice(0, N, 'u', self.u_sol) + for i in range(N+1): + self.x_sol[i] = self.solver.get(i, 'x') + for i in range(N): + self.u_sol[i] = self.solver.get(i, 'u') self.cost = self.solver.get_cost() diff --git a/selfdrive/controls/lib/longitudinal_mpc_lib/SConscript b/selfdrive/controls/lib/longitudinal_mpc_lib/SConscript index 70a3e72b1d..4abe90f8f8 100644 --- a/selfdrive/controls/lib/longitudinal_mpc_lib/SConscript +++ b/selfdrive/controls/lib/longitudinal_mpc_lib/SConscript @@ -1,4 +1,4 @@ -Import('env', 'arch') +Import('env', 'envCython', 'arch', 'common') gen = "c_generated_code" @@ -41,6 +41,7 @@ generated_files = [ f'{gen}/main_long.c', f'{gen}/acados_solver_long.h', + f'{gen}/acados_solver.pxd', f'{gen}/long_model/long_expl_vde_adj.c', @@ -63,6 +64,24 @@ lenv["CFLAGS"].append("-DACADOS_WITH_QPOASES") lenv["CXXFLAGS"].append("-DACADOS_WITH_QPOASES") lenv["CCFLAGS"].append("-Wno-unused") lenv["LINKFLAGS"].append("-Wl,--disable-new-dtags") -lenv.SharedLibrary(f"{gen}/acados_ocp_solver_long", - build_files, - LIBS=['m', 'acados', 'hpipm', 'blasfeo', 'qpOASES_e']) +lib_solver = lenv.SharedLibrary(f"{gen}/acados_ocp_solver_long", + build_files, + LIBS=['m', 'acados', 'hpipm', 'blasfeo', 'qpOASES_e']) + +# generate cython stuff +acados_ocp_solver_pyx = File("#pyextra/acados_template/acados_ocp_solver_pyx.pyx") +acados_ocp_solver_common = File("#pyextra/acados_template/acados_solver_common.pxd") +libacados_ocp_solver_pxd = File(f'{gen}/acados_solver.pxd') +libacados_ocp_solver_c = File(f'{gen}/acados_ocp_solver_pyx.c') + +lenv2 = envCython.Clone() +lenv2["LINKFLAGS"] += [lib_solver[0].get_labspath()] +lenv2.Command(libacados_ocp_solver_c, + [acados_ocp_solver_pyx, acados_ocp_solver_common, libacados_ocp_solver_pxd], + f'cython' + \ + f' -o {libacados_ocp_solver_c.get_labspath()}' + \ + f' -I {libacados_ocp_solver_pxd.get_dir().get_labspath()}' + \ + f' -I {acados_ocp_solver_common.get_dir().get_labspath()}' + \ + f' {acados_ocp_solver_pyx.get_labspath()}') +lib_cython = lenv2.Program(f'{gen}/acados_ocp_solver_pyx.so', [libacados_ocp_solver_c]) +lenv2.Depends(lib_cython, lib_solver) diff --git a/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py b/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py index b3960fcb59..df6d8e0fb6 100644 --- a/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py +++ b/selfdrive/controls/lib/longitudinal_mpc_lib/long_mpc.py @@ -8,7 +8,12 @@ from selfdrive.swaglog import cloudlog from selfdrive.modeld.constants import index_function from selfdrive.controls.lib.radar_helpers import _LEAD_ACCEL_TAU -from pyextra.acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver +if __name__ == '__main__': # generating code + from pyextra.acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver +else: + # from pyextra.acados_template import AcadosOcpSolver as AcadosOcpSolverFast + from selfdrive.controls.lib.longitudinal_mpc_lib.c_generated_code.acados_ocp_solver_pyx import AcadosOcpSolverFast # pylint: disable=no-name-in-module, import-error + from casadi import SX, vertcat LONG_MPC_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -190,13 +195,14 @@ class LongitudinalMpc(): self.source = SOURCES[2] def reset(self): - self.solver = AcadosOcpSolver('long', N, EXPORT_DIR) + self.solver = AcadosOcpSolverFast('long', N, EXPORT_DIR) self.v_solution = [0.0 for i in range(N+1)] self.a_solution = [0.0 for i in range(N+1)] self.j_solution = [0.0 for i in range(N)] self.yref = np.zeros((N+1, COST_DIM)) - self.solver.cost_set_slice(0, N, "yref", self.yref[:N]) - self.solver.set(N, "yref", self.yref[N][:COST_E_DIM]) + for i in range(N): + self.solver.cost_set(i, "yref", self.yref[i]) + self.solver.cost_set(N, "yref", self.yref[N][:COST_E_DIM]) self.x_sol = np.zeros((N+1, X_DIM)) self.u_sol = np.zeros((N,1)) self.params = np.zeros((N+1,3)) @@ -216,30 +222,30 @@ class LongitudinalMpc(): self.set_weights_for_lead_policy() def set_weights_for_lead_policy(self): - W = np.diag([X_EGO_OBSTACLE_COST, X_EGO_COST, V_EGO_COST, A_EGO_COST, J_EGO_COST]) - Ws = np.tile(W[None], reps=(N,1,1)) - self.solver.cost_set_slice(0, N, 'W', Ws, api='old') + W = np.asfortranarray(np.diag([X_EGO_OBSTACLE_COST, X_EGO_COST, V_EGO_COST, A_EGO_COST, J_EGO_COST])) + for i in range(N): + self.solver.cost_set(i, 'W', W) # Setting the slice without the copy make the array not contiguous, # causing issues with the C interface. self.solver.cost_set(N, 'W', np.copy(W[:COST_E_DIM, :COST_E_DIM])) # Set L2 slack cost on lower bound constraints Zl = np.array([LIMIT_COST, LIMIT_COST, LIMIT_COST, DANGER_ZONE_COST]) - Zls = np.tile(Zl[None], reps=(N+1,1,1)) - self.solver.cost_set_slice(0, N+1, 'Zl', Zls, api='old') + for i in range(N): + self.solver.cost_set(i, 'Zl', Zl) def set_weights_for_xva_policy(self): - W = np.diag([0., 10., 1., 10., 1.]) - Ws = np.tile(W[None], reps=(N,1,1)) - self.solver.cost_set_slice(0, N, 'W', Ws, api='old') + W = np.asfortranarray(np.diag([0., 10., 1., 10., 1.])) + for i in range(N): + self.solver.cost_set(i, 'W', W) # Setting the slice without the copy make the array not contiguous, # causing issues with the C interface. self.solver.cost_set(N, 'W', np.copy(W[:COST_E_DIM, :COST_E_DIM])) # Set L2 slack cost on lower bound constraints Zl = np.array([LIMIT_COST, LIMIT_COST, LIMIT_COST, 0.0]) - Zls = np.tile(Zl[None], reps=(N+1,1,1)) - self.solver.cost_set_slice(0, N+1, 'Zl', Zls, api='old') + for i in range(N): + self.solver.cost_set(i, 'Zl', Zl) def set_cur_state(self, v, a): if abs(self.x0[1] - v) > 1.: @@ -326,8 +332,9 @@ class LongitudinalMpc(): self.yref[:,1] = x self.yref[:,2] = v self.yref[:,3] = a - self.solver.cost_set_slice(0, N, "yref", self.yref[:N], api='old') - self.solver.set(N, "yref", self.yref[N][:COST_E_DIM]) + for i in range(N): + self.solver.cost_set(i, "yref", self.yref[i]) + self.solver.cost_set(N, "yref", self.yref[N][:COST_E_DIM]) self.accel_limit_arr[:,0] = -10. self.accel_limit_arr[:,1] = 10. x_obstacle = 1e5*np.ones((N+1)) @@ -338,12 +345,14 @@ class LongitudinalMpc(): def run(self): for i in range(N+1): - self.solver.set_param(i, self.params[i]) + self.solver.set(i, 'p', self.params[i]) self.solver.constraints_set(0, "lbx", self.x0) self.solver.constraints_set(0, "ubx", self.x0) self.solution_status = self.solver.solve() - self.solver.fill_in_slice(0, N+1, 'x', self.x_sol) - self.solver.fill_in_slice(0, N, 'u', self.u_sol) + for i in range(N+1): + self.x_sol[i] = self.solver.get(i, 'x') + for i in range(N): + self.u_sol[i] = self.solver.get(i, 'u') self.v_solution = self.x_sol[:,1] self.a_solution = self.x_sol[:,2] diff --git a/third_party/acados/aarch64/lib/libacados.so b/third_party/acados/aarch64/lib/libacados.so index 6655569244..b03254e75c 100755 --- a/third_party/acados/aarch64/lib/libacados.so +++ b/third_party/acados/aarch64/lib/libacados.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c86a0f145884b913abdce6b9b57953cc462e1ef4b0749b7a8e52878b41240281 -size 567753 +oid sha256:a42119de38b56c672e473edd8860e2e75c3e2793961b631634f8dc6f6f6618e5 +size 575769 diff --git a/third_party/acados/aarch64/lib/libblasfeo.so b/third_party/acados/aarch64/lib/libblasfeo.so index f5f49cd2f2..4cdda5e7fe 100755 --- a/third_party/acados/aarch64/lib/libblasfeo.so +++ b/third_party/acados/aarch64/lib/libblasfeo.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f459390b07bd4cff5059475e70436a4843469aecf82cb4137bf77a86732648de +oid sha256:567bfcb693ce73c8faa847ddd3798037f611c8ebfcecddce4da2140f22b191af size 694193 diff --git a/third_party/acados/aarch64/lib/libhpipm.so b/third_party/acados/aarch64/lib/libhpipm.so index d1084d63bf..a6cfdd7e98 100755 --- a/third_party/acados/aarch64/lib/libhpipm.so +++ b/third_party/acados/aarch64/lib/libhpipm.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a363f66e7a0c7b3ce5e4bd202e0320431253f9d3d9f6d6423b031afba4c8017b +oid sha256:d6873d1bf018369aaf13cabe5d870d3154656668b31647df0a1a648b3bb383dc size 1324409 diff --git a/third_party/acados/build.sh b/third_party/acados/build.sh index 749ac8291d..9216032e98 100755 --- a/third_party/acados/build.sh +++ b/third_party/acados/build.sh @@ -18,7 +18,7 @@ if [ ! -d acados_repo/ ]; then fi cd acados_repo git fetch -git checkout 43ba28e95062f9ac9b48facd3b45698d57666fa3 +git checkout 79e9e3e76f2751198858adf382c97837833ad31f git submodule update --recursive --init # build diff --git a/third_party/acados/include/acados/ocp_nlp/ocp_nlp_common.h b/third_party/acados/include/acados/ocp_nlp/ocp_nlp_common.h index 0407633816..64939b9ed1 100644 --- a/third_party/acados/include/acados/ocp_nlp/ocp_nlp_common.h +++ b/third_party/acados/include/acados/ocp_nlp/ocp_nlp_common.h @@ -135,6 +135,8 @@ typedef struct ocp_nlp_dims int *nz; // number of algebraic variables int *ns; // number of slack variables int N; // number of shooting nodes + + void *raw_memory; // Pointer to allocated memory, to be used for freeing } ocp_nlp_dims; // @@ -203,6 +205,9 @@ typedef struct ocp_nlp_in /// Pointers to constraints functions (TBC). void **constraints; + /// Pointer to allocated memory, to be used for freeing. + void *raw_memory; + } ocp_nlp_in; // @@ -235,6 +240,8 @@ typedef struct ocp_nlp_out double inf_norm_res; double total_time; + void *raw_memory; // Pointer to allocated memory, to be used for freeing + } ocp_nlp_out; // diff --git a/third_party/acados/include/acados/sim/sim_collocation_utils.h b/third_party/acados/include/acados/sim/sim_collocation_utils.h index 850dab50dc..40a0b1c0cc 100644 --- a/third_party/acados/include/acados/sim/sim_collocation_utils.h +++ b/third_party/acados/include/acados/sim/sim_collocation_utils.h @@ -43,44 +43,53 @@ extern "C" { -enum Newton_type_collocation -{ - exact = 0, - simplified_in, - simplified_inis -}; +// enum Newton_type_collocation +// { +// exact = 0, +// simplified_in, +// simplified_inis +// }; -typedef struct -{ - enum Newton_type_collocation type; - double *eig; - double *low_tria; - bool single; - bool freeze; +// typedef struct +// { +// enum Newton_type_collocation type; +// double *eig; +// double *low_tria; +// bool single; +// bool freeze; - double *transf1; - double *transf2; +// double *transf1; +// double *transf2; - double *transf1_T; - double *transf2_T; -} Newton_scheme; +// double *transf1_T; +// double *transf2_T; +// } Newton_scheme; +typedef enum +{ + GAUSS_LEGENDRE, + GAUSS_RADAU_IIA, +} sim_collocation_type; + // -acados_size_t gauss_nodes_work_calculate_size(int ns); +// acados_size_t gauss_legendre_nodes_work_calculate_size(int ns); // -void gauss_nodes(int ns, double *nodes, void *raw_memory); +// void gauss_legendre_nodes(int ns, double *nodes, void *raw_memory); // -acados_size_t gauss_simplified_work_calculate_size(int ns); +// acados_size_t gauss_simplified_work_calculate_size(int ns); +// // +// void gauss_simplified(int ns, Newton_scheme *scheme, void *work); // -void gauss_simplified(int ns, Newton_scheme *scheme, void *work); +acados_size_t butcher_tableau_work_calculate_size(int ns); // -acados_size_t butcher_table_work_calculate_size(int ns); +// void calculate_butcher_tableau_from_nodes(int ns, double *nodes, double *b, double *A, void *work); // -void butcher_table(int ns, double *nodes, double *b, double *A, void *work); +void calculate_butcher_tableau(int ns, sim_collocation_type collocation_type, double *c_vec, + double *b_vec, double *A_mat, void *work); diff --git a/third_party/acados/include/acados/sim/sim_common.h b/third_party/acados/include/acados/sim/sim_common.h index 177f0893dd..1838d76f81 100644 --- a/third_party/acados/include/acados/sim/sim_common.h +++ b/third_party/acados/include/acados/sim/sim_common.h @@ -141,12 +141,13 @@ typedef struct bool output_z; // 1 -- if zn should be computed bool sens_algebraic; // 1 -- if S_algebraic should be computed bool exact_z_output; // 1 -- if z, S_algebraic should be computed exactly, extra Newton iterations + sim_collocation_type collocation_type; // for explicit integrators: newton_iter == 0 && scheme == NULL // && jac_reuse=false int newton_iter; bool jac_reuse; - Newton_scheme *scheme; + // Newton_scheme *scheme; // workspace void *work; diff --git a/third_party/acados/include/acados/utils/math.h b/third_party/acados/include/acados/utils/math.h index ac0d3263e8..fe1da875f6 100644 --- a/third_party/acados/include/acados/utils/math.h +++ b/third_party/acados/include/acados/utils/math.h @@ -40,7 +40,7 @@ extern "C" { #include "acados/utils/types.h" -#if defined(__DSPACE__) +#if defined(__MABX2__) double fmax(double a, double b); #endif diff --git a/third_party/acados/include/acados/utils/timing.h b/third_party/acados/include/acados/utils/timing.h index 823b763f66..fe561d3891 100644 --- a/third_party/acados/include/acados/utils/timing.h +++ b/third_party/acados/include/acados/utils/timing.h @@ -67,7 +67,7 @@ typedef struct acados_timer_ mach_timebase_info_data_t tinfo; } acados_timer; -#elif defined(__DSPACE__) +#elif defined(__MABX2__) #include diff --git a/third_party/acados/include/acados_c/ocp_nlp_interface.h b/third_party/acados/include/acados_c/ocp_nlp_interface.h index 0f250a6628..b6b893024b 100644 --- a/third_party/acados/include/acados_c/ocp_nlp_interface.h +++ b/third_party/acados/include/acados_c/ocp_nlp_interface.h @@ -227,10 +227,8 @@ int ocp_nlp_dynamics_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_n /// \param field The name of the field, either nls_res_jac, /// y_ref, W (others TBC) /// \param value Cost values. -int ocp_nlp_cost_model_set_slice(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, - int start_stage, int end_stage, const char *field, void *value, int dim); int ocp_nlp_cost_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, - int start_stage, const char *field, void *value); + int stage, const char *field, void *value); /// Sets the function pointers to the constraints functions for the given stage. @@ -241,8 +239,6 @@ int ocp_nlp_cost_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_i /// \param stage Stage number. /// \param field The name of the field, either lb, ub (others TBC) /// \param value Constraints function or values. -int ocp_nlp_constraints_model_set_slice(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, - int start_stage, int end_stage, const char *field, void *value, int dim); int ocp_nlp_constraints_model_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_in *in, int stage, const char *field, void *value); @@ -283,9 +279,6 @@ void ocp_nlp_out_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *ou void ocp_nlp_out_get(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, int stage, const char *field, void *value); -void ocp_nlp_out_get_slice(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, - int start_stage, int end_stage, const char *field, void *value); - // void ocp_nlp_get_at_stage(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_solver *solver, int stage, const char *field, void *value); @@ -300,6 +293,8 @@ void ocp_nlp_constraint_dims_get_from_attr(ocp_nlp_config *config, ocp_nlp_dims void ocp_nlp_cost_dims_get_from_attr(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, int stage, const char *field, int *dims_out); +void ocp_nlp_dynamics_dims_get_from_attr(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out, + int stage, const char *field, int *dims_out); /* opts */ @@ -374,6 +369,8 @@ int ocp_nlp_precompute(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out * /// \param nlp_out The output struct. void ocp_nlp_eval_cost(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_out); +// +void ocp_nlp_eval_residuals(ocp_nlp_solver *solver, ocp_nlp_in *nlp_in, ocp_nlp_out *nlp_out); // void ocp_nlp_eval_param_sens(ocp_nlp_solver *solver, char *field, int stage, int index, ocp_nlp_out *sens_nlp_out); diff --git a/third_party/acados/include/acados_c/sim_interface.h b/third_party/acados/include/acados_c/sim_interface.h index ba2e67bd6a..cc47b62ed4 100644 --- a/third_party/acados/include/acados_c/sim_interface.h +++ b/third_party/acados/include/acados_c/sim_interface.h @@ -45,11 +45,11 @@ extern "C" { typedef enum { - ERK, - IRK, - GNSF, - LIFTED_IRK, - INVALID_SIM_SOLVER, + ERK, + IRK, + GNSF, + LIFTED_IRK, + INVALID_SIM_SOLVER, } sim_solver_t; diff --git a/third_party/acados/include/blasfeo/include/blasfeo_block_size.h b/third_party/acados/include/blasfeo/include/blasfeo_block_size.h index 182cc71b14..0ccd4f1bba 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_block_size.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_block_size.h @@ -43,7 +43,28 @@ -#if defined( TARGET_X64_INTEL_HASWELL ) +#if defined( TARGET_X64_INTEL_SKYLAKE_X ) +// common +#define CACHE_LINE_SIZE 64 // data cache size: 64 bytes +#define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 8-way +#define L2_CACHE_SIZE (256*1024) //(1024*1024) // L2 data cache size: 1 MB ; DTLB1 64*4 kB = 256 kB +#define LLC_CACHE_SIZE (6*1024*1024) //(8*1024*1024) // LLC cache size: 8 MB ; TLB 1536*4 kB = 6 MB +// double +#define D_PS 8 // panel size +#define D_PLD 8 // 4 // GCD of panel length +#define D_M_KERNEL 24 // max kernel size +#define D_KC 128 //256 // 192 +#define D_NC 144 //72 //96 //72 // 120 // 512 +#define D_MC 2400 // 6000 +// single +#define S_PS 16 // panel size +#define S_PLD 4 // GCD of panel length TODO probably 16 when writing assebly +#define S_M_KERNEL 32 // max kernel size +#define S_KC 128 //256 +#define S_NC 128 //144 +#define S_MC 3000 + +#elif defined( TARGET_X64_INTEL_HASWELL ) // common #define CACHE_LINE_SIZE 64 // data cache size: 64 bytes #define L1_CACHE_SIZE (32*1024) // L1 data cache size: 32 kB, 8-way diff --git a/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_api.h b/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_api.h index f27264e133..15915f436f 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_api.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_api.h @@ -101,18 +101,18 @@ void blasfeo_dtrsv_ltu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct bl void blasfeo_dtrsv_unn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); // z <= inv( A^T ) * x, A (m)x(m) upper, transposed, not_unit void blasfeo_dtrsv_utn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); +// z <= A * x ; A lower triangular +void blasfeo_dtrmv_lnn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); +// z <= A * x ; A lower triangular, unit diagonal +void blasfeo_dtrmv_lnu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); +// z <= A^T * x ; A lower triangular +void blasfeo_dtrmv_ltn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); +// z <= A^T * x ; A lower triangular, unit diagonal +void blasfeo_dtrmv_ltu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); // z <= beta * y + alpha * A * x ; A upper triangular void blasfeo_dtrmv_unn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); // z <= A^T * x ; A upper triangular void blasfeo_dtrmv_utn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); -// z <= A * x ; A lower triangular -void blasfeo_dtrmv_lnn(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); -// z <= A^T * x ; A lower triangular -void blasfeo_dtrmv_ltn(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); -// z <= A * x ; A lower triangular, unit diagonal -void blasfeo_dtrmv_lnu(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); -// z <= A^T * x ; A lower triangular, unit diagonal -void blasfeo_dtrmv_ltu(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); // z_n <= beta_n * y_n + alpha_n * A * x_n // z_t <= beta_t * y_t + alpha_t * A^T * x_t void blasfeo_dgemv_nt(int m, int n, double alpha_n, double alpha_t, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx_n, int xi_n, struct blasfeo_dvec *sx_t, int xi_t, double beta_n, double beta_t, struct blasfeo_dvec *sy_n, int yi_n, struct blasfeo_dvec *sy_t, int yi_t, struct blasfeo_dvec *sz_n, int zi_n, struct blasfeo_dvec *sz_t, int zi_t); diff --git a/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_ref_api.h b/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_ref_api.h index eaac365817..5773715cfa 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_ref_api.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_d_blasfeo_ref_api.h @@ -101,23 +101,24 @@ void blasfeo_ref_dtrsv_ltu(int m, struct blasfeo_dmat *sA, int ai, int aj, struc void blasfeo_ref_dtrsv_unn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); // z <= inv( A' ) * x, A (m)x(m) upper, transposed, not_unit void blasfeo_ref_dtrsv_utn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); +// z <= A * x ; A lower triangular +void blasfeo_ref_dtrmv_lnn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); +// z <= A * x ; A lower triangular, unit diagonal +void blasfeo_ref_dtrmv_lnu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); +// z <= A' * x ; A lower triangular +void blasfeo_ref_dtrmv_ltn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); +// z <= A' * x ; A lower triangular, unit diagonal +void blasfeo_ref_dtrmv_ltu(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); // z <= beta * y + alpha * A * x ; A upper triangular void blasfeo_ref_dtrmv_unn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); // z <= A' * x ; A upper triangular void blasfeo_ref_dtrmv_utn(int m, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); -// z <= A * x ; A lower triangular -void blasfeo_ref_dtrmv_lnn(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); -// z <= A' * x ; A lower triangular -void blasfeo_ref_dtrmv_ltn(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); -// z <= A * x ; A lower triangular, unit diagonal -void blasfeo_ref_dtrmv_lnu(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); -// z <= A' * x ; A lower triangular, unit diagonal -void blasfeo_ref_dtrmv_ltu(int m, int n, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, struct blasfeo_dvec *sz, int zi); // z_n <= beta_n * y_n + alpha_n * A * x_n // z_t <= beta_t * y_t + alpha_t * A' * x_t void blasfeo_ref_dgemv_nt(int m, int n, double alpha_n, double alpha_t, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx_n, int xi_n, struct blasfeo_dvec *sx_t, int xi_t, double beta_n, double beta_t, struct blasfeo_dvec *sy_n, int yi_n, struct blasfeo_dvec *sy_t, int yi_t, struct blasfeo_dvec *sz_n, int zi_n, struct blasfeo_dvec *sz_t, int zi_t); // z <= beta * y + alpha * A * x, where A is symmetric and only the lower triangular patr of A is accessed -void blasfeo_ref_dsymv_l(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi); +void blasfeo_ref_dsymv_l(int m, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi); +void blasfeo_ref_dsymv_l_mn(int m, int n, double alpha, struct blasfeo_dmat *sA, int ai, int aj, struct blasfeo_dvec *sx, int xi, double beta, struct blasfeo_dvec *sy, int yi, struct blasfeo_dvec *sz, int zi); // diagonal diff --git a/third_party/acados/include/blasfeo/include/blasfeo_d_kernel.h b/third_party/acados/include/blasfeo/include/blasfeo_d_kernel.h index bf110c2bf8..7094857697 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_d_kernel.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_d_kernel.h @@ -54,6 +54,158 @@ void blasfeo_align_4096_byte(void *ptr, void **ptr_align); void blasfeo_align_64_byte(void *ptr, void **ptr_align); +// +// lib8 +// + +// 24x8 +void kernel_dgemm_nt_24x8_lib8(int k, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int sdd); // +void kernel_dgemm_nt_24x8_vs_lib8(int k, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int sdd, int m1, int n1); // +void kernel_dtrsm_nt_rl_inv_24x8_lib8(int k, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E); // +void kernel_dpotrf_nt_l_24x8_lib8(int k, double *A, int sda, double *B, double *C, int sdc, double *D, int sdd, double *inv_diag_D); +void kernel_dpotrf_nt_l_24x8_vs_lib8(int k, double *A, int sda, double *B, double *C, int sdc, double *D, int sdd, double *inv_diag_D, int m1, int n1); +void kernel_dtrsm_nt_rl_inv_24x8_vs_lib8(int k, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E, int m1, int n1); // +void kernel_dgemm_dtrsm_nt_rl_inv_24x8_lib8(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E); +void kernel_dgemm_dtrsm_nt_rl_inv_24x8_vs_lib8(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E, int m1, int n1); +void kernel_dsyrk_dpotrf_nt_l_24x8_lib8(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *inv_diag_D); +void kernel_dsyrk_dpotrf_nt_l_24x8_vs_lib8(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *inv_diag_D, int m1, int n1); +void kernel_dlarfb8_rn_24_lib8(int kmax, double *pV, double *pT, double *pD, int sdd); +// 16x8 +void kernel_dgemm_nt_16x8_lib8(int k, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int sdd); // +void kernel_dgemm_nt_16x8_vs_lib8(int k, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int sdd, int m1, int n1); // +void kernel_dgemm_nt_16x8_gen_lib8(int k, double *alpha, double *A, int sda, double *B, double *beta, int offC, double *C, int sdc, int offD, double *D, int sdd, int m0, int m1, int n0, int n1); // +void kernel_dgemm_nn_16x8_lib8(int k, double *alpha, double *A, int sda, int offB, double *B, int sdb, double *beta, double *C, int sdc, double *D, int sdd); // +void kernel_dgemm_nn_16x8_vs_lib8(int k, double *alpha, double *A, int sda, int offB, double *B, int sdb, double *beta, double *C, int sdc, double *D, int sdd, int m1, int n1); // +void kernel_dgemm_nn_16x8_gen_lib8(int k, double *alpha, double *A, int sda, int offB, double *B, int sdb, double *beta, int offC, double *C, int sdc, int offD, double *D, int sdd, int m0, int m1, int n0, int n1); // +void kernel_dsyrk_nt_l_16x8_lib8(int k, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int sdd); // +void kernel_dsyrk_nt_l_16x8_vs_lib8(int k, double *alpha, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int sdd, int m1, int n1); // +void kernel_dsyrk_nt_l_16x8_gen_lib8(int k, double *alpha, double *A, int sda, double *B, double *beta, int offC, double *C, int sdc, int offD, double *D, int sdd, int m0, int m1, int n0, int n1); // +void kernel_dtrmm_nn_rl_16x8_lib8(int k, double *alpha, double *A, int sda, int offsetB, double *B, int sdb, double *D, int sdd); +void kernel_dtrmm_nn_rl_16x8_vs_lib8(int k, double *alpha, double *A, int sda, int offsetB, double *B, int sdb, double *D, int sdd, int m1, int n1); +void kernel_dtrmm_nn_rl_16x8_gen_lib8(int k, double *alpha, double *A, int sda, int offsetB, double *B, int sdb, int offD, double *D, int sdd, int m0, int m1, int n0, int n1); +void kernel_dtrsm_nt_rl_inv_16x8_lib8(int k, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E); // +void kernel_dtrsm_nt_rl_inv_16x8_vs_lib8(int k, double *A, int sda, double *B, double *beta, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E, int m1, int n1); // +void kernel_dpotrf_nt_l_16x8_lib8(int k, double *A, int sda, double *B, double *C, int sdc, double *D, int sdd, double *inv_diag_D); +void kernel_dpotrf_nt_l_16x8_vs_lib8(int k, double *A, int sda, double *B, double *C, int sdc, double *D, int sdd, double *inv_diag_D, int m1, int n1); +void kernel_dgemm_dtrsm_nt_rl_inv_16x8_lib8(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E); +void kernel_dgemm_dtrsm_nt_rl_inv_16x8_vs_lib8(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E, int m1, int n1); +void kernel_dsyrk_dpotrf_nt_l_16x8_lib8(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *inv_diag_D); +void kernel_dsyrk_dpotrf_nt_l_16x8_vs_lib8(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *inv_diag_D, int m1, int n1); +void kernel_dlarfb8_rn_16_lib8(int kmax, double *pV, double *pT, double *pD, int sdd); +void kernel_dlarfb8_rn_la_16_lib8(int n1, double *pVA, double *pT, double *pD, int sdd, double *pA, int sda); +void kernel_dlarfb8_rn_lla_16_lib8(int n0, int n1, double *pVL, double *pVA, double *pT, double *pD, int sdd, double *pL, int sdl, double *pA, int sda); +// 8x16 +void kernel_dgemm_tt_8x16_lib8(int k, double *alpha, int offA, double *A, int sda, double *B, int sdb, double *beta, double *C, double *D); // +void kernel_dgemm_tt_8x16_vs_lib8(int k, double *alpha, int offA, double *A, int sda, double *B, int sdb, double *beta, double *C, double *D, int m1, int n1); // +void kernel_dgemm_tt_8x16_gen_lib8(int k, double *alpha, int offA, double *A, int sda, double *B, int sdb, double *beta, int offC, double *C, int sdc, int offD, double *D, int sdd, int m0, int m1, int n0, int n1); // +void kernel_dgemm_nt_8x16_lib8(int k, double *alpha, double *A, double *B, int sdb, double *beta, double *C, double *D); // +void kernel_dgemm_nt_8x16_vs_lib8(int k, double *alpha, double *A, double *B, int sdb, double *beta, double *C, double *D, int m1, int n1); // +// 8x8 +void kernel_dgemm_nt_8x8_lib8(int k, double *alpha, double *A, double *B, double *beta, double *C, double *D); // +void kernel_dgemm_nt_8x8_vs_lib8(int k, double *alpha, double *A, double *B, double *beta, double *C, double *D, int m1, int n1); // +void kernel_dgemm_nt_8x8_gen_lib8(int k, double *alpha, double *A, double *B, double *beta, int offC, double *C, int sdc, int offD, double *D, int sdd, int m0, int m1, int n0, int n1); // +void kernel_dgemm_nn_8x8_lib8(int k, double *alpha, double *A, int offB, double *B, int sdb, double *beta, double *C, double *D); // +void kernel_dgemm_nn_8x8_vs_lib8(int k, double *alpha, double *A, int offB, double *B, int sdb, double *beta, double *C, double *D, int m1, int n1); // +void kernel_dgemm_nn_8x8_gen_lib8(int k, double *alpha, double *A, int offB, double *B, int sdb, double *beta, int offC, double *C, int sdc, int offD, double *D, int sdd, int m0, int m1, int n0, int n1); // +void kernel_dgemm_tt_8x8_lib8(int k, double *alpha, int offA, double *A, int sda, double *B, double *beta, double *C, double *D); // +void kernel_dgemm_tt_8x8_vs_lib8(int k, double *alpha, int offA, double *A, int sda, double *B, double *beta, double *C, double *D, int m1, int n1); // +void kernel_dgemm_tt_8x8_gen_lib8(int k, double *alpha, int offA, double *A, int sda, double *B, double *beta, int offc, double *C, int sdc, int offD, double *D, int sdd, int m0, int m1, int n0, int n1); // +void kernel_dsyrk_nt_l_8x8_lib8(int k, double *alpha, double *A, double *B, double *beta, double *C, double *D); // +void kernel_dsyrk_nt_l_8x8_vs_lib8(int k, double *alpha, double *A, double *B, double *beta, double *C, double *D, int m1, int n1); // +void kernel_dsyrk_nt_l_8x8_gen_lib8(int k, double *alpha, double *A, double *B, double *beta, int offC, double *C, int sdc, int offD, double *D, int sdd, int m0, int m1, int n0, int n1); // +void kernel_dtrmm_nn_rl_8x8_lib8(int k, double *alpha, double *A, int offsetB, double *B, int sdb, double *D); +void kernel_dtrmm_nn_rl_8x8_vs_lib8(int k, double *alpha, double *A, int offsetB, double *B, int sdb, double *D, int m1, int n1); +void kernel_dtrmm_nn_rl_8x8_gen_lib8(int k, double *alpha, double *A, int offsetB, double *B, int sdb, int offD, double *D, int sdd, int m0, int m1, int n0, int n1); +void kernel_dtrsm_nt_rl_inv_8x8_lib8(int k, double *A, double *B, double *beta, double *C, double *D, double *E, double *inv_diag_E); +void kernel_dtrsm_nt_rl_inv_8x8_vs_lib8(int k, double *A, double *B, double *beta, double *C, double *D, double *E, double *inv_diag_E, int m1, int n1); +void kernel_dpotrf_nt_l_8x8_lib8(int k, double *A, double *B, double *C, double *D, double *inv_diag_D); +void kernel_dpotrf_nt_l_8x8_vs_lib8(int k, double *A, double *B, double *C, double *D, double *inv_diag_D, int m1, int n1); +void kernel_dgemm_dtrsm_nt_rl_inv_8x8_lib8(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *E, double *inv_diag_E); +void kernel_dgemm_dtrsm_nt_rl_inv_8x8_vs_lib8(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *E, double *inv_diag_E, int m1, int n1); +void kernel_dsyrk_dpotrf_nt_l_8x8_lib8(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *inv_diag_D); +void kernel_dsyrk_dpotrf_nt_l_8x8_vs_lib8(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *inv_diag_D, int m1, int n1); +void kernel_dgelqf_vs_lib8(int m, int n, int k, int offD, double *pD, int sdd, double *dD); +void kernel_dgelqf_pd_vs_lib8(int m, int n, int k, int offD, double *pD, int sdd, double *dD); +void kernel_dgelqf_8_lib8(int kmax, double *pD, double *dD); +void kernel_dgelqf_pd_8_lib8(int kmax, double *pD, double *dD); +void kernel_dlarft_8_lib8(int kmax, double *pD, double *dD, double *pT); +void kernel_dlarfb8_rn_8_lib8(int kmax, double *pV, double *pT, double *pD); +void kernel_dlarfb8_rn_8_vs_lib8(int kmax, double *pV, double *pT, double *pD, int m1); +void kernel_dlarfb8_rn_1_lib8(int kmax, double *pV, double *pT, double *pD); +void kernel_dgelqf_dlarft8_8_lib8(int kmax, double *pD, double *dD, double *pT); +void kernel_dgelqf_pd_dlarft8_8_lib8(int kmax, double *pD, double *dD, double *pT); +void kernel_dgelqf_pd_la_vs_lib8(int m, int n1, int k, int offD, double *pD, int sdd, double *dD, int offA, double *pA, int sda); +void kernel_dgelqf_pd_la_dlarft8_8_lib8(int kmax, double *pD, double *dD, double *pA, double *pT); +void kernel_dlarft_la_8_lib8(int n1, double *dD, double *pA, double *pT); +void kernel_dlarfb8_rn_la_8_lib8(int n1, double *pVA, double *pT, double *pD, double *pA); +void kernel_dlarfb8_rn_la_8_vs_lib8(int n1, double *pVA, double *pT, double *pD, double *pA, int m1); +void kernel_dlarfb8_rn_la_1_lib8(int n1, double *pVA, double *pT, double *pD, double *pA); +void kernel_dgelqf_pd_lla_vs_lib8(int m, int n0, int n1, int k, int offD, double *pD, int sdd, double *dD, int offL, double *pL, int sdl, int offA, double *pA, int sda); +void kernel_dgelqf_pd_lla_dlarft8_8_lib8(int n0, int n1, double *pD, double *dD, double *pL, double *pA, double *pT); +void kernel_dlarft_lla_8_lib8(int n0, int n1, double *dD, double *pL, double *pA, double *pT); +void kernel_dlarfb8_rn_lla_8_lib8(int n0, int n1, double *pVL, double *pVA, double *pT, double *pD, double *pL, double *pA); +void kernel_dlarfb8_rn_lla_8_vs_lib8(int n0, int n1, double *pVL, double *pVA, double *pT, double *pD, double *pL, double *pA, int m1); +void kernel_dlarfb8_rn_lla_1_lib8(int n0, int n1, double *pVL, double *pVA, double *pT, double *pD, double *pL, double *pA); + +// panel copy / pack +// 24 +void kernel_dpack_nn_24_lib8(int kmax, double *A, int lda, double *C, int sdc); +void kernel_dpack_nn_24_vs_lib8(int kmax, double *A, int lda, double *C, int sdc, int m1); +// 16 +void kernel_dpacp_nn_16_lib8(int kmax, int offsetA, double *A, int sda, double *B, int sdb); +void kernel_dpacp_nn_16_vs_lib8(int kmax, int offsetA, double *A, int sda, double *B, int sdb, int m1); +void kernel_dpack_nn_16_lib8(int kmax, double *A, int lda, double *C, int sdc); +void kernel_dpack_nn_16_vs_lib8(int kmax, double *A, int lda, double *C, int sdc, int m1); +// 8 +void kernel_dpacp_nn_8_lib8(int kmax, int offsetA, double *A, int sda, double *B); +void kernel_dpacp_nn_8_vs_lib8(int kmax, int offsetA, double *A, int sda, double *B, int m1); +void kernel_dpacp_tn_8_lib8(int kmax, int offsetA, double *A, int sda, double *B); +void kernel_dpacp_tn_8_vs_lib8(int kmax, int offsetA, double *A, int sda, double *B, int m1); +void kernel_dpacp_l_nn_8_lib8(int kmax, int offsetA, double *A, int sda, double *B); +void kernel_dpacp_l_nn_8_vs_lib8(int kmax, int offsetA, double *A, int sda, double *B, int m1); +void kernel_dpacp_l_tn_8_lib8(int kmax, int offsetA, double *A, int sda, double *B); +void kernel_dpacp_l_tn_8_vs_lib8(int kmax, int offsetA, double *A, int sda, double *B, int m1); +void kernel_dpaad_nn_8_lib8(int kmax, double *alpha, int offsetA, double *A, int sda, double *B); +void kernel_dpaad_nn_8_vs_lib8(int kmax, double *alpha, int offsetA, double *A, int sda, double *B, int m1); +void kernel_dpack_nn_8_lib8(int kmax, double *A, int lda, double *C); +void kernel_dpack_nn_8_vs_lib8(int kmax, double *A, int lda, double *C, int m1); +void kernel_dpack_tn_8_lib8(int kmax, double *A, int lda, double *C); +void kernel_dpack_tn_8_vs_lib8(int kmax, double *A, int lda, double *C, int m1); +// 4 +void kernel_dpack_tt_4_lib8(int kmax, double *A, int lda, double *C, int sdc); // TODO offsetC +void kernel_dpack_tt_4_vs_lib8(int kmax, double *A, int lda, double *C, int sdc, int m1); // TODO offsetC + +// level 2 BLAS +// 16 +void kernel_dgemv_n_16_lib8(int k, double *alpha, double *A, int sda, double *x, double *beta, double *y, double *z); +// 8 +void kernel_dgemv_n_8_lib8(int k, double *alpha, double *A, double *x, double *beta, double *y, double *z); +void kernel_dgemv_n_8_vs_lib8(int k, double *alpha, double *A, double *x, double *beta, double *y, double *z, int m1); +//void kernel_dgemv_n_8_gen_lib8(int k, double *alpha, double *A, double *x, double *beta, double *y, double *z, int m0, int m1); +void kernel_dgemv_n_8_gen_lib8(int k, double *alpha, int offsetA, double *A, double *x, double *beta, double *y, double *z, int m1); +void kernel_dgemv_t_8_lib8(int k, double *alpha, int offsetA, double *A, int sda, double *x, double *beta, double *y, double *z); +void kernel_dgemv_t_8_vs_lib8(int k, double *alpha, int offsetA, double *A, int sda, double *x, double *beta, double *y, double *z, int n1); +void kernel_dgemv_nt_8_lib8(int kmax, double *alpha_n, double *alpha_t, int offsetA, double *A, int sda, double *x_n, double *x_t, double *beta_t, double *y_t, double *z_n, double *z_t); +void kernel_dgemv_nt_8_vs_lib8(int kmax, double *alpha_n, double *alpha_t, int offsetA, double *A, int sda, double *x_n, double *x_t, double *beta_t, double *y_t, double *z_n, double *z_t, int n1); +void kernel_dsymv_l_8_lib8(int kmax, double *alpha, double *A, int sda, double *x, double *z); +void kernel_dsymv_l_8_vs_lib8(int kmax, double *alpha, double *A, int sda, double *x, double *z, int n1); +void kernel_dsymv_l_8_gen_lib8(int kmax, double *alpha, int offsetA, double *A, int sda, double *x, double *z, int n1); +void kernel_dtrmv_n_ln_8_lib8(int k, double *A, double *x, double *z); +void kernel_dtrmv_n_ln_8_vs_lib8(int k, double *A, double *x, double *z, int m1); +void kernel_dtrmv_n_ln_8_gen_lib8(int k, int offsetA, double *A, double *x, double *z, int m1); +void kernel_dtrmv_t_ln_8_lib8(int k, double *A, int sda, double *x, double *z); +void kernel_dtrmv_t_ln_8_vs_lib8(int k, double *A, int sda, double *x, double *z, int n1); +void kernel_dtrmv_t_ln_8_gen_lib8(int k, int offsetA, double *A, int sda, double *x, double *z, int n1); +void kernel_dtrsv_n_l_inv_8_lib8(int k, double *A, double *inv_diag_A, double *x, double *z); +void kernel_dtrsv_n_l_inv_8_vs_lib8(int k, double *A, double *inv_diag_A, double *x, double *z, int m1, int n1); +void kernel_dtrsv_t_l_inv_8_lib8(int k, double *A, int sda, double *inv_diag_A, double *x, double *z); +void kernel_dtrsv_t_l_inv_8_vs_lib8(int k, double *A, int sda, double *inv_diag_A, double *x, double *z, int m1, int n1); + + + +// +// lib4 +// // level 2 BLAS // 12 @@ -413,10 +565,10 @@ void kernel_drowsw_lib4(int kmax, double *pA, double *pC); // merged routines // 12x4 -void kernel_dgemm_dtrsm_nt_rl_inv_12x4_vs_lib4(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E, int km, int kn); void kernel_dgemm_dtrsm_nt_rl_inv_12x4_lib4(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E); -void kernel_dsyrk_dpotrf_nt_l_12x4_vs_lib4(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *inv_diag_D, int km, int kn); +void kernel_dgemm_dtrsm_nt_rl_inv_12x4_vs_lib4(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E, int km, int kn); void kernel_dsyrk_dpotrf_nt_l_12x4_lib4(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *inv_diag_D); +void kernel_dsyrk_dpotrf_nt_l_12x4_vs_lib4(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *inv_diag_D, int km, int kn); // 4x12 void kernel_dgemm_dtrsm_nt_rl_inv_4x12_vs_lib4(int kp, double *Ap, double *Bp, int sdbp, int km_, double *Am, double *Bm, int sdbm, double *C, double *D, double *E, int sde, double *inv_diag_E, int km, int kn); // 8x8 @@ -425,8 +577,8 @@ void kernel_dsyrk_dpotrf_nt_l_8x8_vs_lib4(int kp, double *Ap, int sdap, double * void kernel_dgemm_dtrsm_nt_rl_inv_8x8l_vs_lib4(int kp, double *Ap, int sdap, double *Bp, int sdb, int km_, double *Am, int sdam, double *Bm, int sdbm, double *C, int sdc, double *D, int sdd, double *E, int sde, double *inv_diag_E, int km, int kn); void kernel_dgemm_dtrsm_nt_rl_inv_8x8u_vs_lib4(int kp, double *Ap, int sdap, double *Bp, int sdb, int km_, double *Am, int sdam, double *Bm, int sdbm, double *C, int sdc, double *D, int sdd, double *E, int sde, double *inv_diag_E, int km, int kn); // 8x4 -void kernel_dgemm_dtrsm_nt_rl_inv_8x4_vs_lib4(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E, int km, int kn); void kernel_dgemm_dtrsm_nt_rl_inv_8x4_lib4(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E); +void kernel_dgemm_dtrsm_nt_rl_inv_8x4_vs_lib4(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *E, double *inv_diag_E, int km, int kn); void kernel_dsyrk_dpotrf_nt_l_8x4_lib4(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *inv_diag_D); void kernel_dsyrk_dpotrf_nt_l_8x4_vs_lib4(int kp, double *Ap, int sdap, double *Bp, int km_, double *Am, int sdam, double *Bm, double *C, int sdc, double *D, int sdd, double *inv_diag_D, int km, int kn); // 4x8 @@ -434,16 +586,16 @@ void kernel_dgemm_dtrsm_nt_rl_inv_4x8_vs_lib4(int kp, double *Ap, double *Bp, in // 4x4 void kernel_dgemm_dtrsm_nt_rl_inv_4x4_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *E, double *inv_diag_E); void kernel_dgemm_dtrsm_nt_rl_inv_4x4_vs_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *E, double *inv_diag_E, int km, int kn); -void kernel_dsyrk_dpotrf_nt_l_4x4_vs_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *inv_diag_D, int km, int kn); void kernel_dsyrk_dpotrf_nt_l_4x4_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *inv_diag_D); +void kernel_dsyrk_dpotrf_nt_l_4x4_vs_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *inv_diag_D, int km, int kn); // 4x2 void kernel_dgemm_dtrsm_nt_rl_inv_4x2_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *E, double *inv_diag_E); void kernel_dgemm_dtrsm_nt_rl_inv_4x2_vs_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *E, double *inv_diag_E, int km, int kn); -void kernel_dsyrk_dpotrf_nt_l_4x2_vs_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *inv_diag_D, int km, int kn); void kernel_dsyrk_dpotrf_nt_l_4x2_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *inv_diag_D); +void kernel_dsyrk_dpotrf_nt_l_4x2_vs_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *inv_diag_D, int km, int kn); // 2x2 -void kernel_dsyrk_dpotrf_nt_l_2x2_vs_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *inv_diag_D, int km, int kn); void kernel_dsyrk_dpotrf_nt_l_2x2_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *inv_diag_D); +void kernel_dsyrk_dpotrf_nt_l_2x2_vs_lib4(int kp, double *Ap, double *Bp, int km_, double *Am, double *Bm, double *C, double *D, double *inv_diag_D, int km, int kn); /* * @@ -1034,6 +1186,53 @@ void kernel_dgemm_nt_8xn_p0_lib44cc(int n, int k, double *alpha, double *A, int +// A, B panel-major bs=8; C, D column-major +// 24x8 +void kernel_dgemm_nt_24x8_lib88cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_nt_24x8_vs_lib88cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +// 16x8 +void kernel_dgemm_nt_16x8_lib88cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_nt_16x8_vs_lib88cc(int kmax, double *alpha, double *A, int sda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +// 8x8 +void kernel_dgemm_nt_8x8_lib88cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_nt_8x8_vs_lib88cc(int kmax, double *alpha, double *A, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); + +// A, panel-major bs=8; B, C, D column-major +// 24x8 +void kernel_dgemm_nt_24x8_lib8ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_nt_24x8_vs_lib8ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dgemm_nn_24x8_lib8ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_nn_24x8_vs_lib8ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +// 16x8 +void kernel_dgemm_nt_16x8_lib8ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_nt_16x8_vs_lib8ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dgemm_nn_16x8_lib8ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_nn_16x8_vs_lib8ccc(int kmax, double *alpha, double *A, int sda, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +// 8x8 +void kernel_dgemm_nt_8x8_lib8ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_nt_8x8_vs_lib8ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dgemm_nn_8x8_lib8ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_nn_8x8_vs_lib8ccc(int kmax, double *alpha, double *A, double *B, int ldb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); + +// B, panel-major bs=8; A, C, D column-major +// 8x24 +void kernel_dgemm_nt_8x24_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_nt_8x24_vs_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dgemm_tt_8x24_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_tt_8x24_vs_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +// 8x16 +void kernel_dgemm_nt_8x16_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_nt_8x16_vs_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dgemm_tt_8x16_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_tt_8x16_vs_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, int sdb, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +// 8x8 +void kernel_dgemm_nt_8x8_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_nt_8x8_vs_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); +void kernel_dgemm_tt_8x8_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, double *beta, double *C, int ldc, double *D, int ldd); +void kernel_dgemm_tt_8x8_vs_libc8cc(int kmax, double *alpha, double *A, int lda, double *B, double *beta, double *C, int ldc, double *D, int ldd, int m1, int n1); + + + // aux void kernel_dvecld_inc1(int kmax, double *x); void kernel_dveccp_inc1(int kmax, double *x, double *y); diff --git a/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_api.h b/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_api.h index 72c19dcdc0..24b21ead25 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_api.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_api.h @@ -97,14 +97,18 @@ void blasfeo_strsv_ltu(int m, struct blasfeo_smat *sA, int ai, int aj, struct bl void blasfeo_strsv_unn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); // z <= inv( A' ) * x, A (m)x(m) upper, transposed, not_unit void blasfeo_strsv_utn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); +// z <= A * x ; A lower triangular +void blasfeo_strmv_lnn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); +// z <= A * x ; A lower triangular, unit diagonal +void blasfeo_strmv_lnu(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); +// z <= A' * x ; A lower triangular +void blasfeo_strmv_ltn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); +// z <= A' * x ; A lower triangular, unit diagonal +void blasfeo_strmv_ltu(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); // z <= beta * y + alpha * A * x ; A upper triangular void blasfeo_strmv_unn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); // z <= A' * x ; A upper triangular void blasfeo_strmv_utn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); -// z <= A * x ; A lower triangular -void blasfeo_strmv_lnn(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); -// z <= A' * x ; A lower triangular -void blasfeo_strmv_ltn(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); // z_n <= beta_n * y_n + alpha_n * A * x_n // z_t <= beta_t * y_t + alpha_t * A' * x_t void blasfeo_sgemv_nt(int m, int n, float alpha_n, float alpha_t, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx_n, int xi_n, struct blasfeo_svec *sx_t, int xi_t, float beta_n, float beta_t, struct blasfeo_svec *sy_n, int yi_n, struct blasfeo_svec *sy_t, int yi_t, struct blasfeo_svec *sz_n, int zi_n, struct blasfeo_svec *sz_t, int zi_t); diff --git a/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_ref_api.h b/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_ref_api.h index db8b28d3b7..805914cc05 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_ref_api.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_s_blasfeo_ref_api.h @@ -97,19 +97,24 @@ void blasfeo_ref_strsv_ltu(int m, struct blasfeo_smat *sA, int ai, int aj, struc void blasfeo_ref_strsv_unn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); // z <= inv( A' ) * x, A (m)x(m) upper, transposed, not_unit void blasfeo_ref_strsv_utn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); +// z <= A * x ; A lower triangular +void blasfeo_ref_strmv_lnn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); +// z <= A * x ; A lower triangular, unit diagonal +void blasfeo_ref_strmv_lnu(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); +// z <= A' * x ; A lower triangular +void blasfeo_ref_strmv_ltn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); +// z <= A' * x ; A lower triangular, unit diagonal +void blasfeo_ref_strmv_ltu(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); // z <= beta * y + alpha * A * x ; A upper triangular void blasfeo_ref_strmv_unn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); // z <= A' * x ; A upper triangular void blasfeo_ref_strmv_utn(int m, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); -// z <= A * x ; A lower triangular -void blasfeo_ref_strmv_lnn(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); -// z <= A' * x ; A lower triangular -void blasfeo_ref_strmv_ltn(int m, int n, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, struct blasfeo_svec *sz, int zi); // z_n <= beta_n * y_n + alpha_n * A * x_n // z_t <= beta_t * y_t + alpha_t * A' * x_t void blasfeo_ref_sgemv_nt(int m, int n, float alpha_n, float alpha_t, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx_n, int xi_n, struct blasfeo_svec *sx_t, int xi_t, float beta_n, float beta_t, struct blasfeo_svec *sy_n, int yi_n, struct blasfeo_svec *sy_t, int yi_t, struct blasfeo_svec *sz_n, int zi_n, struct blasfeo_svec *sz_t, int zi_t); // z <= beta * y + alpha * A * x, where A is symmetric and only the lower triangular patr of A is accessed -void blasfeo_ref_ssymv_l(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi); +void blasfeo_ref_ssymv_l(int m, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi); +void blasfeo_ref_ssymv_l_mn(int m, int n, float alpha, struct blasfeo_smat *sA, int ai, int aj, struct blasfeo_svec *sx, int xi, float beta, struct blasfeo_svec *sy, int yi, struct blasfeo_svec *sz, int zi); // diagonal diff --git a/third_party/acados/include/blasfeo/include/blasfeo_s_kernel.h b/third_party/acados/include/blasfeo/include/blasfeo_s_kernel.h index dcbde989cc..99d2b28c82 100644 --- a/third_party/acados/include/blasfeo/include/blasfeo_s_kernel.h +++ b/third_party/acados/include/blasfeo/include/blasfeo_s_kernel.h @@ -550,6 +550,16 @@ void kernel_spotrf_nt_l_4x4_lib44cc(int kmax, float *A, float *B, float *C, int void kernel_spotrf_nt_l_4x4_vs_lib44cc(int kmax, float *A, float *B, float *C, int ldc, float *D, int ldd, float *dD, int m1, int n1); // B panel-major bs=8; A, C, D column-major +// 4x24 +void kernel_sgemm_nt_4x24_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd); +void kernel_sgemm_nt_4x24_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1); +void kernel_sgemm_tt_4x24_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd); +void kernel_sgemm_tt_4x24_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1); +// 4x16 +void kernel_sgemm_nt_4x16_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd); +void kernel_sgemm_nt_4x16_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1); +void kernel_sgemm_tt_4x16_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd); +void kernel_sgemm_tt_4x16_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, int sdb, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1); // 8x8 void kernel_sgemm_nt_8x8_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd); void kernel_sgemm_nt_8x8_vs_libc8cc(int kmax, float *alpha, float *A, int lda, float *B, float *beta, float *C, int ldc, float *D, int ldd, int m1, int n1); diff --git a/third_party/acados/larch64/lib/libacados.so b/third_party/acados/larch64/lib/libacados.so index a61f486556..68de91c9b5 100644 --- a/third_party/acados/larch64/lib/libacados.so +++ b/third_party/acados/larch64/lib/libacados.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef769d1e86261357234b3260f1aa8a0eb317858251eff83bcfdd8cdd60068664 -size 485104 +oid sha256:9303c7ff72999cc1e799faf24b715112326eefefb842240d4da19beb8caba1d7 +size 484904 diff --git a/third_party/acados/larch64/lib/libblasfeo.so b/third_party/acados/larch64/lib/libblasfeo.so index 92ab214e31..57d8062895 100644 --- a/third_party/acados/larch64/lib/libblasfeo.so +++ b/third_party/acados/larch64/lib/libblasfeo.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8c043d1a78e7bf885954fa40ae308d5b7456f5b4f796461c51d00f7f47be5c59 +oid sha256:654deb9f5c9cca7c6c049f8cffc9eae40052d7971489690929b5efded4b9d9da size 730608 diff --git a/third_party/acados/larch64/lib/libhpipm.so b/third_party/acados/larch64/lib/libhpipm.so index 348923f2ff..506dd75fc3 100644 --- a/third_party/acados/larch64/lib/libhpipm.so +++ b/third_party/acados/larch64/lib/libhpipm.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d51902b5af8d29750f76611c956a66f070fef9334e0945f2aec65365834c4cb +oid sha256:c7494826f51ccbbe43da2df236216dad90bbd2f378accee9a1828f63fac80599 size 1367352 diff --git a/third_party/acados/x86_64/lib/libacados.so b/third_party/acados/x86_64/lib/libacados.so index 58ebe41a93..7b47f82fa3 100644 --- a/third_party/acados/x86_64/lib/libacados.so +++ b/third_party/acados/x86_64/lib/libacados.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ad96e016d31a7195786b9dc011e94ad78d01199da77ec4288a17bf4b69f33021 -size 525968 +oid sha256:304b272189540640455e2003b6b124ce134edbc537d9f7e266939a6163e3965c +size 521320 diff --git a/third_party/acados/x86_64/lib/libblasfeo.so b/third_party/acados/x86_64/lib/libblasfeo.so index 23ddb4258f..5901f058dc 100644 --- a/third_party/acados/x86_64/lib/libblasfeo.so +++ b/third_party/acados/x86_64/lib/libblasfeo.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c41f340411cbd4adf8d3065298c094f438d432304234bcb24403062a0a10caa5 +oid sha256:7eed704b395c5ab6eeff9e4188ee4f883889c666adcea7743cf2bdcef535f533 size 1265064 diff --git a/third_party/acados/x86_64/lib/libhpipm.so b/third_party/acados/x86_64/lib/libhpipm.so index e18beb350a..a81e9c2080 100644 --- a/third_party/acados/x86_64/lib/libhpipm.so +++ b/third_party/acados/x86_64/lib/libhpipm.so @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8d15b89d07c01f24e668961797f9e163d3aab1fb17006164fa8d1c850908fc49 +oid sha256:58abc1d388507fbab4c92483b58a4a2dd985877cd0b568c3135436cadc8f616e size 1572648