From 5fdda8dbd82954c2b9b5af82a19e599d3b4b5ec2 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Mon, 14 Dec 2020 15:46:49 -0800 Subject: [PATCH] Thneed refactors for future functions (#2673) * delete debug * thneed updates, but it seems slower * thneed refactor * refactor touchups * add back asserts * fix uaf * track the size for local args * final thneed refactor * switch kgsl_command_object to avoid memory leak * comments * unused includes Co-authored-by: Comma Device --- selfdrive/modeld/SConscript | 21 +- selfdrive/modeld/thneed/debug/.gitignore | 1 - .../thneed/debug/decompiler/disasm-a3xx.c | 1426 ----- .../thneed/debug/decompiler/instr-a3xx.h | 1119 ---- .../modeld/thneed/debug/decompiler/ir3.h | 1755 ------ .../thneed/debug/decompiler/shader_enums.h | 906 --- .../thneed/debug/decompiler/util/bitset.h | 261 - .../thneed/debug/decompiler/util/list.h | 262 - .../thneed/debug/decompiler/util/macros.h | 346 -- selfdrive/modeld/thneed/debug/disassembler.cc | 132 - .../modeld/thneed/debug/include/a5xx.xml.h | 5201 ----------------- .../thneed/debug/include/adreno_pm4.xml.h | 1344 ----- .../thneed/debug/include/adreno_pm4types.h | 473 -- selfdrive/modeld/thneed/debug/main.cc | 724 --- .../thneed/debug/microbenchmark/gemm.cl | 51 - .../thneed/debug/microbenchmark/gemm_image.cl | 75 - .../modeld/thneed/debug/microbenchmark/go.c | 314 - .../modeld/thneed/debug/microbenchmark/run.sh | 2 - selfdrive/modeld/thneed/debug/test.cc | 99 - selfdrive/modeld/thneed/debug/thneed | 4 - selfdrive/modeld/thneed/thneed.cc | 463 +- selfdrive/modeld/thneed/thneed.h | 63 +- 22 files changed, 349 insertions(+), 14693 deletions(-) delete mode 100644 selfdrive/modeld/thneed/debug/.gitignore delete mode 100644 selfdrive/modeld/thneed/debug/decompiler/disasm-a3xx.c delete mode 100644 selfdrive/modeld/thneed/debug/decompiler/instr-a3xx.h delete mode 100644 selfdrive/modeld/thneed/debug/decompiler/ir3.h delete mode 100644 selfdrive/modeld/thneed/debug/decompiler/shader_enums.h delete mode 100644 selfdrive/modeld/thneed/debug/decompiler/util/bitset.h delete mode 100644 selfdrive/modeld/thneed/debug/decompiler/util/list.h delete mode 100644 selfdrive/modeld/thneed/debug/decompiler/util/macros.h delete mode 100644 selfdrive/modeld/thneed/debug/disassembler.cc delete mode 100644 selfdrive/modeld/thneed/debug/include/a5xx.xml.h delete mode 100644 selfdrive/modeld/thneed/debug/include/adreno_pm4.xml.h delete mode 100644 selfdrive/modeld/thneed/debug/include/adreno_pm4types.h delete mode 100644 selfdrive/modeld/thneed/debug/main.cc delete mode 100644 selfdrive/modeld/thneed/debug/microbenchmark/gemm.cl delete mode 100644 selfdrive/modeld/thneed/debug/microbenchmark/gemm_image.cl delete mode 100644 selfdrive/modeld/thneed/debug/microbenchmark/go.c delete mode 100755 selfdrive/modeld/thneed/debug/microbenchmark/run.sh delete mode 100644 selfdrive/modeld/thneed/debug/test.cc delete mode 100755 selfdrive/modeld/thneed/debug/thneed diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript index 9b34398c98..4fcded106c 100644 --- a/selfdrive/modeld/SConscript +++ b/selfdrive/modeld/SConscript @@ -3,8 +3,6 @@ lenv = env.Clone() libs = [cereal, messaging, common, 'OpenCL', 'SNPE', 'symphony-cpu', 'capnp', 'zmq', 'kj', 'yuv', gpucommon, visionipc] -TEST_THNEED = False - common_src = [ "models/commonmodel.cc", "runners/snpemodel.cc", @@ -14,16 +12,14 @@ common_src = [ if arch == "aarch64": libs += ['gsl', 'CB', 'gnustl_shared'] - if not TEST_THNEED: - common_src += ["thneed/thneed.cc"] - lenv['CFLAGS'].append("-DUSE_THNEED") - lenv['CXXFLAGS'].append("-DUSE_THNEED") + common_src += ["thneed/thneed.cc"] + lenv['CFLAGS'].append("-DUSE_THNEED") + lenv['CXXFLAGS'].append("-DUSE_THNEED") elif arch == "larch64": libs += ['gsl', 'CB', 'pthread', 'dl'] - if not TEST_THNEED: - common_src += ["thneed/thneed.cc"] - lenv['CFLAGS'].append("-DUSE_THNEED") - lenv['CXXFLAGS'].append("-DUSE_THNEED") + common_src += ["thneed/thneed.cc"] + lenv['CFLAGS'].append("-DUSE_THNEED") + lenv['CXXFLAGS'].append("-DUSE_THNEED") else: libs += ['pthread'] @@ -56,8 +52,3 @@ lenv.Program('_modeld', [ "models/driving.cc", ]+common, LIBS=libs) -if TEST_THNEED: - lenv.Program('thneed/debug/_thneed', [ - "thneed/thneed.cc", "thneed/debug/test.cc" - ]+common, LIBS=libs) - diff --git a/selfdrive/modeld/thneed/debug/.gitignore b/selfdrive/modeld/thneed/debug/.gitignore deleted file mode 100644 index b7ed9fb0a7..0000000000 --- a/selfdrive/modeld/thneed/debug/.gitignore +++ /dev/null @@ -1 +0,0 @@ -_thneed diff --git a/selfdrive/modeld/thneed/debug/decompiler/disasm-a3xx.c b/selfdrive/modeld/thneed/debug/decompiler/disasm-a3xx.c deleted file mode 100644 index 1b40fb7fc0..0000000000 --- a/selfdrive/modeld/thneed/debug/decompiler/disasm-a3xx.c +++ /dev/null @@ -1,1426 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include - -//#include - -#include "util/macros.h" -#include "instr-a3xx.h" - -/* bitmask of debug flags */ -enum debug_t { - PRINT_RAW = 0x1, /* dump raw hexdump */ - PRINT_VERBOSE = 0x2, - EXPAND_REPEAT = 0x4, -}; - -static enum debug_t debug = PRINT_RAW | PRINT_VERBOSE | EXPAND_REPEAT; - -static const char *levels[] = { - "", - "\t", - "\t\t", - "\t\t\t", - "\t\t\t\t", - "\t\t\t\t\t", - "\t\t\t\t\t\t", - "\t\t\t\t\t\t\t", - "\t\t\t\t\t\t\t\t", - "\t\t\t\t\t\t\t\t\t", - "x", - "x", - "x", - "x", - "x", - "x", -}; - -static const char *component = "xyzw"; - -static const char *type[] = { - [TYPE_F16] = "f16", - [TYPE_F32] = "f32", - [TYPE_U16] = "u16", - [TYPE_U32] = "u32", - [TYPE_S16] = "s16", - [TYPE_S32] = "s32", - [TYPE_U8] = "u8", - [TYPE_S8] = "s8", -}; - -struct disasm_ctx { - FILE *out; - int level; - unsigned gpu_id; - - /* current instruction repeat flag: */ - unsigned repeat; - /* current instruction repeat indx/offset (for --expand): */ - unsigned repeatidx; - - unsigned instructions; -}; - -static const char *float_imms[] = { - "0.0", - "0.5", - "1.0", - "2.0", - "e", - "pi", - "1/pi", - "1/log2(e)", - "log2(e)", - "1/log2(10)", - "log2(10)", - "4.0", -}; - -static void print_reg(struct disasm_ctx *ctx, reg_t reg, bool full, - bool is_float, bool r, - bool c, bool im, bool neg, bool abs, bool addr_rel) -{ - const char type = c ? 'c' : 'r'; - - // XXX I prefer - and || for neg/abs, but preserving format used - // by libllvm-a3xx for easy diffing.. - - if (abs && neg) - fprintf(ctx->out, "(absneg)"); - else if (neg) - fprintf(ctx->out, "(neg)"); - else if (abs) - fprintf(ctx->out, "(abs)"); - - if (r) - fprintf(ctx->out, "(r)"); - - if (im) { - if (is_float && full && reg.iim_val < ARRAY_SIZE(float_imms)) { - fprintf(ctx->out, "(%s)", float_imms[reg.iim_val]); - } else { - fprintf(ctx->out, "%d", reg.iim_val); - } - } else if (addr_rel) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - if (reg.iim_val < 0) - fprintf(ctx->out, "%s%c", full ? "" : "h", type, -reg.iim_val); - else if (reg.iim_val > 0) - fprintf(ctx->out, "%s%c", full ? "" : "h", type, reg.iim_val); - else - fprintf(ctx->out, "%s%c", full ? "" : "h", type); - } else if ((reg.num == REG_A0) && !c) { - /* This matches libllvm output, the second (scalar) address register - * seems to be called a1.x instead of a0.y. - */ - fprintf(ctx->out, "a%d.x", reg.comp); - } else if ((reg.num == REG_P0) && !c) { - fprintf(ctx->out, "p0.%c", component[reg.comp]); - } else { - fprintf(ctx->out, "%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]); - } -} - -static unsigned regidx(reg_t reg) -{ - return (4 * reg.num) + reg.comp; -} - -static reg_t idxreg(unsigned idx) -{ - return (reg_t){ - .comp = idx & 0x3, - .num = idx >> 2, - }; -} - -static void print_reg_dst(struct disasm_ctx *ctx, reg_t reg, bool full, bool addr_rel) -{ - reg = idxreg(regidx(reg) + ctx->repeatidx); - print_reg(ctx, reg, full, false, false, false, false, false, false, addr_rel); -} - -/* TODO switch to using reginfo struct everywhere, since more readable - * than passing a bunch of bools to print_reg_src - */ - -struct reginfo { - reg_t reg; - bool full; - bool r; - bool c; - bool f; /* src reg is interpreted as float, used for printing immediates */ - bool im; - bool neg; - bool abs; - bool addr_rel; -}; - -static void print_src(struct disasm_ctx *ctx, struct reginfo *info) -{ - reg_t reg = info->reg; - - if (info->r) - reg = idxreg(regidx(info->reg) + ctx->repeatidx); - - print_reg(ctx, reg, info->full, info->f, info->r, info->c, info->im, - info->neg, info->abs, info->addr_rel); -} - -//static void print_dst(struct disasm_ctx *ctx, struct reginfo *info) -//{ -// print_reg_dst(ctx, info->reg, info->full, info->addr_rel); -//} - -static void print_instr_cat0(struct disasm_ctx *ctx, instr_t *instr) -{ - static const struct { - const char *suffix; - int nsrc; - bool idx; - } brinfo[7] = { - [BRANCH_PLAIN] = { "r", 1, false }, - [BRANCH_OR] = { "rao", 2, false }, - [BRANCH_AND] = { "raa", 2, false }, - [BRANCH_CONST] = { "rac", 0, true }, - [BRANCH_ANY] = { "any", 1, false }, - [BRANCH_ALL] = { "all", 1, false }, - [BRANCH_X] = { "rax", 0, false }, - }; - instr_cat0_t *cat0 = &instr->cat0; - - switch (instr_opc(instr, ctx->gpu_id)) { - case OPC_KILL: - case OPC_PREDT: - case OPC_PREDF: - fprintf(ctx->out, " %sp0.%c", cat0->inv0 ? "!" : "", - component[cat0->comp0]); - break; - case OPC_B: - fprintf(ctx->out, "%s", brinfo[cat0->brtype].suffix); - if (brinfo[cat0->brtype].idx) { - fprintf(ctx->out, ".%u", cat0->idx); - } - if (brinfo[cat0->brtype].nsrc >= 1) { - fprintf(ctx->out, " %sp0.%c,", cat0->inv0 ? "!" : "", - component[cat0->comp0]); - } - if (brinfo[cat0->brtype].nsrc >= 2) { - fprintf(ctx->out, " %sp0.%c,", cat0->inv1 ? "!" : "", - component[cat0->comp1]); - } - fprintf(ctx->out, " #%d", cat0->a3xx.immed); - break; - case OPC_JUMP: - case OPC_CALL: - case OPC_BKT: - case OPC_GETONE: - case OPC_SHPS: - fprintf(ctx->out, " #%d", cat0->a3xx.immed); - break; - } - - if ((debug & PRINT_VERBOSE) && (cat0->dummy3|cat0->dummy4)) - fprintf(ctx->out, "\t{0: %x,%x}", cat0->dummy3, cat0->dummy4); -} - -static void print_instr_cat1(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat1_t *cat1 = &instr->cat1; - - if (cat1->ul) - fprintf(ctx->out, "(ul)"); - - if (cat1->src_type == cat1->dst_type) { - if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) { - /* special case (nmemonic?): */ - fprintf(ctx->out, "mova"); - } else { - fprintf(ctx->out, "mov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - } else { - fprintf(ctx->out, "cov.%s%s", type[cat1->src_type], type[cat1->dst_type]); - } - - fprintf(ctx->out, " "); - - if (cat1->even) - fprintf(ctx->out, "(even)"); - - if (cat1->pos_inf) - fprintf(ctx->out, "(pos_infinity)"); - - print_reg_dst(ctx, (reg_t)(cat1->dst), type_size(cat1->dst_type) == 32, - cat1->dst_rel); - - fprintf(ctx->out, ", "); - - /* ugg, have to special case this.. vs print_reg().. */ - if (cat1->src_im) { - if (type_float(cat1->src_type)) - fprintf(ctx->out, "(%f)", cat1->fim_val); - else if (type_uint(cat1->src_type)) - fprintf(ctx->out, "0x%08x", cat1->uim_val); - else - fprintf(ctx->out, "%d", cat1->iim_val); - } else if (cat1->src_rel && !cat1->src_c) { - /* I would just use %+d but trying to make it diff'able with - * libllvm-a3xx... - */ - char type = cat1->src_rel_c ? 'c' : 'r'; - const char *full = (type_size(cat1->src_type) == 32) ? "" : "h"; - if (cat1->off < 0) - fprintf(ctx->out, "%s%c", full, type, -cat1->off); - else if (cat1->off > 0) - fprintf(ctx->out, "%s%c", full, type, cat1->off); - else - fprintf(ctx->out, "%s%c", full, type); - } else { - struct reginfo src = { - .reg = (reg_t)cat1->src, - .full = type_size(cat1->src_type) == 32, - .r = cat1->src_r, - .c = cat1->src_c, - .im = cat1->src_im, - }; - print_src(ctx, &src); - } - - if ((debug & PRINT_VERBOSE) && (cat1->must_be_0)) - fprintf(ctx->out, "\t{1: %x}", cat1->must_be_0); -} - -static void print_instr_cat2(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat2_t *cat2 = &instr->cat2; - int opc = _OPC(2, cat2->opc); - static const char *cond[] = { - "lt", - "le", - "gt", - "ge", - "eq", - "ne", - "?6?", - }; - - switch (opc) { - case OPC_CMPS_F: - case OPC_CMPS_U: - case OPC_CMPS_S: - case OPC_CMPV_F: - case OPC_CMPV_U: - case OPC_CMPV_S: - fprintf(ctx->out, ".%s", cond[cat2->cond]); - break; - } - - fprintf(ctx->out, " "); - if (cat2->ei) - fprintf(ctx->out, "(ei)"); - print_reg_dst(ctx, (reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false); - fprintf(ctx->out, ", "); - - struct reginfo src1 = { - .full = cat2->full, - .r = cat2->repeat ? cat2->src1_r : 0, - .f = is_cat2_float(opc), - .im = cat2->src1_im, - .abs = cat2->src1_abs, - .neg = cat2->src1_neg, - }; - - if (cat2->c1.src1_c) { - src1.reg = (reg_t)(cat2->c1.src1); - src1.c = true; - } else if (cat2->rel1.src1_rel) { - src1.reg = (reg_t)(cat2->rel1.src1); - src1.c = cat2->rel1.src1_c; - src1.addr_rel = true; - } else { - src1.reg = (reg_t)(cat2->src1); - } - print_src(ctx, &src1); - - struct reginfo src2 = { - .r = cat2->repeat ? cat2->src2_r : 0, - .full = cat2->full, - .f = is_cat2_float(opc), - .abs = cat2->src2_abs, - .neg = cat2->src2_neg, - .im = cat2->src2_im, - }; - switch (opc) { - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - case OPC_CLZ_B: - case OPC_CLZ_S: - case OPC_SIGN_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_NOT_B: - case OPC_BFREV_B: - case OPC_SETRM: - case OPC_CBITS_B: - /* these only have one src reg */ - break; - default: - fprintf(ctx->out, ", "); - if (cat2->c2.src2_c) { - src2.reg = (reg_t)(cat2->c2.src2); - src2.c = true; - } else if (cat2->rel2.src2_rel) { - src2.reg = (reg_t)(cat2->rel2.src2); - src2.c = cat2->rel2.src2_c; - src2.addr_rel = true; - } else { - src2.reg = (reg_t)(cat2->src2); - } - print_src(ctx, &src2); - break; - } -} - -static void print_instr_cat3(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat3_t *cat3 = &instr->cat3; - bool full = instr_cat3_full(cat3); - - fprintf(ctx->out, " "); - print_reg_dst(ctx, (reg_t)(cat3->dst), full ^ cat3->dst_half, false); - fprintf(ctx->out, ", "); - - struct reginfo src1 = { - .r = cat3->repeat ? cat3->src1_r : 0, - .full = full, - .neg = cat3->src1_neg, - }; - if (cat3->c1.src1_c) { - src1.reg = (reg_t)(cat3->c1.src1); - src1.c = true; - } else if (cat3->rel1.src1_rel) { - src1.reg = (reg_t)(cat3->rel1.src1); - src1.c = cat3->rel1.src1_c; - src1.addr_rel = true; - } else { - src1.reg = (reg_t)(cat3->src1); - } - print_src(ctx, &src1); - - fprintf(ctx->out, ", "); - struct reginfo src2 = { - .reg = (reg_t)cat3->src2, - .full = full, - .r = cat3->repeat ? cat3->src2_r : 0, - .c = cat3->src2_c, - .neg = cat3->src2_neg, - }; - print_src(ctx, &src2); - - fprintf(ctx->out, ", "); - struct reginfo src3 = { - .r = cat3->src3_r, - .full = full, - .neg = cat3->src3_neg, - }; - if (cat3->c2.src3_c) { - src3.reg = (reg_t)(cat3->c2.src3); - src3.c = true; - } else if (cat3->rel2.src3_rel) { - src3.reg = (reg_t)(cat3->rel2.src3); - src3.c = cat3->rel2.src3_c; - src3.addr_rel = true; - } else { - src3.reg = (reg_t)(cat3->src3); - } - print_src(ctx, &src3); -} - -static void print_instr_cat4(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat4_t *cat4 = &instr->cat4; - - fprintf(ctx->out, " "); - print_reg_dst(ctx, (reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false); - fprintf(ctx->out, ", "); - - struct reginfo src = { - .r = cat4->src_r, - .im = cat4->src_im, - .full = cat4->full, - .neg = cat4->src_neg, - .abs = cat4->src_abs, - }; - if (cat4->c.src_c) { - src.reg = (reg_t)(cat4->c.src); - src.c = true; - } else if (cat4->rel.src_rel) { - src.reg = (reg_t)(cat4->rel.src); - src.c = cat4->rel.src_c; - src.addr_rel = true; - } else { - src.reg = (reg_t)(cat4->src); - } - print_src(ctx, &src); - - if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2)) - fprintf(ctx->out, "\t{4: %x,%x}", cat4->dummy1, cat4->dummy2); -} - -static void print_instr_cat5(struct disasm_ctx *ctx, instr_t *instr) -{ - static const struct { - bool src1, src2, samp, tex; - } info[0x1f] = { - [opc_op(OPC_ISAM)] = { true, false, true, true, }, - [opc_op(OPC_ISAML)] = { true, true, true, true, }, - [opc_op(OPC_ISAMM)] = { true, false, true, true, }, - [opc_op(OPC_SAM)] = { true, false, true, true, }, - [opc_op(OPC_SAMB)] = { true, true, true, true, }, - [opc_op(OPC_SAML)] = { true, true, true, true, }, - [opc_op(OPC_SAMGQ)] = { true, false, true, true, }, - [opc_op(OPC_GETLOD)] = { true, false, true, true, }, - [opc_op(OPC_CONV)] = { true, true, true, true, }, - [opc_op(OPC_CONVM)] = { true, true, true, true, }, - [opc_op(OPC_GETSIZE)] = { true, false, false, true, }, - [opc_op(OPC_GETBUF)] = { false, false, false, true, }, - [opc_op(OPC_GETPOS)] = { true, false, false, true, }, - [opc_op(OPC_GETINFO)] = { false, false, false, true, }, - [opc_op(OPC_DSX)] = { true, false, false, false, }, - [opc_op(OPC_DSY)] = { true, false, false, false, }, - [opc_op(OPC_GATHER4R)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4G)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4B)] = { true, false, true, true, }, - [opc_op(OPC_GATHER4A)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP0)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP1)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP2)] = { true, false, true, true, }, - [opc_op(OPC_SAMGP3)] = { true, false, true, true, }, - [opc_op(OPC_DSXPP_1)] = { true, false, false, false, }, - [opc_op(OPC_DSYPP_1)] = { true, false, false, false, }, - [opc_op(OPC_RGETPOS)] = { true, false, false, false, }, - [opc_op(OPC_RGETINFO)] = { false, false, false, false, }, - }; - - static const struct { - bool indirect; - bool bindless; - bool use_a1; - bool uniform; - } desc_features[8] = { - [CAT5_NONUNIFORM] = { .indirect = true, }, - [CAT5_UNIFORM] = { .indirect = true, .uniform = true, }, - [CAT5_BINDLESS_IMM] = { .bindless = true, }, - [CAT5_BINDLESS_UNIFORM] = { - .bindless = true, - .indirect = true, - .uniform = true, - }, - [CAT5_BINDLESS_NONUNIFORM] = { - .bindless = true, - .indirect = true, - }, - [CAT5_BINDLESS_A1_IMM] = { - .bindless = true, - .use_a1 = true, - }, - [CAT5_BINDLESS_A1_UNIFORM] = { - .bindless = true, - .indirect = true, - .uniform = true, - .use_a1 = true, - }, - [CAT5_BINDLESS_A1_NONUNIFORM] = { - .bindless = true, - .indirect = true, - .use_a1 = true, - }, - }; - - instr_cat5_t *cat5 = &instr->cat5; - int i; - - bool desc_indirect = - cat5->is_s2en_bindless && - desc_features[cat5->s2en_bindless.desc_mode].indirect; - bool bindless = - cat5->is_s2en_bindless && - desc_features[cat5->s2en_bindless.desc_mode].bindless; - bool use_a1 = - cat5->is_s2en_bindless && - desc_features[cat5->s2en_bindless.desc_mode].use_a1; - bool uniform = - cat5->is_s2en_bindless && - desc_features[cat5->s2en_bindless.desc_mode].uniform; - - if (cat5->is_3d) fprintf(ctx->out, ".3d"); - if (cat5->is_a) fprintf(ctx->out, ".a"); - if (cat5->is_o) fprintf(ctx->out, ".o"); - if (cat5->is_p) fprintf(ctx->out, ".p"); - if (cat5->is_s) fprintf(ctx->out, ".s"); - if (desc_indirect) fprintf(ctx->out, ".s2en"); - if (uniform) fprintf(ctx->out, ".uniform"); - - if (bindless) { - unsigned base = (cat5->s2en_bindless.base_hi << 1) | cat5->base_lo; - fprintf(ctx->out, ".base%d", base); - } - - fprintf(ctx->out, " "); - - switch (_OPC(5, cat5->opc)) { - case OPC_DSXPP_1: - case OPC_DSYPP_1: - break; - default: - fprintf(ctx->out, "(%s)", type[cat5->type]); - break; - } - - fprintf(ctx->out, "("); - for (i = 0; i < 4; i++) - if (cat5->wrmask & (1 << i)) - fprintf(ctx->out, "%c", "xyzw"[i]); - fprintf(ctx->out, ")"); - - print_reg_dst(ctx, (reg_t)(cat5->dst), type_size(cat5->type) == 32, false); - - if (info[cat5->opc].src1) { - fprintf(ctx->out, ", "); - struct reginfo src = { .reg = (reg_t)(cat5->src1), .full = cat5->full }; - print_src(ctx, &src); - } - - if (cat5->is_o || info[cat5->opc].src2) { - fprintf(ctx->out, ", "); - struct reginfo src = { .reg = (reg_t)(cat5->src2), .full = cat5->full }; - print_src(ctx, &src); - } - if (cat5->is_s2en_bindless) { - if (!desc_indirect) { - if (info[cat5->opc].samp) { - if (use_a1) - fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3); - else - fprintf(ctx->out, ", s#%d", cat5->s2en_bindless.src3 & 0xf); - } - - if (info[cat5->opc].tex && !use_a1) { - fprintf(ctx->out, ", t#%d", cat5->s2en_bindless.src3 >> 4); - } - } - } else { - if (info[cat5->opc].samp) - fprintf(ctx->out, ", s#%d", cat5->norm.samp); - if (info[cat5->opc].tex) - fprintf(ctx->out, ", t#%d", cat5->norm.tex); - } - - if (desc_indirect) { - fprintf(ctx->out, ", "); - struct reginfo src = { .reg = (reg_t)(cat5->s2en_bindless.src3), .full = bindless }; - print_src(ctx, &src); - } - - if (use_a1) - fprintf(ctx->out, ", a1.x"); - - if (debug & PRINT_VERBOSE) { - if (cat5->is_s2en_bindless) { - if ((debug & PRINT_VERBOSE) && cat5->s2en_bindless.dummy1) - fprintf(ctx->out, "\t{5: %x}", cat5->s2en_bindless.dummy1); - } else { - if ((debug & PRINT_VERBOSE) && cat5->norm.dummy1) - fprintf(ctx->out, "\t{5: %x}", cat5->norm.dummy1); - } - } -} - -static void print_instr_cat6_a3xx(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat6_t *cat6 = &instr->cat6; - char sd = 0, ss = 0; /* dst/src address space */ - bool nodst = false; - struct reginfo dst, src1, src2; - int src1off = 0, dstoff = 0; - - memset(&dst, 0, sizeof(dst)); - memset(&src1, 0, sizeof(src1)); - memset(&src2, 0, sizeof(src2)); - - switch (_OPC(6, cat6->opc)) { - case OPC_RESINFO: - case OPC_RESFMT: - dst.full = type_size(cat6->type) == 32; - src1.full = type_size(cat6->type) == 32; - src2.full = type_size(cat6->type) == 32; - break; - case OPC_L2G: - case OPC_G2L: - dst.full = true; - src1.full = true; - src2.full = true; - break; - case OPC_STG: - case OPC_STL: - case OPC_STP: - case OPC_STLW: - case OPC_STIB: - dst.full = type_size(cat6->type) == 32; - src1.full = type_size(cat6->type) == 32; - src2.full = type_size(cat6->type) == 32; - break; - default: - dst.full = type_size(cat6->type) == 32; - src1.full = true; - src2.full = true; - break; - } - - switch (_OPC(6, cat6->opc)) { - case OPC_PREFETCH: - break; - case OPC_RESINFO: - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - break; - case OPC_LDGB: - fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); - break; - case OPC_STGB: - case OPC_STIB: - fprintf(ctx->out, ".%s", cat6->stgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->stgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->stgb.type_size + 1); - break; - case OPC_ATOMIC_ADD: - case OPC_ATOMIC_SUB: - case OPC_ATOMIC_XCHG: - case OPC_ATOMIC_INC: - case OPC_ATOMIC_DEC: - case OPC_ATOMIC_CMPXCHG: - case OPC_ATOMIC_MIN: - case OPC_ATOMIC_MAX: - case OPC_ATOMIC_AND: - case OPC_ATOMIC_OR: - case OPC_ATOMIC_XOR: - ss = cat6->g ? 'g' : 'l'; - fprintf(ctx->out, ".%s", cat6->ldgb.typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->ldgb.d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - fprintf(ctx->out, ".%d", cat6->ldgb.type_size + 1); - fprintf(ctx->out, ".%c", ss); - break; - default: - dst.im = cat6->g && !cat6->dst_off; - fprintf(ctx->out, ".%s", type[cat6->type]); - break; - } - fprintf(ctx->out, " "); - - switch (_OPC(6, cat6->opc)) { - case OPC_STG: - sd = 'g'; - break; - case OPC_STP: - sd = 'p'; - break; - case OPC_STL: - case OPC_STLW: - sd = 'l'; - break; - - case OPC_LDG: - case OPC_LDC: - ss = 'g'; - break; - case OPC_LDP: - ss = 'p'; - break; - case OPC_LDL: - case OPC_LDLW: - case OPC_LDLV: - ss = 'l'; - break; - - case OPC_L2G: - ss = 'l'; - sd = 'g'; - break; - - case OPC_G2L: - ss = 'g'; - sd = 'l'; - break; - - case OPC_PREFETCH: - ss = 'g'; - nodst = true; - break; - } - - if ((_OPC(6, cat6->opc) == OPC_STGB) || (_OPC(6, cat6->opc) == OPC_STIB)) { - struct reginfo src3; - - memset(&src3, 0, sizeof(src3)); - - src1.reg = (reg_t)(cat6->stgb.src1); - src2.reg = (reg_t)(cat6->stgb.src2); - src2.im = cat6->stgb.src2_im; - src3.reg = (reg_t)(cat6->stgb.src3); - src3.im = cat6->stgb.src3_im; - src3.full = true; - - fprintf(ctx->out, "g[%u], ", cat6->stgb.dst_ssbo); - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - fprintf(ctx->out, ", "); - print_src(ctx, &src3); - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " (pad0=%x, pad3=%x)", cat6->stgb.pad0, cat6->stgb.pad3); - - return; - } - - if (is_atomic(_OPC(6, cat6->opc))) { - - src1.reg = (reg_t)(cat6->ldgb.src1); - src1.im = cat6->ldgb.src1_im; - src2.reg = (reg_t)(cat6->ldgb.src2); - src2.im = cat6->ldgb.src2_im; - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - if (ss == 'g') { - struct reginfo src3; - memset(&src3, 0, sizeof(src3)); - - src3.reg = (reg_t)(cat6->ldgb.src3); - src3.full = true; - - /* For images, the ".typed" variant is used and src2 is - * the ivecN coordinates, ie ivec2 for 2d. - * - * For SSBOs, the ".untyped" variant is used and src2 is - * a simple dword offset.. src3 appears to be - * uvec2(offset * 4, 0). Not sure the point of that. - */ - - fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); - print_src(ctx, &src1); /* value */ - fprintf(ctx->out, ", "); - print_src(ctx, &src2); /* offset/coords */ - fprintf(ctx->out, ", "); - print_src(ctx, &src3); /* 64b byte offset.. */ - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, - cat6->ldgb.pad3, cat6->ldgb.mustbe0); - } - } else { /* ss == 'l' */ - fprintf(ctx->out, "l["); - print_src(ctx, &src1); /* simple byte offset */ - fprintf(ctx->out, "], "); - print_src(ctx, &src2); /* value */ - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (src3=%x, pad0=%x, pad3=%x, mustbe0=%x)", - cat6->ldgb.src3, cat6->ldgb.pad0, - cat6->ldgb.pad3, cat6->ldgb.mustbe0); - } - } - - return; - } else if (_OPC(6, cat6->opc) == OPC_RESINFO) { - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - fprintf(ctx->out, "g[%u]", cat6->ldgb.src_ssbo); - - return; - } else if (_OPC(6, cat6->opc) == OPC_LDGB) { - - src1.reg = (reg_t)(cat6->ldgb.src1); - src1.im = cat6->ldgb.src1_im; - src2.reg = (reg_t)(cat6->ldgb.src2); - src2.im = cat6->ldgb.src2_im; - dst.reg = (reg_t)(cat6->ldgb.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", "); - fprintf(ctx->out, "g[%u], ", cat6->ldgb.src_ssbo); - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " (pad0=%x, pad3=%x, mustbe0=%x)", cat6->ldgb.pad0, cat6->ldgb.pad3, cat6->ldgb.mustbe0); - - return; - } else if (_OPC(6, cat6->opc) == OPC_LDG && cat6->a.src1_im && cat6->a.src2_im) { - struct reginfo src3; - - memset(&src3, 0, sizeof(src3)); - src1.reg = (reg_t)(cat6->a.src1); - src2.reg = (reg_t)(cat6->a.src2); - src2.im = cat6->a.src2_im; - src3.reg = (reg_t)(cat6->a.off); - src3.full = true; - dst.reg = (reg_t)(cat6->d.dst); - - print_src(ctx, &dst); - fprintf(ctx->out, ", g["); - print_src(ctx, &src1); - fprintf(ctx->out, "+"); - print_src(ctx, &src3); - fprintf(ctx->out, "], "); - print_src(ctx, &src2); - - return; - } - if (cat6->dst_off) { - dst.reg = (reg_t)(cat6->c.dst); - dstoff = cat6->c.off; - } else { - dst.reg = (reg_t)(cat6->d.dst); - } - - if (cat6->src_off) { - src1.reg = (reg_t)(cat6->a.src1); - src1.im = cat6->a.src1_im; - src2.reg = (reg_t)(cat6->a.src2); - src2.im = cat6->a.src2_im; - src1off = cat6->a.off; - } else { - src1.reg = (reg_t)(cat6->b.src1); - src1.im = cat6->b.src1_im; - src2.reg = (reg_t)(cat6->b.src2); - src2.im = cat6->b.src2_im; - } - - if (!nodst) { - if (sd) - fprintf(ctx->out, "%c[", sd); - /* note: dst might actually be a src (ie. address to store to) */ - print_src(ctx, &dst); - if (cat6->dst_off && cat6->g) { - struct reginfo dstoff_reg = {0}; - dstoff_reg.reg = (reg_t) cat6->c.off; - dstoff_reg.full = true; - fprintf(ctx->out, "+"); - print_src(ctx, &dstoff_reg); - } else if (dstoff) - fprintf(ctx->out, "%+d", dstoff); - if (sd) - fprintf(ctx->out, "]"); - fprintf(ctx->out, ", "); - } - - if (ss) - fprintf(ctx->out, "%c[", ss); - - /* can have a larger than normal immed, so hack: */ - if (src1.im) { - fprintf(ctx->out, "%u", src1.reg.dummy13); - } else { - print_src(ctx, &src1); - } - - if (cat6->src_off && cat6->g) - print_src(ctx, &src2); - else if (src1off) - fprintf(ctx->out, "%+d", src1off); - if (ss) - fprintf(ctx->out, "]"); - - switch (_OPC(6, cat6->opc)) { - case OPC_RESINFO: - case OPC_RESFMT: - break; - default: - fprintf(ctx->out, ", "); - print_src(ctx, &src2); - break; - } -} - -static void print_instr_cat6_a6xx(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx; - struct reginfo src1, src2, ssbo; - bool uses_type = _OPC(6, cat6->opc) != OPC_LDC; - - static const struct { - bool indirect; - bool bindless; - const char *name; - } desc_features[8] = { - [CAT6_IMM] = { - .name = "imm" - }, - [CAT6_UNIFORM] = { - .indirect = true, - .name = "uniform" - }, - [CAT6_NONUNIFORM] = { - .indirect = true, - .name = "nonuniform" - }, - [CAT6_BINDLESS_IMM] = { - .bindless = true, - .name = "imm" - }, - [CAT6_BINDLESS_UNIFORM] = { - .bindless = true, - .indirect = true, - .name = "uniform" - }, - [CAT6_BINDLESS_NONUNIFORM] = { - .bindless = true, - .indirect = true, - .name = "nonuniform" - }, - }; - - bool indirect_ssbo = desc_features[cat6->desc_mode].indirect; - bool bindless = desc_features[cat6->desc_mode].bindless; - bool type_full = cat6->type != TYPE_U16; - - - memset(&src1, 0, sizeof(src1)); - memset(&src2, 0, sizeof(src2)); - memset(&ssbo, 0, sizeof(ssbo)); - - if (uses_type) { - fprintf(ctx->out, ".%s", cat6->typed ? "typed" : "untyped"); - fprintf(ctx->out, ".%dd", cat6->d + 1); - fprintf(ctx->out, ".%s", type[cat6->type]); - } else { - fprintf(ctx->out, ".offset%d", cat6->d); - } - fprintf(ctx->out, ".%u", cat6->type_size + 1); - - fprintf(ctx->out, ".%s", desc_features[cat6->desc_mode].name); - if (bindless) - fprintf(ctx->out, ".base%d", cat6->base); - fprintf(ctx->out, " "); - - src2.reg = (reg_t)(cat6->src2); - src2.full = type_full; - print_src(ctx, &src2); - fprintf(ctx->out, ", "); - - src1.reg = (reg_t)(cat6->src1); - src1.full = true; // XXX - print_src(ctx, &src1); - fprintf(ctx->out, ", "); - ssbo.reg = (reg_t)(cat6->ssbo); - ssbo.im = !indirect_ssbo; - ssbo.full = true; - print_src(ctx, &ssbo); - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, " (pad1=%x, pad2=%x, pad3=%x, pad4=%x, pad5=%x)", - cat6->pad1, cat6->pad2, cat6->pad3, cat6->pad4, cat6->pad5); - } -} - -static void print_instr_cat6(struct disasm_ctx *ctx, instr_t *instr) -{ - if (!is_cat6_legacy(instr, ctx->gpu_id)) { - print_instr_cat6_a6xx(ctx, instr); - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " NEW"); - } else { - print_instr_cat6_a3xx(ctx, instr); - if (debug & PRINT_VERBOSE) - fprintf(ctx->out, " LEGACY"); - } -} -static void print_instr_cat7(struct disasm_ctx *ctx, instr_t *instr) -{ - instr_cat7_t *cat7 = &instr->cat7; - - if (cat7->g) - fprintf(ctx->out, ".g"); - if (cat7->l) - fprintf(ctx->out, ".l"); - - if (_OPC(7, cat7->opc) == OPC_FENCE) { - if (cat7->r) - fprintf(ctx->out, ".r"); - if (cat7->w) - fprintf(ctx->out, ".w"); - } -} - -/* size of largest OPC field of all the instruction categories: */ -#define NOPC_BITS 6 - -static const struct opc_info { - uint16_t cat; - uint16_t opc; - const char *name; - void (*print)(struct disasm_ctx *ctx, instr_t *instr); -} opcs[1 << (3+NOPC_BITS)] = { -#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat } - /* category 0: */ - OPC(0, OPC_NOP, nop), - OPC(0, OPC_B, b), - OPC(0, OPC_JUMP, jump), - OPC(0, OPC_CALL, call), - OPC(0, OPC_RET, ret), - OPC(0, OPC_KILL, kill), - OPC(0, OPC_END, end), - OPC(0, OPC_EMIT, emit), - OPC(0, OPC_CUT, cut), - OPC(0, OPC_CHMASK, chmask), - OPC(0, OPC_CHSH, chsh), - OPC(0, OPC_FLOW_REV, flow_rev), - OPC(0, OPC_PREDT, predt), - OPC(0, OPC_PREDF, predf), - OPC(0, OPC_PREDE, prede), - OPC(0, OPC_BKT, bkt), - OPC(0, OPC_STKS, stks), - OPC(0, OPC_STKR, stkr), - OPC(0, OPC_XSET, xset), - OPC(0, OPC_XCLR, xclr), - OPC(0, OPC_GETONE, getone), - OPC(0, OPC_DBG, dbg), - OPC(0, OPC_SHPS, shps), - OPC(0, OPC_SHPE, shpe), - - /* category 1: */ - OPC(1, OPC_MOV, ), - - /* category 2: */ - OPC(2, OPC_ADD_F, add.f), - OPC(2, OPC_MIN_F, min.f), - OPC(2, OPC_MAX_F, max.f), - OPC(2, OPC_MUL_F, mul.f), - OPC(2, OPC_SIGN_F, sign.f), - OPC(2, OPC_CMPS_F, cmps.f), - OPC(2, OPC_ABSNEG_F, absneg.f), - OPC(2, OPC_CMPV_F, cmpv.f), - OPC(2, OPC_FLOOR_F, floor.f), - OPC(2, OPC_CEIL_F, ceil.f), - OPC(2, OPC_RNDNE_F, rndne.f), - OPC(2, OPC_RNDAZ_F, rndaz.f), - OPC(2, OPC_TRUNC_F, trunc.f), - OPC(2, OPC_ADD_U, add.u), - OPC(2, OPC_ADD_S, add.s), - OPC(2, OPC_SUB_U, sub.u), - OPC(2, OPC_SUB_S, sub.s), - OPC(2, OPC_CMPS_U, cmps.u), - OPC(2, OPC_CMPS_S, cmps.s), - OPC(2, OPC_MIN_U, min.u), - OPC(2, OPC_MIN_S, min.s), - OPC(2, OPC_MAX_U, max.u), - OPC(2, OPC_MAX_S, max.s), - OPC(2, OPC_ABSNEG_S, absneg.s), - OPC(2, OPC_AND_B, and.b), - OPC(2, OPC_OR_B, or.b), - OPC(2, OPC_NOT_B, not.b), - OPC(2, OPC_XOR_B, xor.b), - OPC(2, OPC_CMPV_U, cmpv.u), - OPC(2, OPC_CMPV_S, cmpv.s), - OPC(2, OPC_MUL_U24, mul.u24), - OPC(2, OPC_MUL_S24, mul.s24), - OPC(2, OPC_MULL_U, mull.u), - OPC(2, OPC_BFREV_B, bfrev.b), - OPC(2, OPC_CLZ_S, clz.s), - OPC(2, OPC_CLZ_B, clz.b), - OPC(2, OPC_SHL_B, shl.b), - OPC(2, OPC_SHR_B, shr.b), - OPC(2, OPC_ASHR_B, ashr.b), - OPC(2, OPC_BARY_F, bary.f), - OPC(2, OPC_MGEN_B, mgen.b), - OPC(2, OPC_GETBIT_B, getbit.b), - OPC(2, OPC_SETRM, setrm), - OPC(2, OPC_CBITS_B, cbits.b), - OPC(2, OPC_SHB, shb), - OPC(2, OPC_MSAD, msad), - - /* category 3: */ - OPC(3, OPC_MAD_U16, mad.u16), - OPC(3, OPC_MADSH_U16, madsh.u16), - OPC(3, OPC_MAD_S16, mad.s16), - OPC(3, OPC_MADSH_M16, madsh.m16), - OPC(3, OPC_MAD_U24, mad.u24), - OPC(3, OPC_MAD_S24, mad.s24), - OPC(3, OPC_MAD_F16, mad.f16), - OPC(3, OPC_MAD_F32, mad.f32), - OPC(3, OPC_SEL_B16, sel.b16), - OPC(3, OPC_SEL_B32, sel.b32), - OPC(3, OPC_SEL_S16, sel.s16), - OPC(3, OPC_SEL_S32, sel.s32), - OPC(3, OPC_SEL_F16, sel.f16), - OPC(3, OPC_SEL_F32, sel.f32), - OPC(3, OPC_SAD_S16, sad.s16), - OPC(3, OPC_SAD_S32, sad.s32), - - /* category 4: */ - OPC(4, OPC_RCP, rcp), - OPC(4, OPC_RSQ, rsq), - OPC(4, OPC_LOG2, log2), - OPC(4, OPC_EXP2, exp2), - OPC(4, OPC_SIN, sin), - OPC(4, OPC_COS, cos), - OPC(4, OPC_SQRT, sqrt), - OPC(4, OPC_HRSQ, hrsq), - OPC(4, OPC_HLOG2, hlog2), - OPC(4, OPC_HEXP2, hexp2), - - /* category 5: */ - OPC(5, OPC_ISAM, isam), - OPC(5, OPC_ISAML, isaml), - OPC(5, OPC_ISAMM, isamm), - OPC(5, OPC_SAM, sam), - OPC(5, OPC_SAMB, samb), - OPC(5, OPC_SAML, saml), - OPC(5, OPC_SAMGQ, samgq), - OPC(5, OPC_GETLOD, getlod), - OPC(5, OPC_CONV, conv), - OPC(5, OPC_CONVM, convm), - OPC(5, OPC_GETSIZE, getsize), - OPC(5, OPC_GETBUF, getbuf), - OPC(5, OPC_GETPOS, getpos), - OPC(5, OPC_GETINFO, getinfo), - OPC(5, OPC_DSX, dsx), - OPC(5, OPC_DSY, dsy), - OPC(5, OPC_GATHER4R, gather4r), - OPC(5, OPC_GATHER4G, gather4g), - OPC(5, OPC_GATHER4B, gather4b), - OPC(5, OPC_GATHER4A, gather4a), - OPC(5, OPC_SAMGP0, samgp0), - OPC(5, OPC_SAMGP1, samgp1), - OPC(5, OPC_SAMGP2, samgp2), - OPC(5, OPC_SAMGP3, samgp3), - OPC(5, OPC_DSXPP_1, dsxpp.1), - OPC(5, OPC_DSYPP_1, dsypp.1), - OPC(5, OPC_RGETPOS, rgetpos), - OPC(5, OPC_RGETINFO, rgetinfo), - - - /* category 6: */ - OPC(6, OPC_LDG, ldg), - OPC(6, OPC_LDL, ldl), - OPC(6, OPC_LDP, ldp), - OPC(6, OPC_STG, stg), - OPC(6, OPC_STL, stl), - OPC(6, OPC_STP, stp), - OPC(6, OPC_LDIB, ldib), - OPC(6, OPC_G2L, g2l), - OPC(6, OPC_L2G, l2g), - OPC(6, OPC_PREFETCH, prefetch), - OPC(6, OPC_LDLW, ldlw), - OPC(6, OPC_STLW, stlw), - OPC(6, OPC_RESFMT, resfmt), - OPC(6, OPC_RESINFO, resinfo), - OPC(6, OPC_ATOMIC_ADD, atomic.add), - OPC(6, OPC_ATOMIC_SUB, atomic.sub), - OPC(6, OPC_ATOMIC_XCHG, atomic.xchg), - OPC(6, OPC_ATOMIC_INC, atomic.inc), - OPC(6, OPC_ATOMIC_DEC, atomic.dec), - OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg), - OPC(6, OPC_ATOMIC_MIN, atomic.min), - OPC(6, OPC_ATOMIC_MAX, atomic.max), - OPC(6, OPC_ATOMIC_AND, atomic.and), - OPC(6, OPC_ATOMIC_OR, atomic.or), - OPC(6, OPC_ATOMIC_XOR, atomic.xor), - OPC(6, OPC_LDGB, ldgb), - OPC(6, OPC_STGB, stgb), - OPC(6, OPC_STIB, stib), - OPC(6, OPC_LDC, ldc), - OPC(6, OPC_LDLV, ldlv), - - OPC(7, OPC_BAR, bar), - OPC(7, OPC_FENCE, fence), - -#undef OPC -}; - -#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)])) - -// XXX hack.. probably should move this table somewhere common: -#include "ir3.h" -const char *ir3_instr_name(struct ir3_instruction *instr) -{ - if (opc_cat(instr->opc) == -1) return "??meta??"; - return opcs[instr->opc].name; -} - -static void print_single_instr(struct disasm_ctx *ctx, instr_t *instr) -{ - const char *name = GETINFO(instr)->name; - uint32_t opc = instr_opc(instr, ctx->gpu_id); - - if (name) { - fprintf(ctx->out, "%s", name); - GETINFO(instr)->print(ctx, instr); - } else { - fprintf(ctx->out, "unknown(%d,%d)", instr->opc_cat, opc); - - switch (instr->opc_cat) { - case 0: print_instr_cat0(ctx, instr); break; - case 1: print_instr_cat1(ctx, instr); break; - case 2: print_instr_cat2(ctx, instr); break; - case 3: print_instr_cat3(ctx, instr); break; - case 4: print_instr_cat4(ctx, instr); break; - case 5: print_instr_cat5(ctx, instr); break; - case 6: print_instr_cat6(ctx, instr); break; - case 7: print_instr_cat7(ctx, instr); break; - } - } -} - -static bool print_instr(struct disasm_ctx *ctx, uint32_t *dwords, int n) -{ - instr_t *instr = (instr_t *)dwords; - uint32_t opc = instr_opc(instr, ctx->gpu_id); - unsigned nop = 0; - unsigned cycles = ctx->instructions; - - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, "%s%04d:%04d[%08xx_%08xx] ", levels[ctx->level], - n, cycles++, dwords[1], dwords[0]); - } - - /* NOTE: order flags are printed is a bit fugly.. but for now I - * try to match the order in llvm-a3xx disassembler for easy - * diff'ing.. - */ - - ctx->repeat = instr_repeat(instr); - ctx->instructions += 1 + ctx->repeat; - - if (instr->sync) { - fprintf(ctx->out, "(sy)"); - } - if (instr->ss && ((instr->opc_cat <= 4) || (instr->opc_cat == 7))) { - fprintf(ctx->out, "(ss)"); - } - if (instr->jmp_tgt) - fprintf(ctx->out, "(jp)"); - if ((instr->opc_cat == 0) && instr->cat0.eq) - fprintf(ctx->out, "(eq)"); - if (instr_sat(instr)) - fprintf(ctx->out, "(sat)"); - if (ctx->repeat) - fprintf(ctx->out, "(rpt%d)", ctx->repeat); - else if ((instr->opc_cat == 2) && (instr->cat2.src1_r || instr->cat2.src2_r)) - nop = (instr->cat2.src2_r * 2) + instr->cat2.src1_r; - else if ((instr->opc_cat == 3) && (instr->cat3.src1_r || instr->cat3.src2_r)) - nop = (instr->cat3.src2_r * 2) + instr->cat3.src1_r; - ctx->instructions += nop; - if (nop) - fprintf(ctx->out, "(nop%d) ", nop); - - if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4))) - fprintf(ctx->out, "(ul)"); - - print_single_instr(ctx, instr); - fprintf(ctx->out, "\n"); - - if ((instr->opc_cat <= 4) && (debug & EXPAND_REPEAT)) { - int i; - for (i = 0; i < nop; i++) { - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, "%s%04d:%04d[ ] ", - levels[ctx->level], n, cycles++); - } - fprintf(ctx->out, "nop\n"); - } - for (i = 0; i < ctx->repeat; i++) { - ctx->repeatidx = i + 1; - if (debug & PRINT_VERBOSE) { - fprintf(ctx->out, "%s%04d:%04d[ ] ", - levels[ctx->level], n, cycles++); - } - print_single_instr(ctx, instr); - fprintf(ctx->out, "\n"); - } - ctx->repeatidx = 0; - } - - return (instr->opc_cat == 0) && (opc == OPC_END); -} - -int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id) -{ - struct disasm_ctx ctx; - int i; - int nop_count = 0; - - //assert((sizedwords % 2) == 0); - - memset(&ctx, 0, sizeof(ctx)); - ctx.out = out; - ctx.level = level; - ctx.gpu_id = gpu_id; - - for (i = 0; i < sizedwords; i += 2) { - print_instr(&ctx, &dwords[i], i/2); - if (dwords[i] == 0 && dwords[i + 1] == 0) - nop_count++; - else - nop_count = 0; - if (nop_count > 3) - break; - } - - return 0; -} - -int main(int argc, char *argv[]) { - uint32_t buf[0x10000]; - FILE *f = fopen(argv[1], "rb"); - if (argc > 2) { - int seek = atoi(argv[2]); - printf("skip %d\n", seek); - fread(buf, 1, seek , f); - } - int len = fread(buf, 1, sizeof(buf), f); - fclose(f); - - disasm_a3xx(buf, len/4, 0, stdout, 0); -} - diff --git a/selfdrive/modeld/thneed/debug/decompiler/instr-a3xx.h b/selfdrive/modeld/thneed/debug/decompiler/instr-a3xx.h deleted file mode 100644 index e4f548d639..0000000000 --- a/selfdrive/modeld/thneed/debug/decompiler/instr-a3xx.h +++ /dev/null @@ -1,1119 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef INSTR_A3XX_H_ -#define INSTR_A3XX_H_ - -#define PACKED __attribute__((__packed__)) - -#include -#include -#include -#include - -/* size of largest OPC field of all the instruction categories: */ -#define NOPC_BITS 6 - -#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc) - -typedef enum { - /* category 0: */ - OPC_NOP = _OPC(0, 0), - OPC_B = _OPC(0, 1), - OPC_JUMP = _OPC(0, 2), - OPC_CALL = _OPC(0, 3), - OPC_RET = _OPC(0, 4), - OPC_KILL = _OPC(0, 5), - OPC_END = _OPC(0, 6), - OPC_EMIT = _OPC(0, 7), - OPC_CUT = _OPC(0, 8), - OPC_CHMASK = _OPC(0, 9), - OPC_CHSH = _OPC(0, 10), - OPC_FLOW_REV = _OPC(0, 11), - - OPC_BKT = _OPC(0, 16), - OPC_STKS = _OPC(0, 17), - OPC_STKR = _OPC(0, 18), - OPC_XSET = _OPC(0, 19), - OPC_XCLR = _OPC(0, 20), - OPC_GETONE = _OPC(0, 21), - OPC_DBG = _OPC(0, 22), - OPC_SHPS = _OPC(0, 23), /* shader prologue start */ - OPC_SHPE = _OPC(0, 24), /* shader prologue end */ - - OPC_PREDT = _OPC(0, 29), /* predicated true */ - OPC_PREDF = _OPC(0, 30), /* predicated false */ - OPC_PREDE = _OPC(0, 31), /* predicated end */ - - /* category 1: */ - OPC_MOV = _OPC(1, 0), - - /* category 2: */ - OPC_ADD_F = _OPC(2, 0), - OPC_MIN_F = _OPC(2, 1), - OPC_MAX_F = _OPC(2, 2), - OPC_MUL_F = _OPC(2, 3), - OPC_SIGN_F = _OPC(2, 4), - OPC_CMPS_F = _OPC(2, 5), - OPC_ABSNEG_F = _OPC(2, 6), - OPC_CMPV_F = _OPC(2, 7), - /* 8 - invalid */ - OPC_FLOOR_F = _OPC(2, 9), - OPC_CEIL_F = _OPC(2, 10), - OPC_RNDNE_F = _OPC(2, 11), - OPC_RNDAZ_F = _OPC(2, 12), - OPC_TRUNC_F = _OPC(2, 13), - /* 14-15 - invalid */ - OPC_ADD_U = _OPC(2, 16), - OPC_ADD_S = _OPC(2, 17), - OPC_SUB_U = _OPC(2, 18), - OPC_SUB_S = _OPC(2, 19), - OPC_CMPS_U = _OPC(2, 20), - OPC_CMPS_S = _OPC(2, 21), - OPC_MIN_U = _OPC(2, 22), - OPC_MIN_S = _OPC(2, 23), - OPC_MAX_U = _OPC(2, 24), - OPC_MAX_S = _OPC(2, 25), - OPC_ABSNEG_S = _OPC(2, 26), - /* 27 - invalid */ - OPC_AND_B = _OPC(2, 28), - OPC_OR_B = _OPC(2, 29), - OPC_NOT_B = _OPC(2, 30), - OPC_XOR_B = _OPC(2, 31), - /* 32 - invalid */ - OPC_CMPV_U = _OPC(2, 33), - OPC_CMPV_S = _OPC(2, 34), - /* 35-47 - invalid */ - OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */ - OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */ - OPC_MULL_U = _OPC(2, 50), - OPC_BFREV_B = _OPC(2, 51), - OPC_CLZ_S = _OPC(2, 52), - OPC_CLZ_B = _OPC(2, 53), - OPC_SHL_B = _OPC(2, 54), - OPC_SHR_B = _OPC(2, 55), - OPC_ASHR_B = _OPC(2, 56), - OPC_BARY_F = _OPC(2, 57), - OPC_MGEN_B = _OPC(2, 58), - OPC_GETBIT_B = _OPC(2, 59), - OPC_SETRM = _OPC(2, 60), - OPC_CBITS_B = _OPC(2, 61), - OPC_SHB = _OPC(2, 62), - OPC_MSAD = _OPC(2, 63), - - /* category 3: */ - OPC_MAD_U16 = _OPC(3, 0), - OPC_MADSH_U16 = _OPC(3, 1), - OPC_MAD_S16 = _OPC(3, 2), - OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */ - OPC_MAD_U24 = _OPC(3, 4), - OPC_MAD_S24 = _OPC(3, 5), - OPC_MAD_F16 = _OPC(3, 6), - OPC_MAD_F32 = _OPC(3, 7), - OPC_SEL_B16 = _OPC(3, 8), - OPC_SEL_B32 = _OPC(3, 9), - OPC_SEL_S16 = _OPC(3, 10), - OPC_SEL_S32 = _OPC(3, 11), - OPC_SEL_F16 = _OPC(3, 12), - OPC_SEL_F32 = _OPC(3, 13), - OPC_SAD_S16 = _OPC(3, 14), - OPC_SAD_S32 = _OPC(3, 15), - - /* category 4: */ - OPC_RCP = _OPC(4, 0), - OPC_RSQ = _OPC(4, 1), - OPC_LOG2 = _OPC(4, 2), - OPC_EXP2 = _OPC(4, 3), - OPC_SIN = _OPC(4, 4), - OPC_COS = _OPC(4, 5), - OPC_SQRT = _OPC(4, 6), - /* NOTE that these are 8+opc from their highp equivs, so it's possible - * that the high order bit in the opc field has been repurposed for - * half-precision use? But note that other ops (rcp/lsin/cos/sqrt) - * still use the same opc as highp - */ - OPC_HRSQ = _OPC(4, 9), - OPC_HLOG2 = _OPC(4, 10), - OPC_HEXP2 = _OPC(4, 11), - - /* category 5: */ - OPC_ISAM = _OPC(5, 0), - OPC_ISAML = _OPC(5, 1), - OPC_ISAMM = _OPC(5, 2), - OPC_SAM = _OPC(5, 3), - OPC_SAMB = _OPC(5, 4), - OPC_SAML = _OPC(5, 5), - OPC_SAMGQ = _OPC(5, 6), - OPC_GETLOD = _OPC(5, 7), - OPC_CONV = _OPC(5, 8), - OPC_CONVM = _OPC(5, 9), - OPC_GETSIZE = _OPC(5, 10), - OPC_GETBUF = _OPC(5, 11), - OPC_GETPOS = _OPC(5, 12), - OPC_GETINFO = _OPC(5, 13), - OPC_DSX = _OPC(5, 14), - OPC_DSY = _OPC(5, 15), - OPC_GATHER4R = _OPC(5, 16), - OPC_GATHER4G = _OPC(5, 17), - OPC_GATHER4B = _OPC(5, 18), - OPC_GATHER4A = _OPC(5, 19), - OPC_SAMGP0 = _OPC(5, 20), - OPC_SAMGP1 = _OPC(5, 21), - OPC_SAMGP2 = _OPC(5, 22), - OPC_SAMGP3 = _OPC(5, 23), - OPC_DSXPP_1 = _OPC(5, 24), - OPC_DSYPP_1 = _OPC(5, 25), - OPC_RGETPOS = _OPC(5, 26), - OPC_RGETINFO = _OPC(5, 27), - - /* category 6: */ - OPC_LDG = _OPC(6, 0), /* load-global */ - OPC_LDL = _OPC(6, 1), - OPC_LDP = _OPC(6, 2), - OPC_STG = _OPC(6, 3), /* store-global */ - OPC_STL = _OPC(6, 4), - OPC_STP = _OPC(6, 5), - OPC_LDIB = _OPC(6, 6), - OPC_G2L = _OPC(6, 7), - OPC_L2G = _OPC(6, 8), - OPC_PREFETCH = _OPC(6, 9), - OPC_LDLW = _OPC(6, 10), - OPC_STLW = _OPC(6, 11), - OPC_RESFMT = _OPC(6, 14), - OPC_RESINFO = _OPC(6, 15), - OPC_ATOMIC_ADD = _OPC(6, 16), - OPC_ATOMIC_SUB = _OPC(6, 17), - OPC_ATOMIC_XCHG = _OPC(6, 18), - OPC_ATOMIC_INC = _OPC(6, 19), - OPC_ATOMIC_DEC = _OPC(6, 20), - OPC_ATOMIC_CMPXCHG = _OPC(6, 21), - OPC_ATOMIC_MIN = _OPC(6, 22), - OPC_ATOMIC_MAX = _OPC(6, 23), - OPC_ATOMIC_AND = _OPC(6, 24), - OPC_ATOMIC_OR = _OPC(6, 25), - OPC_ATOMIC_XOR = _OPC(6, 26), - OPC_LDGB = _OPC(6, 27), - OPC_STGB = _OPC(6, 28), - OPC_STIB = _OPC(6, 29), - OPC_LDC = _OPC(6, 30), - OPC_LDLV = _OPC(6, 31), - - /* category 7: */ - OPC_BAR = _OPC(7, 0), - OPC_FENCE = _OPC(7, 1), - - /* meta instructions (category -1): */ - /* placeholder instr to mark shader inputs: */ - OPC_META_INPUT = _OPC(-1, 0), - /* The "collect" and "split" instructions are used for keeping - * track of instructions that write to multiple dst registers - * (split) like texture sample instructions, or read multiple - * consecutive scalar registers (collect) (bary.f, texture samp) - * - * A "split" extracts a scalar component from a vecN, and a - * "collect" gathers multiple scalar components into a vecN - */ - OPC_META_SPLIT = _OPC(-1, 2), - OPC_META_COLLECT = _OPC(-1, 3), - - /* placeholder for texture fetches that run before FS invocation - * starts: - */ - OPC_META_TEX_PREFETCH = _OPC(-1, 4), - -} opc_t; - -#define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) -#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1))) - -typedef enum { - TYPE_F16 = 0, - TYPE_F32 = 1, - TYPE_U16 = 2, - TYPE_U32 = 3, - TYPE_S16 = 4, - TYPE_S32 = 5, - TYPE_U8 = 6, - TYPE_S8 = 7, // XXX I assume? -} type_t; - -static inline uint32_t type_size(type_t type) -{ - switch (type) { - case TYPE_F32: - case TYPE_U32: - case TYPE_S32: - return 32; - case TYPE_F16: - case TYPE_U16: - case TYPE_S16: - return 16; - case TYPE_U8: - case TYPE_S8: - return 8; - default: - assert(0); /* invalid type */ - return 0; - } -} - -static inline int type_float(type_t type) -{ - return (type == TYPE_F32) || (type == TYPE_F16); -} - -static inline int type_uint(type_t type) -{ - return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8); -} - -static inline int type_sint(type_t type) -{ - return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8); -} - -typedef union PACKED { - /* normal gpr or const src register: */ - struct PACKED { - uint32_t comp : 2; - uint32_t num : 10; - }; - /* for immediate val: */ - int32_t iim_val : 11; - /* to make compiler happy: */ - uint32_t dummy32; - uint32_t dummy10 : 10; - int32_t idummy10 : 10; - uint32_t dummy11 : 11; - uint32_t dummy12 : 12; - uint32_t dummy13 : 13; - uint32_t dummy8 : 8; - int32_t idummy13 : 13; - int32_t idummy8 : 8; -} reg_t; - -/* special registers: */ -#define REG_A0 61 /* address register */ -#define REG_P0 62 /* predicate register */ - -static inline int reg_special(reg_t reg) -{ - return (reg.num == REG_A0) || (reg.num == REG_P0); -} - -typedef enum { - BRANCH_PLAIN = 0, /* br */ - BRANCH_OR = 1, /* brao */ - BRANCH_AND = 2, /* braa */ - BRANCH_CONST = 3, /* brac */ - BRANCH_ANY = 4, /* bany */ - BRANCH_ALL = 5, /* ball */ - BRANCH_X = 6, /* brax ??? */ -} brtype_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - int16_t immed : 16; - uint32_t dummy1 : 16; - } a3xx; - struct PACKED { - int32_t immed : 20; - uint32_t dummy1 : 12; - } a4xx; - struct PACKED { - int32_t immed : 32; - } a5xx; - }; - - /* dword1: */ - uint32_t idx : 5; /* brac.N index */ - uint32_t brtype : 3; /* branch type, see brtype_t */ - uint32_t repeat : 3; - uint32_t dummy3 : 1; - uint32_t ss : 1; - uint32_t inv1 : 1; - uint32_t comp1 : 2; - uint32_t eq : 1; - uint32_t opc_hi : 1; /* at least one bit */ - uint32_t dummy4 : 2; - uint32_t inv0 : 1; - uint32_t comp0 : 2; /* component for first src */ - uint32_t opc : 4; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat0_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - /* for normal src register: */ - struct PACKED { - uint32_t src : 11; - /* at least low bit of pad must be zero or it will - * look like a address relative src - */ - uint32_t pad : 21; - }; - /* for address relative: */ - struct PACKED { - int32_t off : 10; - uint32_t src_rel_c : 1; - uint32_t src_rel : 1; - uint32_t unknown : 20; - }; - /* for immediate: */ - int32_t iim_val; - uint32_t uim_val; - float fim_val; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 3; - uint32_t src_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_type : 3; - uint32_t dst_rel : 1; - uint32_t src_type : 3; - uint32_t src_c : 1; - uint32_t src_im : 1; - uint32_t even : 1; - uint32_t pos_inf : 1; - uint32_t must_be_0 : 2; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat1_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src1 : 11; - uint32_t must_be_zero1: 2; - uint32_t src1_im : 1; /* immediate */ - uint32_t src1_neg : 1; /* negate */ - uint32_t src1_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src1 : 10; - uint32_t src1_c : 1; /* relative-const */ - uint32_t src1_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel1; - struct PACKED { - uint32_t src1 : 12; - uint32_t src1_c : 1; /* const */ - uint32_t dummy : 3; - } c1; - }; - - union PACKED { - struct PACKED { - uint32_t src2 : 11; - uint32_t must_be_zero2: 2; - uint32_t src2_im : 1; /* immediate */ - uint32_t src2_neg : 1; /* negate */ - uint32_t src2_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src2 : 10; - uint32_t src2_c : 1; /* relative-const */ - uint32_t src2_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel2; - struct PACKED { - uint32_t src2 : 12; - uint32_t src2_c : 1; /* const */ - uint32_t dummy : 3; - } c2; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 2; - uint32_t sat : 1; - uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */ - uint32_t ss : 1; - uint32_t ul : 1; /* dunno */ - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t ei : 1; - uint32_t cond : 3; - uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */ - uint32_t full : 1; /* not half */ - uint32_t opc : 6; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat2_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src1 : 11; - uint32_t must_be_zero1: 2; - uint32_t src2_c : 1; - uint32_t src1_neg : 1; - uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */ - }; - struct PACKED { - uint32_t src1 : 10; - uint32_t src1_c : 1; - uint32_t src1_rel : 1; - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel1; - struct PACKED { - uint32_t src1 : 12; - uint32_t src1_c : 1; - uint32_t dummy : 3; - } c1; - }; - - union PACKED { - struct PACKED { - uint32_t src3 : 11; - uint32_t must_be_zero2: 2; - uint32_t src3_r : 1; - uint32_t src2_neg : 1; - uint32_t src3_neg : 1; - }; - struct PACKED { - uint32_t src3 : 10; - uint32_t src3_c : 1; - uint32_t src3_rel : 1; - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel2; - struct PACKED { - uint32_t src3 : 12; - uint32_t src3_c : 1; - uint32_t dummy : 3; - } c2; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 2; - uint32_t sat : 1; - uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */ - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t src2 : 8; - uint32_t opc : 4; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat3_t; - -static inline bool instr_cat3_full(instr_cat3_t *cat3) -{ - switch (_OPC(3, cat3->opc)) { - case OPC_MAD_F16: - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - case OPC_SAD_S32: // really?? - return false; - default: - return true; - } -} - -typedef struct PACKED { - /* dword0: */ - union PACKED { - struct PACKED { - uint32_t src : 11; - uint32_t must_be_zero1: 2; - uint32_t src_im : 1; /* immediate */ - uint32_t src_neg : 1; /* negate */ - uint32_t src_abs : 1; /* absolute value */ - }; - struct PACKED { - uint32_t src : 10; - uint32_t src_c : 1; /* relative-const */ - uint32_t src_rel : 1; /* relative address */ - uint32_t must_be_zero : 1; - uint32_t dummy : 3; - } rel; - struct PACKED { - uint32_t src : 12; - uint32_t src_c : 1; /* const */ - uint32_t dummy : 3; - } c; - }; - uint32_t dummy1 : 16; /* seem to be ignored */ - - /* dword1: */ - uint32_t dst : 8; - uint32_t repeat : 2; - uint32_t sat : 1; - uint32_t src_r : 1; - uint32_t ss : 1; - uint32_t ul : 1; - uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ - uint32_t dummy2 : 5; /* seem to be ignored */ - uint32_t full : 1; /* not half */ - uint32_t opc : 6; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat4_t; - -/* With is_bindless_s2en = 1, this determines whether bindless is enabled and - * if so, how to get the (base, index) pair for both sampler and texture. - * There is a single base embedded in the instruction, which is always used - * for the texture. - */ -typedef enum { - /* Use traditional GL binding model, get texture and sampler index - * from src3 which is not presumed to be uniform. This is - * backwards-compatible with earlier generations, where this field was - * always 0 and nonuniform-indexed sampling always worked. - */ - CAT5_NONUNIFORM = 0, - - /* The sampler base comes from the low 3 bits of a1.x, and the sampler - * and texture index come from src3 which is presumed to be uniform. - */ - CAT5_BINDLESS_A1_UNIFORM = 1, - - /* The texture and sampler share the same base, and the sampler and - * texture index come from src3 which is *not* presumed to be uniform. - */ - CAT5_BINDLESS_NONUNIFORM = 2, - - /* The sampler base comes from the low 3 bits of a1.x, and the sampler - * and texture index come from src3 which is *not* presumed to be - * uniform. - */ - CAT5_BINDLESS_A1_NONUNIFORM = 3, - - /* Use traditional GL binding model, get texture and sampler index - * from src3 which is presumed to be uniform. - */ - CAT5_UNIFORM = 4, - - /* The texture and sampler share the same base, and the sampler and - * texture index come from src3 which is presumed to be uniform. - */ - CAT5_BINDLESS_UNIFORM = 5, - - /* The texture and sampler share the same base, get sampler index from low - * 4 bits of src3 and texture index from high 4 bits. - */ - CAT5_BINDLESS_IMM = 6, - - /* The sampler base comes from the low 3 bits of a1.x, and the texture - * index comes from the next 8 bits of a1.x. The sampler index is an - * immediate in src3. - */ - CAT5_BINDLESS_A1_IMM = 7, -} cat5_desc_mode_t; - -typedef struct PACKED { - /* dword0: */ - union PACKED { - /* normal case: */ - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 8; - uint32_t dummy1 : 4; /* seem to be ignored */ - uint32_t samp : 4; - uint32_t tex : 7; - } norm; - /* s2en case: */ - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 8; - uint32_t dummy1 : 2; - uint32_t base_hi : 2; - uint32_t src3 : 8; - uint32_t desc_mode : 3; - } s2en_bindless; - /* same in either case: */ - // XXX I think, confirm this - struct PACKED { - uint32_t full : 1; /* not half */ - uint32_t src1 : 8; - uint32_t src2 : 8; - uint32_t pad : 15; - }; - }; - - /* dword1: */ - uint32_t dst : 8; - uint32_t wrmask : 4; /* write-mask */ - uint32_t type : 3; - uint32_t base_lo : 1; /* used with bindless */ - uint32_t is_3d : 1; - - uint32_t is_a : 1; - uint32_t is_s : 1; - uint32_t is_s2en_bindless : 1; - uint32_t is_o : 1; - uint32_t is_p : 1; - - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat5_t; - -/* dword0 encoding for src_off: [src1 + off], src2: */ -typedef struct PACKED { - /* dword0: */ - uint32_t mustbe1 : 1; - int32_t off : 13; - uint32_t src1 : 8; - uint32_t src1_im : 1; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t dword1; -} instr_cat6a_t; - -/* dword0 encoding for !src_off: [src1], src2 */ -typedef struct PACKED { - /* dword0: */ - uint32_t mustbe0 : 1; - uint32_t src1 : 13; - uint32_t ignore0 : 8; - uint32_t src1_im : 1; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t dword1; -} instr_cat6b_t; - -/* dword1 encoding for dst_off: */ -typedef struct PACKED { - /* dword0: */ - uint32_t dword0; - - /* note: there is some weird stuff going on where sometimes - * cat6->a.off is involved.. but that seems like a bug in - * the blob, since it is used even if !cat6->src_off - * It would make sense for there to be some more bits to - * bring us to 11 bits worth of offset, but not sure.. - */ - int32_t off : 8; - uint32_t mustbe1 : 1; - uint32_t dst : 8; - uint32_t pad1 : 15; -} instr_cat6c_t; - -/* dword1 encoding for !dst_off: */ -typedef struct PACKED { - /* dword0: */ - uint32_t dword0; - - uint32_t dst : 8; - uint32_t mustbe0 : 1; - uint32_t idx : 8; - uint32_t pad0 : 15; -} instr_cat6d_t; - -/* ldgb and atomics.. - * - * ldgb: pad0=0, pad3=1 - * atomic .g: pad0=1, pad3=1 - * .l: pad0=1, pad3=0 - */ -typedef struct PACKED { - /* dword0: */ - uint32_t pad0 : 1; - uint32_t src3 : 8; - uint32_t d : 2; - uint32_t typed : 1; - uint32_t type_size : 2; - uint32_t src1 : 8; - uint32_t src1_im : 1; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t dst : 8; - uint32_t mustbe0 : 1; - uint32_t src_ssbo : 8; - uint32_t pad2 : 3; // type - uint32_t g : 1; - uint32_t pad3 : 1; - uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat -} instr_cat6ldgb_t; - -/* stgb, pad0=0, pad3=2 - */ -typedef struct PACKED { - /* dword0: */ - uint32_t mustbe1 : 1; // ??? - uint32_t src1 : 8; - uint32_t d : 2; - uint32_t typed : 1; - uint32_t type_size : 2; - uint32_t pad0 : 9; - uint32_t src2_im : 1; - uint32_t src2 : 8; - - /* dword1: */ - uint32_t src3 : 8; - uint32_t src3_im : 1; - uint32_t dst_ssbo : 8; - uint32_t pad2 : 3; // type - uint32_t pad3 : 2; - uint32_t pad4 : 10; // opc/jmp_tgt/sync/opc_cat -} instr_cat6stgb_t; - -typedef union PACKED { - instr_cat6a_t a; - instr_cat6b_t b; - instr_cat6c_t c; - instr_cat6d_t d; - instr_cat6ldgb_t ldgb; - instr_cat6stgb_t stgb; - struct PACKED { - /* dword0: */ - uint32_t src_off : 1; - uint32_t pad1 : 31; - - /* dword1: */ - uint32_t pad2 : 8; - uint32_t dst_off : 1; - uint32_t pad3 : 8; - uint32_t type : 3; - uint32_t g : 1; /* or in some cases it means dst immed */ - uint32_t pad4 : 1; - uint32_t opc : 5; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; - }; -} instr_cat6_t; - -/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded. - */ -typedef enum { - /* Use old GL binding model with an immediate index. */ - CAT6_IMM = 0, - - CAT6_UNIFORM = 1, - - CAT6_NONUNIFORM = 2, - - /* Use the bindless model, with an immediate index. - */ - CAT6_BINDLESS_IMM = 4, - - /* Use the bindless model, with a uniform register index. - */ - CAT6_BINDLESS_UNIFORM = 5, - - /* Use the bindless model, with a register index that isn't guaranteed - * to be uniform. This presumably checks if the indices are equal and - * splits up the load/store, because it works the way you would - * expect. - */ - CAT6_BINDLESS_NONUNIFORM = 6, -} cat6_desc_mode_t; - -/** - * For atomic ops (which return a value): - * - * pad1=1, pad3=c, pad5=3 - * src1 - vecN offset/coords - * src2.x - is actually dest register - * src2.y - is 'data' except for cmpxchg where src2.y is 'compare' - * and src2.z is 'data' - * - * For stib (which does not return a value): - * pad1=0, pad3=c, pad5=2 - * src1 - vecN offset/coords - * src2 - value to store - * - * For ldib: - * pad1=1, pad3=c, pad5=2 - * src1 - vecN offset/coords - * - * for ldc (load from UBO using descriptor): - * pad1=0, pad3=8, pad5=2 - * - * pad2 and pad5 are only observed to be 0. - */ -typedef struct PACKED { - /* dword0: */ - uint32_t pad1 : 1; - uint32_t base : 3; - uint32_t pad2 : 2; - uint32_t desc_mode : 3; - uint32_t d : 2; - uint32_t typed : 1; - uint32_t type_size : 2; - uint32_t opc : 5; - uint32_t pad3 : 5; - uint32_t src1 : 8; /* coordinate/offset */ - - /* dword1: */ - uint32_t src2 : 8; /* or the dst for load instructions */ - uint32_t pad4 : 1; //mustbe0 ?? - uint32_t ssbo : 8; /* ssbo/image binding point */ - uint32_t type : 3; - uint32_t pad5 : 7; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; -} instr_cat6_a6xx_t; - -typedef struct PACKED { - /* dword0: */ - uint32_t pad1 : 32; - - /* dword1: */ - uint32_t pad2 : 12; - uint32_t ss : 1; /* maybe in the encoding, but blob only uses (sy) */ - uint32_t pad3 : 6; - uint32_t w : 1; /* write */ - uint32_t r : 1; /* read */ - uint32_t l : 1; /* local */ - uint32_t g : 1; /* global */ - uint32_t opc : 4; /* presumed, but only a couple known OPCs */ - uint32_t jmp_tgt : 1; /* (jp) */ - uint32_t sync : 1; /* (sy) */ - uint32_t opc_cat : 3; -} instr_cat7_t; - -typedef union PACKED { - instr_cat0_t cat0; - instr_cat1_t cat1; - instr_cat2_t cat2; - instr_cat3_t cat3; - instr_cat4_t cat4; - instr_cat5_t cat5; - instr_cat6_t cat6; - instr_cat6_a6xx_t cat6_a6xx; - instr_cat7_t cat7; - struct PACKED { - /* dword0: */ - uint32_t pad1 : 32; - - /* dword1: */ - uint32_t pad2 : 12; - uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */ - uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */ - uint32_t pad3 : 13; - uint32_t jmp_tgt : 1; - uint32_t sync : 1; - uint32_t opc_cat : 3; - - }; -} instr_t; - -static inline uint32_t instr_repeat(instr_t *instr) -{ - switch (instr->opc_cat) { - case 0: return instr->cat0.repeat; - case 1: return instr->cat1.repeat; - case 2: return instr->cat2.repeat; - case 3: return instr->cat3.repeat; - case 4: return instr->cat4.repeat; - default: return 0; - } -} - -static inline bool instr_sat(instr_t *instr) -{ - switch (instr->opc_cat) { - case 2: return instr->cat2.sat; - case 3: return instr->cat3.sat; - case 4: return instr->cat4.sat; - default: return false; - } -} - -/* We can probably drop the gpu_id arg, but keeping it for now so we can - * assert if we see something we think should be new encoding on an older - * gpu. - */ -static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id) -{ - instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx; - - /* At least one of these two bits is pad in all the possible - * "legacy" cat6 encodings, and a analysis of all the pre-a6xx - * cmdstream traces I have indicates that the pad bit is zero - * in all cases. So we can use this to detect new encoding: - */ - if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) { - assert(gpu_id >= 600); - assert(instr->cat6.opc == 0); - return false; - } - - return true; -} - -static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id) -{ - switch (instr->opc_cat) { - case 0: return instr->cat0.opc | instr->cat0.opc_hi << 4; - case 1: return 0; - case 2: return instr->cat2.opc; - case 3: return instr->cat3.opc; - case 4: return instr->cat4.opc; - case 5: return instr->cat5.opc; - case 6: - if (!is_cat6_legacy(instr, gpu_id)) - return instr->cat6_a6xx.opc; - return instr->cat6.opc; - case 7: return instr->cat7.opc; - default: return 0; - } -} - -static inline bool is_mad(opc_t opc) -{ - switch (opc) { - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_MAD_U24: - case OPC_MAD_S24: - case OPC_MAD_F16: - case OPC_MAD_F32: - return true; - default: - return false; - } -} - -static inline bool is_madsh(opc_t opc) -{ - switch (opc) { - case OPC_MADSH_U16: - case OPC_MADSH_M16: - return true; - default: - return false; - } -} - -static inline bool is_atomic(opc_t opc) -{ - switch (opc) { - case OPC_ATOMIC_ADD: - case OPC_ATOMIC_SUB: - case OPC_ATOMIC_XCHG: - case OPC_ATOMIC_INC: - case OPC_ATOMIC_DEC: - case OPC_ATOMIC_CMPXCHG: - case OPC_ATOMIC_MIN: - case OPC_ATOMIC_MAX: - case OPC_ATOMIC_AND: - case OPC_ATOMIC_OR: - case OPC_ATOMIC_XOR: - return true; - default: - return false; - } -} - -static inline bool is_ssbo(opc_t opc) -{ - switch (opc) { - case OPC_RESFMT: - case OPC_RESINFO: - case OPC_LDGB: - case OPC_STGB: - case OPC_STIB: - return true; - default: - return false; - } -} - -static inline bool is_isam(opc_t opc) -{ - switch (opc) { - case OPC_ISAM: - case OPC_ISAML: - case OPC_ISAMM: - return true; - default: - return false; - } -} - - -static inline bool is_cat2_float(opc_t opc) -{ - switch (opc) { - case OPC_ADD_F: - case OPC_MIN_F: - case OPC_MAX_F: - case OPC_MUL_F: - case OPC_SIGN_F: - case OPC_CMPS_F: - case OPC_ABSNEG_F: - case OPC_CMPV_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - return true; - - default: - return false; - } -} - -static inline bool is_cat3_float(opc_t opc) -{ - switch (opc) { - case OPC_MAD_F16: - case OPC_MAD_F32: - case OPC_SEL_F16: - case OPC_SEL_F32: - return true; - default: - return false; - } -} - -int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, unsigned gpu_id); - -#endif /* INSTR_A3XX_H_ */ diff --git a/selfdrive/modeld/thneed/debug/decompiler/ir3.h b/selfdrive/modeld/thneed/debug/decompiler/ir3.h deleted file mode 100644 index 278dc96362..0000000000 --- a/selfdrive/modeld/thneed/debug/decompiler/ir3.h +++ /dev/null @@ -1,1755 +0,0 @@ -/* - * Copyright (c) 2013 Rob Clark - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef IR3_H_ -#define IR3_H_ - -#include -#include - -#include "shader_enums.h" -#include "util/list.h" - -/*#include "util/bitscan.h" -#include "util/list.h" -#include "util/set.h" -#include "util/u_debug.h"*/ - -#include "instr-a3xx.h" - -/* low level intermediate representation of an adreno shader program */ - -struct ir3_compiler; -struct ir3; -struct ir3_instruction; -struct ir3_block; - -struct ir3_info { - uint32_t gpu_id; - uint16_t sizedwords; - uint16_t instrs_count; /* expanded to account for rpt's */ - uint16_t nops_count; /* # of nop instructions, including nopN */ - uint16_t mov_count; - uint16_t cov_count; - /* NOTE: max_reg, etc, does not include registers not touched - * by the shader (ie. vertex fetched via VFD_DECODE but not - * touched by shader) - */ - int8_t max_reg; /* highest GPR # used by shader */ - int8_t max_half_reg; - int16_t max_const; - - /* number of sync bits: */ - uint16_t ss, sy; - - /* estimate of number of cycles stalled on (ss) */ - uint16_t sstall; - - uint16_t last_baryf; /* instruction # of last varying fetch */ -}; - -struct ir3_register { - enum { - IR3_REG_CONST = 0x001, - IR3_REG_IMMED = 0x002, - IR3_REG_HALF = 0x004, - /* high registers are used for some things in compute shaders, - * for example. Seems to be for things that are global to all - * threads in a wave, so possibly these are global/shared by - * all the threads in the wave? - */ - IR3_REG_HIGH = 0x008, - IR3_REG_RELATIV= 0x010, - IR3_REG_R = 0x020, - /* Most instructions, it seems, can do float abs/neg but not - * integer. The CP pass needs to know what is intended (int or - * float) in order to do the right thing. For this reason the - * abs/neg flags are split out into float and int variants. In - * addition, .b (bitwise) operations, the negate is actually a - * bitwise not, so split that out into a new flag to make it - * more clear. - */ - IR3_REG_FNEG = 0x040, - IR3_REG_FABS = 0x080, - IR3_REG_SNEG = 0x100, - IR3_REG_SABS = 0x200, - IR3_REG_BNOT = 0x400, - IR3_REG_EVEN = 0x800, - IR3_REG_POS_INF= 0x1000, - /* (ei) flag, end-input? Set on last bary, presumably to signal - * that the shader needs no more input: - */ - IR3_REG_EI = 0x2000, - /* meta-flags, for intermediate stages of IR, ie. - * before register assignment is done: - */ - IR3_REG_SSA = 0x4000, /* 'instr' is ptr to assigning instr */ - IR3_REG_ARRAY = 0x8000, - - } flags; - - /* used for cat5 instructions, but also for internal/IR level - * tracking of what registers are read/written by an instruction. - * wrmask may be a bad name since it is used to represent both - * src and dst that touch multiple adjacent registers. - */ - unsigned wrmask : 16; /* up to vec16 */ - - /* for relative addressing, 32bits for array size is too small, - * but otoh we don't need to deal with disjoint sets, so instead - * use a simple size field (number of scalar components). - * - * Note the size field isn't important for relative const (since - * we don't have to do register allocation for constants). - */ - unsigned size : 15; - - bool merged : 1; /* half-regs conflict with full regs (ie >= a6xx) */ - - /* normal registers: - * the component is in the low two bits of the reg #, so - * rN.x becomes: (N << 2) | x - */ - uint16_t num; - union { - /* immediate: */ - int32_t iim_val; - uint32_t uim_val; - float fim_val; - /* relative: */ - struct { - uint16_t id; - int16_t offset; - } array; - }; - - /* For IR3_REG_SSA, src registers contain ptr back to assigning - * instruction. - * - * For IR3_REG_ARRAY, the pointer is back to the last dependent - * array access (although the net effect is the same, it points - * back to a previous instruction that we depend on). - */ - struct ir3_instruction *instr; -}; - -/* - * Stupid/simple growable array implementation: - */ -#define DECLARE_ARRAY(type, name) \ - unsigned name ## _count, name ## _sz; \ - type * name; - -#define array_insert(ctx, arr, val) do { \ - if (arr ## _count == arr ## _sz) { \ - arr ## _sz = MAX2(2 * arr ## _sz, 16); \ - arr = reralloc_size(ctx, arr, arr ## _sz * sizeof(arr[0])); \ - } \ - arr[arr ##_count++] = val; \ - } while (0) - -struct ir3_instruction { - struct ir3_block *block; - opc_t opc; - enum { - /* (sy) flag is set on first instruction, and after sample - * instructions (probably just on RAW hazard). - */ - IR3_INSTR_SY = 0x001, - /* (ss) flag is set on first instruction, and first instruction - * to depend on the result of "long" instructions (RAW hazard): - * - * rcp, rsq, log2, exp2, sin, cos, sqrt - * - * It seems to synchronize until all in-flight instructions are - * completed, for example: - * - * rsq hr1.w, hr1.w - * add.f hr2.z, (neg)hr2.z, hc0.y - * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y - * rsq hr2.x, hr2.x - * (rpt1)nop - * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w - * nop - * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w - * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w - * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x - * - * The last mul.f does not have (ss) set, presumably because the - * (ss) on the previous instruction does the job. - * - * The blob driver also seems to set it on WAR hazards, although - * not really clear if this is needed or just blob compiler being - * sloppy. So far I haven't found a case where removing the (ss) - * causes problems for WAR hazard, but I could just be getting - * lucky: - * - * rcp r1.y, r3.y - * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z - * - */ - IR3_INSTR_SS = 0x002, - /* (jp) flag is set on jump targets: - */ - IR3_INSTR_JP = 0x004, - IR3_INSTR_UL = 0x008, - IR3_INSTR_3D = 0x010, - IR3_INSTR_A = 0x020, - IR3_INSTR_O = 0x040, - IR3_INSTR_P = 0x080, - IR3_INSTR_S = 0x100, - IR3_INSTR_S2EN = 0x200, - IR3_INSTR_G = 0x400, - IR3_INSTR_SAT = 0x800, - /* (cat5/cat6) Bindless */ - IR3_INSTR_B = 0x1000, - /* (cat5-only) Get some parts of the encoding from a1.x */ - IR3_INSTR_A1EN = 0x2000, - /* meta-flags, for intermediate stages of IR, ie. - * before register assignment is done: - */ - IR3_INSTR_MARK = 0x4000, - IR3_INSTR_UNUSED= 0x8000, - } flags; - uint8_t repeat; - uint8_t nop; -#ifdef DEBUG - unsigned regs_max; -#endif - unsigned regs_count; - struct ir3_register **regs; - union { - struct { - char inv; - char comp; - int immed; - struct ir3_block *target; - } cat0; - struct { - type_t src_type, dst_type; - } cat1; - struct { - enum { - IR3_COND_LT = 0, - IR3_COND_LE = 1, - IR3_COND_GT = 2, - IR3_COND_GE = 3, - IR3_COND_EQ = 4, - IR3_COND_NE = 5, - } condition; - } cat2; - struct { - unsigned samp, tex; - unsigned tex_base : 3; - type_t type; - } cat5; - struct { - type_t type; - int src_offset; - int dst_offset; - int iim_val : 3; /* for ldgb/stgb, # of components */ - unsigned d : 3; /* for ldc, component offset */ - bool typed : 1; - unsigned base : 3; - } cat6; - struct { - unsigned w : 1; /* write */ - unsigned r : 1; /* read */ - unsigned l : 1; /* local */ - unsigned g : 1; /* global */ - } cat7; - /* for meta-instructions, just used to hold extra data - * before instruction scheduling, etc - */ - struct { - int off; /* component/offset */ - } split; - struct { - /* for output collects, this maps back to the entry in the - * ir3_shader_variant::outputs table. - */ - int outidx; - } collect; - struct { - unsigned samp, tex; - unsigned input_offset; - unsigned samp_base : 3; - unsigned tex_base : 3; - } prefetch; - struct { - /* maps back to entry in ir3_shader_variant::inputs table: */ - int inidx; - /* for sysvals, identifies the sysval type. Mostly so we can - * identify the special cases where a sysval should not be DCE'd - * (currently, just pre-fs texture fetch) - */ - gl_system_value sysval; - } input; - }; - - /* When we get to the RA stage, we need instruction's position/name: */ - uint16_t ip; - uint16_t name; - - /* used for per-pass extra instruction data. - * - * TODO we should remove the per-pass data like this and 'use_count' - * and do something similar to what RA does w/ ir3_ra_instr_data.. - * ie. use the ir3_count_instructions pass, and then use instr->ip - * to index into a table of pass-private data. - */ - void *data; - - /** - * Valid if pass calls ir3_find_ssa_uses().. see foreach_ssa_use() - */ - struct set *uses; - - int sun; /* Sethi–Ullman number, used by sched */ - int use_count; /* currently just updated/used by cp */ - - /* Used during CP and RA stages. For collect and shader inputs/ - * outputs where we need a sequence of consecutive registers, - * keep track of each src instructions left (ie 'n-1') and right - * (ie 'n+1') neighbor. The front-end must insert enough mov's - * to ensure that each instruction has at most one left and at - * most one right neighbor. During the copy-propagation pass, - * we only remove mov's when we can preserve this constraint. - * And during the RA stage, we use the neighbor information to - * allocate a block of registers in one shot. - * - * TODO: maybe just add something like: - * struct ir3_instruction_ref { - * struct ir3_instruction *instr; - * unsigned cnt; - * } - * - * Or can we get away without the refcnt stuff? It seems like - * it should be overkill.. the problem is if, potentially after - * already eliminating some mov's, if you have a single mov that - * needs to be grouped with it's neighbors in two different - * places (ex. shader output and a collect). - */ - struct { - struct ir3_instruction *left, *right; - uint16_t left_cnt, right_cnt; - } cp; - - /* an instruction can reference at most one address register amongst - * it's src/dst registers. Beyond that, you need to insert mov's. - * - * NOTE: do not write this directly, use ir3_instr_set_address() - */ - struct ir3_instruction *address; - - /* Tracking for additional dependent instructions. Used to handle - * barriers, WAR hazards for arrays/SSBOs/etc. - */ - DECLARE_ARRAY(struct ir3_instruction *, deps); - - /* - * From PoV of instruction scheduling, not execution (ie. ignores global/ - * local distinction): - * shared image atomic SSBO everything - * barrier()/ - R/W R/W R/W R/W X - * groupMemoryBarrier() - * memoryBarrier() - R/W R/W - * (but only images declared coherent?) - * memoryBarrierAtomic() - R/W - * memoryBarrierBuffer() - R/W - * memoryBarrierImage() - R/W - * memoryBarrierShared() - R/W - * - * TODO I think for SSBO/image/shared, in cases where we can determine - * which variable is accessed, we don't need to care about accesses to - * different variables (unless declared coherent??) - */ - enum { - IR3_BARRIER_EVERYTHING = 1 << 0, - IR3_BARRIER_SHARED_R = 1 << 1, - IR3_BARRIER_SHARED_W = 1 << 2, - IR3_BARRIER_IMAGE_R = 1 << 3, - IR3_BARRIER_IMAGE_W = 1 << 4, - IR3_BARRIER_BUFFER_R = 1 << 5, - IR3_BARRIER_BUFFER_W = 1 << 6, - IR3_BARRIER_ARRAY_R = 1 << 7, - IR3_BARRIER_ARRAY_W = 1 << 8, - } barrier_class, barrier_conflict; - - /* Entry in ir3_block's instruction list: */ - struct list_head node; - -#ifdef DEBUG - uint32_t serialno; -#endif - - // TODO only computerator/assembler: - int line; -}; - -static inline struct ir3_instruction * -ir3_neighbor_first(struct ir3_instruction *instr) -{ - int cnt = 0; - while (instr->cp.left) { - instr = instr->cp.left; - if (++cnt > 0xffff) { - debug_assert(0); - break; - } - } - return instr; -} - -static inline int ir3_neighbor_count(struct ir3_instruction *instr) -{ - int num = 1; - - debug_assert(!instr->cp.left); - - while (instr->cp.right) { - num++; - instr = instr->cp.right; - if (num > 0xffff) { - debug_assert(0); - break; - } - } - - return num; -} - -struct ir3 { - struct ir3_compiler *compiler; - gl_shader_stage type; - - DECLARE_ARRAY(struct ir3_instruction *, inputs); - DECLARE_ARRAY(struct ir3_instruction *, outputs); - - /* Track bary.f (and ldlv) instructions.. this is needed in - * scheduling to ensure that all varying fetches happen before - * any potential kill instructions. The hw gets grumpy if all - * threads in a group are killed before the last bary.f gets - * a chance to signal end of input (ei). - */ - DECLARE_ARRAY(struct ir3_instruction *, baryfs); - - /* Track all indirect instructions (read and write). To avoid - * deadlock scenario where an address register gets scheduled, - * but other dependent src instructions cannot be scheduled due - * to dependency on a *different* address register value, the - * scheduler needs to ensure that all dependencies other than - * the instruction other than the address register are scheduled - * before the one that writes the address register. Having a - * convenient list of instructions that reference some address - * register simplifies this. - */ - DECLARE_ARRAY(struct ir3_instruction *, a0_users); - - /* same for a1.x: */ - DECLARE_ARRAY(struct ir3_instruction *, a1_users); - - /* and same for instructions that consume predicate register: */ - DECLARE_ARRAY(struct ir3_instruction *, predicates); - - /* Track texture sample instructions which need texture state - * patched in (for astc-srgb workaround): - */ - DECLARE_ARRAY(struct ir3_instruction *, astc_srgb); - - /* List of blocks: */ - struct list_head block_list; - - /* List of ir3_array's: */ - struct list_head array_list; - - unsigned max_sun; /* max Sethi–Ullman number */ - -#ifdef DEBUG - unsigned block_count, instr_count; -#endif -}; - -struct ir3_array { - struct list_head node; - unsigned length; - unsigned id; - - struct nir_register *r; - - /* To avoid array write's from getting DCE'd, keep track of the - * most recent write. Any array access depends on the most - * recent write. This way, nothing depends on writes after the - * last read. But all the writes that happen before that have - * something depending on them - */ - struct ir3_instruction *last_write; - - /* extra stuff used in RA pass: */ - unsigned base; /* base vreg name */ - unsigned reg; /* base physical reg */ - uint16_t start_ip, end_ip; - - /* Indicates if half-precision */ - bool half; -}; - -struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id); - -struct ir3_block { - struct list_head node; - struct ir3 *shader; - - const struct nir_block *nblock; - - struct list_head instr_list; /* list of ir3_instruction */ - - /* each block has either one or two successors.. in case of - * two successors, 'condition' decides which one to follow. - * A block preceding an if/else has two successors. - */ - struct ir3_instruction *condition; - struct ir3_block *successors[2]; - - struct set *predecessors; /* set of ir3_block */ - - uint16_t start_ip, end_ip; - - /* Track instructions which do not write a register but other- - * wise must not be discarded (such as kill, stg, etc) - */ - DECLARE_ARRAY(struct ir3_instruction *, keeps); - - /* used for per-pass extra block data. Mainly used right - * now in RA step to track livein/liveout. - */ - void *data; - -#ifdef DEBUG - uint32_t serialno; -#endif -}; - -static inline uint32_t -block_id(struct ir3_block *block) -{ -#ifdef DEBUG - return block->serialno; -#else - return (uint32_t)(unsigned long)block; -#endif -} - -struct ir3 * ir3_create(struct ir3_compiler *compiler, gl_shader_stage type); -void ir3_destroy(struct ir3 *shader); -void * ir3_assemble(struct ir3 *shader, - struct ir3_info *info, uint32_t gpu_id); -void * ir3_alloc(struct ir3 *shader, int sz); - -struct ir3_block * ir3_block_create(struct ir3 *shader); - -struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc); -struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, - opc_t opc, int nreg); -struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); -void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep); -const char *ir3_instr_name(struct ir3_instruction *instr); - -struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, - int num, int flags); -struct ir3_register * ir3_reg_clone(struct ir3 *shader, - struct ir3_register *reg); - -void ir3_instr_set_address(struct ir3_instruction *instr, - struct ir3_instruction *addr); - -static inline bool ir3_instr_check_mark(struct ir3_instruction *instr) -{ - if (instr->flags & IR3_INSTR_MARK) - return true; /* already visited */ - instr->flags |= IR3_INSTR_MARK; - return false; -} - -void ir3_block_clear_mark(struct ir3_block *block); -void ir3_clear_mark(struct ir3 *shader); - -unsigned ir3_count_instructions(struct ir3 *ir); -unsigned ir3_count_instructions_ra(struct ir3 *ir); - -void ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps); - -//#include "util/set.h" -#define foreach_ssa_use(__use, __instr) \ - for (struct ir3_instruction *__use = (void *)~0; \ - __use && (__instr)->uses; __use = NULL) \ - set_foreach ((__instr)->uses, __entry) \ - if ((__use = (void *)__entry->key)) - -#define MAX_ARRAYS 16 - -/* comp: - * 0 - x - * 1 - y - * 2 - z - * 3 - w - */ -static inline uint32_t regid(int num, int comp) -{ - return (num << 2) | (comp & 0x3); -} - -static inline uint32_t reg_num(struct ir3_register *reg) -{ - return reg->num >> 2; -} - -static inline uint32_t reg_comp(struct ir3_register *reg) -{ - return reg->num & 0x3; -} - -#define INVALID_REG regid(63, 0) -#define VALIDREG(r) ((r) != INVALID_REG) -#define CONDREG(r, val) COND(VALIDREG(r), (val)) - -static inline bool is_flow(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 0); -} - -static inline bool is_kill(struct ir3_instruction *instr) -{ - return instr->opc == OPC_KILL; -} - -static inline bool is_nop(struct ir3_instruction *instr) -{ - return instr->opc == OPC_NOP; -} - -static inline bool is_same_type_reg(struct ir3_register *reg1, - struct ir3_register *reg2) -{ - unsigned type_reg1 = (reg1->flags & (IR3_REG_HIGH | IR3_REG_HALF)); - unsigned type_reg2 = (reg2->flags & (IR3_REG_HIGH | IR3_REG_HALF)); - - if (type_reg1 ^ type_reg2) - return false; - else - return true; -} - -/* Is it a non-transformative (ie. not type changing) mov? This can - * also include absneg.s/absneg.f, which for the most part can be - * treated as a mov (single src argument). - */ -static inline bool is_same_type_mov(struct ir3_instruction *instr) -{ - struct ir3_register *dst; - - switch (instr->opc) { - case OPC_MOV: - if (instr->cat1.src_type != instr->cat1.dst_type) - return false; - /* If the type of dest reg and src reg are different, - * it shouldn't be considered as same type mov - */ - if (!is_same_type_reg(instr->regs[0], instr->regs[1])) - return false; - break; - case OPC_ABSNEG_F: - case OPC_ABSNEG_S: - if (instr->flags & IR3_INSTR_SAT) - return false; - /* If the type of dest reg and src reg are different, - * it shouldn't be considered as same type mov - */ - if (!is_same_type_reg(instr->regs[0], instr->regs[1])) - return false; - break; - default: - return false; - } - - dst = instr->regs[0]; - - /* mov's that write to a0 or p0.x are special: */ - if (dst->num == regid(REG_P0, 0)) - return false; - if (reg_num(dst) == REG_A0) - return false; - - if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY)) - return false; - - return true; -} - -/* A move from const, which changes size but not type, can also be - * folded into dest instruction in some cases. - */ -static inline bool is_const_mov(struct ir3_instruction *instr) -{ - if (instr->opc != OPC_MOV) - return false; - - if (!(instr->regs[1]->flags & IR3_REG_CONST)) - return false; - - type_t src_type = instr->cat1.src_type; - type_t dst_type = instr->cat1.dst_type; - - return (type_float(src_type) && type_float(dst_type)) || - (type_uint(src_type) && type_uint(dst_type)) || - (type_sint(src_type) && type_sint(dst_type)); -} - -static inline bool is_alu(struct ir3_instruction *instr) -{ - return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3); -} - -static inline bool is_sfu(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 4); -} - -static inline bool is_tex(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 5); -} - -static inline bool is_tex_or_prefetch(struct ir3_instruction *instr) -{ - return is_tex(instr) || (instr->opc == OPC_META_TEX_PREFETCH); -} - -static inline bool is_mem(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 6); -} - -static inline bool is_barrier(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == 7); -} - -static inline bool -is_half(struct ir3_instruction *instr) -{ - return !!(instr->regs[0]->flags & IR3_REG_HALF); -} - -static inline bool -is_high(struct ir3_instruction *instr) -{ - return !!(instr->regs[0]->flags & IR3_REG_HIGH); -} - -static inline bool -is_store(struct ir3_instruction *instr) -{ - /* these instructions, the "destination" register is - * actually a source, the address to store to. - */ - switch (instr->opc) { - case OPC_STG: - case OPC_STGB: - case OPC_STIB: - case OPC_STP: - case OPC_STL: - case OPC_STLW: - case OPC_L2G: - case OPC_G2L: - return true; - default: - return false; - } -} - -static inline bool is_load(struct ir3_instruction *instr) -{ - switch (instr->opc) { - case OPC_LDG: - case OPC_LDGB: - case OPC_LDIB: - case OPC_LDL: - case OPC_LDP: - case OPC_L2G: - case OPC_LDLW: - case OPC_LDC: - case OPC_LDLV: - /* probably some others too.. */ - return true; - default: - return false; - } -} - -static inline bool is_input(struct ir3_instruction *instr) -{ - /* in some cases, ldlv is used to fetch varying without - * interpolation.. fortunately inloc is the first src - * register in either case - */ - switch (instr->opc) { - case OPC_LDLV: - case OPC_BARY_F: - return true; - default: - return false; - } -} - -static inline bool is_bool(struct ir3_instruction *instr) -{ - switch (instr->opc) { - case OPC_CMPS_F: - case OPC_CMPS_S: - case OPC_CMPS_U: - return true; - default: - return false; - } -} - -static inline bool is_meta(struct ir3_instruction *instr) -{ - return (opc_cat(instr->opc) == -1); -} - -static inline unsigned dest_regs(struct ir3_instruction *instr) -{ - if ((instr->regs_count == 0) || is_store(instr) || is_flow(instr)) - return 0; - - return util_last_bit(instr->regs[0]->wrmask); -} - -static inline bool -writes_gpr(struct ir3_instruction *instr) -{ - if (dest_regs(instr) == 0) - return false; - /* is dest a normal temp register: */ - struct ir3_register *reg = instr->regs[0]; - debug_assert(!(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))); - if ((reg_num(reg) == REG_A0) || - (reg->num == regid(REG_P0, 0))) - return false; - return true; -} - -static inline bool writes_addr0(struct ir3_instruction *instr) -{ - if (instr->regs_count > 0) { - struct ir3_register *dst = instr->regs[0]; - return dst->num == regid(REG_A0, 0); - } - return false; -} - -static inline bool writes_addr1(struct ir3_instruction *instr) -{ - if (instr->regs_count > 0) { - struct ir3_register *dst = instr->regs[0]; - return dst->num == regid(REG_A0, 1); - } - return false; -} - -static inline bool writes_pred(struct ir3_instruction *instr) -{ - if (instr->regs_count > 0) { - struct ir3_register *dst = instr->regs[0]; - return reg_num(dst) == REG_P0; - } - return false; -} - -/* returns defining instruction for reg */ -/* TODO better name */ -static inline struct ir3_instruction *ssa(struct ir3_register *reg) -{ - if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) { - return reg->instr; - } - return NULL; -} - -static inline bool conflicts(struct ir3_instruction *a, - struct ir3_instruction *b) -{ - return (a && b) && (a != b); -} - -static inline bool reg_gpr(struct ir3_register *r) -{ - if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED)) - return false; - if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) - return false; - return true; -} - -static inline type_t half_type(type_t type) -{ - switch (type) { - case TYPE_F32: return TYPE_F16; - case TYPE_U32: return TYPE_U16; - case TYPE_S32: return TYPE_S16; - case TYPE_F16: - case TYPE_U16: - case TYPE_S16: - return type; - default: - assert(0); - return ~0; - } -} - -/* some cat2 instructions (ie. those which are not float) can embed an - * immediate: - */ -static inline bool ir3_cat2_int(opc_t opc) -{ - switch (opc) { - case OPC_ADD_U: - case OPC_ADD_S: - case OPC_SUB_U: - case OPC_SUB_S: - case OPC_CMPS_U: - case OPC_CMPS_S: - case OPC_MIN_U: - case OPC_MIN_S: - case OPC_MAX_U: - case OPC_MAX_S: - case OPC_CMPV_U: - case OPC_CMPV_S: - case OPC_MUL_U24: - case OPC_MUL_S24: - case OPC_MULL_U: - case OPC_CLZ_S: - case OPC_ABSNEG_S: - case OPC_AND_B: - case OPC_OR_B: - case OPC_NOT_B: - case OPC_XOR_B: - case OPC_BFREV_B: - case OPC_CLZ_B: - case OPC_SHL_B: - case OPC_SHR_B: - case OPC_ASHR_B: - case OPC_MGEN_B: - case OPC_GETBIT_B: - case OPC_CBITS_B: - case OPC_BARY_F: - return true; - - default: - return false; - } -} - -/* map cat2 instruction to valid abs/neg flags: */ -static inline unsigned ir3_cat2_absneg(opc_t opc) -{ - switch (opc) { - case OPC_ADD_F: - case OPC_MIN_F: - case OPC_MAX_F: - case OPC_MUL_F: - case OPC_SIGN_F: - case OPC_CMPS_F: - case OPC_ABSNEG_F: - case OPC_CMPV_F: - case OPC_FLOOR_F: - case OPC_CEIL_F: - case OPC_RNDNE_F: - case OPC_RNDAZ_F: - case OPC_TRUNC_F: - case OPC_BARY_F: - return IR3_REG_FABS | IR3_REG_FNEG; - - case OPC_ADD_U: - case OPC_ADD_S: - case OPC_SUB_U: - case OPC_SUB_S: - case OPC_CMPS_U: - case OPC_CMPS_S: - case OPC_MIN_U: - case OPC_MIN_S: - case OPC_MAX_U: - case OPC_MAX_S: - case OPC_CMPV_U: - case OPC_CMPV_S: - case OPC_MUL_U24: - case OPC_MUL_S24: - case OPC_MULL_U: - case OPC_CLZ_S: - return 0; - - case OPC_ABSNEG_S: - return IR3_REG_SABS | IR3_REG_SNEG; - - case OPC_AND_B: - case OPC_OR_B: - case OPC_NOT_B: - case OPC_XOR_B: - case OPC_BFREV_B: - case OPC_CLZ_B: - case OPC_SHL_B: - case OPC_SHR_B: - case OPC_ASHR_B: - case OPC_MGEN_B: - case OPC_GETBIT_B: - case OPC_CBITS_B: - return IR3_REG_BNOT; - - default: - return 0; - } -} - -/* map cat3 instructions to valid abs/neg flags: */ -static inline unsigned ir3_cat3_absneg(opc_t opc) -{ - switch (opc) { - case OPC_MAD_F16: - case OPC_MAD_F32: - case OPC_SEL_F16: - case OPC_SEL_F32: - return IR3_REG_FNEG; - - case OPC_MAD_U16: - case OPC_MADSH_U16: - case OPC_MAD_S16: - case OPC_MADSH_M16: - case OPC_MAD_U24: - case OPC_MAD_S24: - case OPC_SEL_S16: - case OPC_SEL_S32: - case OPC_SAD_S16: - case OPC_SAD_S32: - /* neg *may* work on 3rd src.. */ - - case OPC_SEL_B16: - case OPC_SEL_B32: - - default: - return 0; - } -} - -#define MASK(n) ((1 << (n)) - 1) - -/* iterator for an instructions's sources (reg), also returns src #: */ -#define foreach_src_n(__srcreg, __n, __instr) \ - if ((__instr)->regs_count) \ - for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; __n < __cnt; __n++) \ - if ((__srcreg = (__instr)->regs[__n + 1])) - -/* iterator for an instructions's sources (reg): */ -#define foreach_src(__srcreg, __instr) \ - foreach_src_n(__srcreg, __i, __instr) - -static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr) -{ - unsigned cnt = instr->regs_count + instr->deps_count; - if (instr->address) - cnt++; - return cnt; -} - -static inline struct ir3_instruction ** -__ssa_srcp_n(struct ir3_instruction *instr, unsigned n) -{ - if (n == (instr->regs_count + instr->deps_count)) - return &instr->address; - if (n >= instr->regs_count) - return &instr->deps[n - instr->regs_count]; - if (ssa(instr->regs[n])) - return &instr->regs[n]->instr; - return NULL; -} - -static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n) -{ - if (n == (instr->regs_count + instr->deps_count)) - return false; - if (n >= instr->regs_count) - return true; - return false; -} - -#define foreach_ssa_srcp_n(__srcp, __n, __instr) \ - for (struct ir3_instruction **__srcp = (void *)~0; __srcp; __srcp = NULL) \ - for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \ - if ((__srcp = __ssa_srcp_n(__instr, __n))) - -#define foreach_ssa_srcp(__srcp, __instr) \ - foreach_ssa_srcp_n(__srcp, __i, __instr) - -/* iterator for an instruction's SSA sources (instr), also returns src #: */ -#define foreach_ssa_src_n(__srcinst, __n, __instr) \ - foreach_ssa_srcp_n(__srcp, __n, __instr) \ - if ((__srcinst = *__srcp)) - -/* iterator for an instruction's SSA sources (instr): */ -#define foreach_ssa_src(__srcinst, __instr) \ - foreach_ssa_src_n(__srcinst, __i, __instr) - -/* iterators for shader inputs: */ -#define foreach_input_n(__ininstr, __cnt, __ir) \ - for (unsigned __cnt = 0; __cnt < (__ir)->inputs_count; __cnt++) \ - if ((__ininstr = (__ir)->inputs[__cnt])) -#define foreach_input(__ininstr, __ir) \ - foreach_input_n(__ininstr, __i, __ir) - -/* iterators for shader outputs: */ -#define foreach_output_n(__outinstr, __cnt, __ir) \ - for (unsigned __cnt = 0; __cnt < (__ir)->outputs_count; __cnt++) \ - if ((__outinstr = (__ir)->outputs[__cnt])) -#define foreach_output(__outinstr, __ir) \ - foreach_output_n(__outinstr, __i, __ir) - -/* iterators for instructions: */ -#define foreach_instr(__instr, __list) \ - list_for_each_entry(struct ir3_instruction, __instr, __list, node) -#define foreach_instr_rev(__instr, __list) \ - list_for_each_entry_rev(struct ir3_instruction, __instr, __list, node) -#define foreach_instr_safe(__instr, __list) \ - list_for_each_entry_safe(struct ir3_instruction, __instr, __list, node) - -/* iterators for blocks: */ -#define foreach_block(__block, __list) \ - list_for_each_entry(struct ir3_block, __block, __list, node) -#define foreach_block_safe(__block, __list) \ - list_for_each_entry_safe(struct ir3_block, __block, __list, node) -#define foreach_block_rev(__block, __list) \ - list_for_each_entry_rev(struct ir3_block, __block, __list, node) - -/* iterators for arrays: */ -#define foreach_array(__array, __list) \ - list_for_each_entry(struct ir3_array, __array, __list, node) - -/* Check if condition is true for any src instruction. - */ -static inline bool -check_src_cond(struct ir3_instruction *instr, bool (*cond)(struct ir3_instruction *)) -{ - struct ir3_register *reg; - - /* Note that this is also used post-RA so skip the ssa iterator: */ - foreach_src (reg, instr) { - struct ir3_instruction *src = reg->instr; - - if (!src) - continue; - - /* meta:split/collect aren't real instructions, the thing that - * we actually care about is *their* srcs - */ - if ((src->opc == OPC_META_SPLIT) || (src->opc == OPC_META_COLLECT)) { - if (check_src_cond(src, cond)) - return true; - } else { - if (cond(src)) - return true; - } - } - - return false; -} - -/* dump: */ -void ir3_print(struct ir3 *ir); -void ir3_print_instr(struct ir3_instruction *instr); - -/* delay calculation: */ -int ir3_delayslots(struct ir3_instruction *assigner, - struct ir3_instruction *consumer, unsigned n, bool soft); -unsigned ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr, - bool soft, bool pred); -void ir3_remove_nops(struct ir3 *ir); - -/* dead code elimination: */ -struct ir3_shader_variant; -void ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so); - -/* fp16 conversion folding */ -void ir3_cf(struct ir3 *ir); - -/* copy-propagate: */ -void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so); - -/* group neighbors and insert mov's to resolve conflicts: */ -void ir3_group(struct ir3 *ir); - -/* Sethi–Ullman numbering: */ -void ir3_sun(struct ir3 *ir); - -/* scheduling: */ -void ir3_sched_add_deps(struct ir3 *ir); -int ir3_sched(struct ir3 *ir); - -struct ir3_context; -int ir3_postsched(struct ir3_context *ctx); - -bool ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so); - -/* register assignment: */ -struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler); -int ir3_ra(struct ir3_shader_variant *v, struct ir3_instruction **precolor, unsigned nprecolor); - -/* legalize: */ -void ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary); - -static inline bool -ir3_has_latency_to_hide(struct ir3 *ir) -{ - /* VS/GS/TCS/TESS co-exist with frag shader invocations, but we don't - * know the nature of the fragment shader. Just assume it will have - * latency to hide: - */ - if (ir->type != MESA_SHADER_FRAGMENT) - return true; - - foreach_block (block, &ir->block_list) { - foreach_instr (instr, &block->instr_list) { - if (is_tex_or_prefetch(instr)) - return true; - - if (is_load(instr)) { - switch (instr->opc) { - case OPC_LDLV: - case OPC_LDL: - case OPC_LDLW: - break; - default: - return true; - } - } - } - } - - return false; -} - -/* ************************************************************************* */ -/* instruction helpers */ - -/* creates SSA src of correct type (ie. half vs full precision) */ -static inline struct ir3_register * __ssa_src(struct ir3_instruction *instr, - struct ir3_instruction *src, unsigned flags) -{ - struct ir3_register *reg; - if (src->regs[0]->flags & IR3_REG_HALF) - flags |= IR3_REG_HALF; - reg = ir3_reg_create(instr, 0, IR3_REG_SSA | flags); - reg->instr = src; - reg->wrmask = src->regs[0]->wrmask; - return reg; -} - -static inline struct ir3_register * __ssa_dst(struct ir3_instruction *instr) -{ - struct ir3_register *reg = ir3_reg_create(instr, 0, 0); - reg->flags |= IR3_REG_SSA; - return reg; -} - -static inline struct ir3_instruction * -create_immed_typed(struct ir3_block *block, uint32_t val, type_t type) -{ - struct ir3_instruction *mov; - unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0; - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = type; - mov->cat1.dst_type = type; - __ssa_dst(mov)->flags |= flags; - ir3_reg_create(mov, 0, IR3_REG_IMMED | flags)->uim_val = val; - - return mov; -} - -static inline struct ir3_instruction * -create_immed(struct ir3_block *block, uint32_t val) -{ - return create_immed_typed(block, val, TYPE_U32); -} - -static inline struct ir3_instruction * -create_uniform_typed(struct ir3_block *block, unsigned n, type_t type) -{ - struct ir3_instruction *mov; - unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0; - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = type; - mov->cat1.dst_type = type; - __ssa_dst(mov)->flags |= flags; - ir3_reg_create(mov, n, IR3_REG_CONST | flags); - - return mov; -} - -static inline struct ir3_instruction * -create_uniform(struct ir3_block *block, unsigned n) -{ - return create_uniform_typed(block, n, TYPE_F32); -} - -static inline struct ir3_instruction * -create_uniform_indirect(struct ir3_block *block, int n, - struct ir3_instruction *address) -{ - struct ir3_instruction *mov; - - mov = ir3_instr_create(block, OPC_MOV); - mov->cat1.src_type = TYPE_U32; - mov->cat1.dst_type = TYPE_U32; - __ssa_dst(mov); - ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n; - - ir3_instr_set_address(mov, address); - - return mov; -} - -static inline struct ir3_instruction * -ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) -{ - struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); - __ssa_dst(instr); - if (src->regs[0]->flags & IR3_REG_ARRAY) { - struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY); - src_reg->array = src->regs[0]->array; - } else { - __ssa_src(instr, src, src->regs[0]->flags & IR3_REG_HIGH); - } - debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV)); - instr->cat1.src_type = type; - instr->cat1.dst_type = type; - return instr; -} - -static inline struct ir3_instruction * -ir3_COV(struct ir3_block *block, struct ir3_instruction *src, - type_t src_type, type_t dst_type) -{ - struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); - unsigned dst_flags = (type_size(dst_type) < 32) ? IR3_REG_HALF : 0; - unsigned src_flags = (type_size(src_type) < 32) ? IR3_REG_HALF : 0; - - debug_assert((src->regs[0]->flags & IR3_REG_HALF) == src_flags); - - __ssa_dst(instr)->flags |= dst_flags; - __ssa_src(instr, src, 0); - instr->cat1.src_type = src_type; - instr->cat1.dst_type = dst_type; - debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY)); - return instr; -} - -static inline struct ir3_instruction * -ir3_NOP(struct ir3_block *block) -{ - return ir3_instr_create(block, OPC_NOP); -} - -#define IR3_INSTR_0 0 - -#define __INSTR0(flag, name, opc) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create(block, opc); \ - instr->flags |= flag; \ - return instr; \ -} -#define INSTR0F(f, name) __INSTR0(IR3_INSTR_##f, name##_##f, OPC_##name) -#define INSTR0(name) __INSTR0(0, name, OPC_##name) - -#define __INSTR1(flag, name, opc) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block, \ - struct ir3_instruction *a, unsigned aflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create(block, opc); \ - __ssa_dst(instr); \ - __ssa_src(instr, a, aflags); \ - instr->flags |= flag; \ - return instr; \ -} -#define INSTR1F(f, name) __INSTR1(IR3_INSTR_##f, name##_##f, OPC_##name) -#define INSTR1(name) __INSTR1(0, name, OPC_##name) - -#define __INSTR2(flag, name, opc) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block, \ - struct ir3_instruction *a, unsigned aflags, \ - struct ir3_instruction *b, unsigned bflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create(block, opc); \ - __ssa_dst(instr); \ - __ssa_src(instr, a, aflags); \ - __ssa_src(instr, b, bflags); \ - instr->flags |= flag; \ - return instr; \ -} -#define INSTR2F(f, name) __INSTR2(IR3_INSTR_##f, name##_##f, OPC_##name) -#define INSTR2(name) __INSTR2(0, name, OPC_##name) - -#define __INSTR3(flag, name, opc) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block, \ - struct ir3_instruction *a, unsigned aflags, \ - struct ir3_instruction *b, unsigned bflags, \ - struct ir3_instruction *c, unsigned cflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create2(block, opc, 4); \ - __ssa_dst(instr); \ - __ssa_src(instr, a, aflags); \ - __ssa_src(instr, b, bflags); \ - __ssa_src(instr, c, cflags); \ - instr->flags |= flag; \ - return instr; \ -} -#define INSTR3F(f, name) __INSTR3(IR3_INSTR_##f, name##_##f, OPC_##name) -#define INSTR3(name) __INSTR3(0, name, OPC_##name) - -#define __INSTR4(flag, name, opc) \ -static inline struct ir3_instruction * \ -ir3_##name(struct ir3_block *block, \ - struct ir3_instruction *a, unsigned aflags, \ - struct ir3_instruction *b, unsigned bflags, \ - struct ir3_instruction *c, unsigned cflags, \ - struct ir3_instruction *d, unsigned dflags) \ -{ \ - struct ir3_instruction *instr = \ - ir3_instr_create2(block, opc, 5); \ - __ssa_dst(instr); \ - __ssa_src(instr, a, aflags); \ - __ssa_src(instr, b, bflags); \ - __ssa_src(instr, c, cflags); \ - __ssa_src(instr, d, dflags); \ - instr->flags |= flag; \ - return instr; \ -} -#define INSTR4F(f, name) __INSTR4(IR3_INSTR_##f, name##_##f, OPC_##name) -#define INSTR4(name) __INSTR4(0, name, OPC_##name) - -/* cat0 instructions: */ -INSTR1(B) -INSTR0(JUMP) -INSTR1(KILL) -INSTR0(END) -INSTR0(CHSH) -INSTR0(CHMASK) -INSTR1(PREDT) -INSTR0(PREDF) -INSTR0(PREDE) - -/* cat2 instructions, most 2 src but some 1 src: */ -INSTR2(ADD_F) -INSTR2(MIN_F) -INSTR2(MAX_F) -INSTR2(MUL_F) -INSTR1(SIGN_F) -INSTR2(CMPS_F) -INSTR1(ABSNEG_F) -INSTR2(CMPV_F) -INSTR1(FLOOR_F) -INSTR1(CEIL_F) -INSTR1(RNDNE_F) -INSTR1(RNDAZ_F) -INSTR1(TRUNC_F) -INSTR2(ADD_U) -INSTR2(ADD_S) -INSTR2(SUB_U) -INSTR2(SUB_S) -INSTR2(CMPS_U) -INSTR2(CMPS_S) -INSTR2(MIN_U) -INSTR2(MIN_S) -INSTR2(MAX_U) -INSTR2(MAX_S) -INSTR1(ABSNEG_S) -INSTR2(AND_B) -INSTR2(OR_B) -INSTR1(NOT_B) -INSTR2(XOR_B) -INSTR2(CMPV_U) -INSTR2(CMPV_S) -INSTR2(MUL_U24) -INSTR2(MUL_S24) -INSTR2(MULL_U) -INSTR1(BFREV_B) -INSTR1(CLZ_S) -INSTR1(CLZ_B) -INSTR2(SHL_B) -INSTR2(SHR_B) -INSTR2(ASHR_B) -INSTR2(BARY_F) -INSTR2(MGEN_B) -INSTR2(GETBIT_B) -INSTR1(SETRM) -INSTR1(CBITS_B) -INSTR2(SHB) -INSTR2(MSAD) - -/* cat3 instructions: */ -INSTR3(MAD_U16) -INSTR3(MADSH_U16) -INSTR3(MAD_S16) -INSTR3(MADSH_M16) -INSTR3(MAD_U24) -INSTR3(MAD_S24) -INSTR3(MAD_F16) -INSTR3(MAD_F32) -/* NOTE: SEL_B32 checks for zero vs nonzero */ -INSTR3(SEL_B16) -INSTR3(SEL_B32) -INSTR3(SEL_S16) -INSTR3(SEL_S32) -INSTR3(SEL_F16) -INSTR3(SEL_F32) -INSTR3(SAD_S16) -INSTR3(SAD_S32) - -/* cat4 instructions: */ -INSTR1(RCP) -INSTR1(RSQ) -INSTR1(HRSQ) -INSTR1(LOG2) -INSTR1(HLOG2) -INSTR1(EXP2) -INSTR1(HEXP2) -INSTR1(SIN) -INSTR1(COS) -INSTR1(SQRT) - -/* cat5 instructions: */ -INSTR1(DSX) -INSTR1(DSXPP_1) -INSTR1(DSY) -INSTR1(DSYPP_1) -INSTR1F(3D, DSX) -INSTR1F(3D, DSY) -INSTR1(RGETPOS) - -static inline struct ir3_instruction * -ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, - unsigned wrmask, unsigned flags, struct ir3_instruction *samp_tex, - struct ir3_instruction *src0, struct ir3_instruction *src1) -{ - struct ir3_instruction *sam; - - sam = ir3_instr_create(block, opc); - sam->flags |= flags; - __ssa_dst(sam)->wrmask = wrmask; - if (flags & IR3_INSTR_S2EN) { - __ssa_src(sam, samp_tex, IR3_REG_HALF); - } - if (src0) { - __ssa_src(sam, src0, 0); - } - if (src1) { - __ssa_src(sam, src1, 0); - } - sam->cat5.type = type; - - return sam; -} - -/* cat6 instructions: */ -INSTR2(LDLV) -INSTR3(LDG) -INSTR3(LDL) -INSTR3(LDLW) -INSTR3(STG) -INSTR3(STL) -INSTR3(STLW) -INSTR1(RESINFO) -INSTR1(RESFMT) -INSTR2(ATOMIC_ADD) -INSTR2(ATOMIC_SUB) -INSTR2(ATOMIC_XCHG) -INSTR2(ATOMIC_INC) -INSTR2(ATOMIC_DEC) -INSTR2(ATOMIC_CMPXCHG) -INSTR2(ATOMIC_MIN) -INSTR2(ATOMIC_MAX) -INSTR2(ATOMIC_AND) -INSTR2(ATOMIC_OR) -INSTR2(ATOMIC_XOR) -INSTR2(LDC) -#if GPU >= 600 -INSTR3(STIB); -INSTR2(LDIB); -INSTR3F(G, ATOMIC_ADD) -INSTR3F(G, ATOMIC_SUB) -INSTR3F(G, ATOMIC_XCHG) -INSTR3F(G, ATOMIC_INC) -INSTR3F(G, ATOMIC_DEC) -INSTR3F(G, ATOMIC_CMPXCHG) -INSTR3F(G, ATOMIC_MIN) -INSTR3F(G, ATOMIC_MAX) -INSTR3F(G, ATOMIC_AND) -INSTR3F(G, ATOMIC_OR) -INSTR3F(G, ATOMIC_XOR) -#elif GPU >= 400 -INSTR3(LDGB) -INSTR4(STGB) -INSTR4(STIB) -INSTR4F(G, ATOMIC_ADD) -INSTR4F(G, ATOMIC_SUB) -INSTR4F(G, ATOMIC_XCHG) -INSTR4F(G, ATOMIC_INC) -INSTR4F(G, ATOMIC_DEC) -INSTR4F(G, ATOMIC_CMPXCHG) -INSTR4F(G, ATOMIC_MIN) -INSTR4F(G, ATOMIC_MAX) -INSTR4F(G, ATOMIC_AND) -INSTR4F(G, ATOMIC_OR) -INSTR4F(G, ATOMIC_XOR) -#endif - -INSTR4F(G, STG) - -/* cat7 instructions: */ -INSTR0(BAR) -INSTR0(FENCE) - -/* meta instructions: */ -INSTR0(META_TEX_PREFETCH); - -/* ************************************************************************* */ -/* split this out or find some helper to use.. like main/bitset.h.. */ - -#include -#include "util/bitset.h" - -#define MAX_REG 256 - -typedef BITSET_DECLARE(regmask_t, 2 * MAX_REG); - -static inline bool -__regmask_get(regmask_t *regmask, struct ir3_register *reg, unsigned n) -{ - if (reg->merged) { - /* a6xx+ case, with merged register file, we track things in terms - * of half-precision registers, with a full precisions register - * using two half-precision slots: - */ - if (reg->flags & IR3_REG_HALF) { - return BITSET_TEST(*regmask, n); - } else { - n *= 2; - return BITSET_TEST(*regmask, n) || BITSET_TEST(*regmask, n+1); - } - } else { - /* pre a6xx case, with separate register file for half and full - * precision: - */ - if (reg->flags & IR3_REG_HALF) - n += MAX_REG; - return BITSET_TEST(*regmask, n); - } -} - -static inline void -__regmask_set(regmask_t *regmask, struct ir3_register *reg, unsigned n) -{ - if (reg->merged) { - /* a6xx+ case, with merged register file, we track things in terms - * of half-precision registers, with a full precisions register - * using two half-precision slots: - */ - if (reg->flags & IR3_REG_HALF) { - BITSET_SET(*regmask, n); - } else { - n *= 2; - BITSET_SET(*regmask, n); - BITSET_SET(*regmask, n+1); - } - } else { - /* pre a6xx case, with separate register file for half and full - * precision: - */ - if (reg->flags & IR3_REG_HALF) - n += MAX_REG; - BITSET_SET(*regmask, n); - } -} - -static inline void regmask_init(regmask_t *regmask) -{ - memset(regmask, 0, sizeof(*regmask)); -} - -static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) -{ - if (reg->flags & IR3_REG_RELATIV) { - for (unsigned i = 0; i < reg->size; i++) - __regmask_set(regmask, reg, reg->array.offset + i); - } else { - for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++) - if (mask & 1) - __regmask_set(regmask, reg, n); - } -} - -static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b) -{ - unsigned i; - for (i = 0; i < ARRAY_SIZE(*dst); i++) - (*dst)[i] = (*a)[i] | (*b)[i]; -} - -static inline bool regmask_get(regmask_t *regmask, - struct ir3_register *reg) -{ - if (reg->flags & IR3_REG_RELATIV) { - for (unsigned i = 0; i < reg->size; i++) - if (__regmask_get(regmask, reg, reg->array.offset + i)) - return true; - } else { - for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++) - if (mask & 1) - if (__regmask_get(regmask, reg, n)) - return true; - } - return false; -} - -/* ************************************************************************* */ - -#endif /* IR3_H_ */ diff --git a/selfdrive/modeld/thneed/debug/decompiler/shader_enums.h b/selfdrive/modeld/thneed/debug/decompiler/shader_enums.h deleted file mode 100644 index b33a91727a..0000000000 --- a/selfdrive/modeld/thneed/debug/decompiler/shader_enums.h +++ /dev/null @@ -1,906 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. - * Copyright (C) 2009 VMware, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef SHADER_ENUMS_H -#define SHADER_ENUMS_H - -#include - -/* Project-wide (GL and Vulkan) maximum. */ -#define MAX_DRAW_BUFFERS 8 - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Shader stages. - * - * The order must match how shaders are ordered in the pipeline. - * The GLSL linker assumes that if i is the maximum number of - * invocations in a sub-group. The maximum - * supported in this extension is 64." - * - * The spec defines this as a uniform. However, it's highly unlikely that - * implementations actually treat it as a uniform (which is loaded from a - * constant buffer). Most likely, this is an implementation-wide constant, - * or perhaps something that depends on the shader stage. - */ - SYSTEM_VALUE_SUBGROUP_SIZE, - - /** - * From the GL_ARB_shader_ballot spec: - * - * "The variable holds the index of the - * invocation within sub-group. This variable is in the range 0 to - * -1, where is the total - * number of invocations in a sub-group." - */ - SYSTEM_VALUE_SUBGROUP_INVOCATION, - - /** - * From the GL_ARB_shader_ballot spec: - * - * "The variables provide a bitmask for all - * invocations, with one bit per invocation starting with the least - * significant bit, according to the following table, - * - * variable equation for bit values - * -------------------- ------------------------------------ - * gl_SubGroupEqMaskARB bit index == gl_SubGroupInvocationARB - * gl_SubGroupGeMaskARB bit index >= gl_SubGroupInvocationARB - * gl_SubGroupGtMaskARB bit index > gl_SubGroupInvocationARB - * gl_SubGroupLeMaskARB bit index <= gl_SubGroupInvocationARB - * gl_SubGroupLtMaskARB bit index < gl_SubGroupInvocationARB - */ - SYSTEM_VALUE_SUBGROUP_EQ_MASK, - SYSTEM_VALUE_SUBGROUP_GE_MASK, - SYSTEM_VALUE_SUBGROUP_GT_MASK, - SYSTEM_VALUE_SUBGROUP_LE_MASK, - SYSTEM_VALUE_SUBGROUP_LT_MASK, - /*@}*/ - - /** - * Builtin variables added by VK_KHR_subgroups - */ - /*@{*/ - SYSTEM_VALUE_NUM_SUBGROUPS, - SYSTEM_VALUE_SUBGROUP_ID, - /*@}*/ - - /*@}*/ - - /** - * \name Vertex shader system values - */ - /*@{*/ - /** - * OpenGL-style vertex ID. - * - * Section 2.11.7 (Shader Execution), subsection Shader Inputs, of the - * OpenGL 3.3 core profile spec says: - * - * "gl_VertexID holds the integer index i implicitly passed by - * DrawArrays or one of the other drawing commands defined in section - * 2.8.3." - * - * Section 2.8.3 (Drawing Commands) of the same spec says: - * - * "The commands....are equivalent to the commands with the same base - * name (without the BaseVertex suffix), except that the ith element - * transferred by the corresponding draw call will be taken from - * element indices[i] + basevertex of each enabled array." - * - * Additionally, the overview in the GL_ARB_shader_draw_parameters spec - * says: - * - * "In unextended GL, vertex shaders have inputs named gl_VertexID and - * gl_InstanceID, which contain, respectively the index of the vertex - * and instance. The value of gl_VertexID is the implicitly passed - * index of the vertex being processed, which includes the value of - * baseVertex, for those commands that accept it." - * - * gl_VertexID gets basevertex added in. This differs from DirectX where - * SV_VertexID does \b not get basevertex added in. - * - * \note - * If all system values are available, \c SYSTEM_VALUE_VERTEX_ID will be - * equal to \c SYSTEM_VALUE_VERTEX_ID_ZERO_BASE plus - * \c SYSTEM_VALUE_BASE_VERTEX. - * - * \sa SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, SYSTEM_VALUE_BASE_VERTEX - */ - SYSTEM_VALUE_VERTEX_ID, - - /** - * Instanced ID as supplied to gl_InstanceID - * - * Values assigned to gl_InstanceID always begin with zero, regardless of - * the value of baseinstance. - * - * Section 11.1.3.9 (Shader Inputs) of the OpenGL 4.4 core profile spec - * says: - * - * "gl_InstanceID holds the integer instance number of the current - * primitive in an instanced draw call (see section 10.5)." - * - * Through a big chain of pseudocode, section 10.5 describes that - * baseinstance is not counted by gl_InstanceID. In that section, notice - * - * "If an enabled vertex attribute array is instanced (it has a - * non-zero divisor as specified by VertexAttribDivisor), the element - * index that is transferred to the GL, for all vertices, is given by - * - * floor(instance/divisor) + baseinstance - * - * If an array corresponding to an attribute required by a vertex - * shader is not enabled, then the corresponding element is taken from - * the current attribute state (see section 10.2)." - * - * Note that baseinstance is \b not included in the value of instance. - */ - SYSTEM_VALUE_INSTANCE_ID, - - /** - * Vulkan InstanceIndex. - * - * InstanceIndex = gl_InstanceID + gl_BaseInstance - */ - SYSTEM_VALUE_INSTANCE_INDEX, - - /** - * DirectX-style vertex ID. - * - * Unlike \c SYSTEM_VALUE_VERTEX_ID, this system value does \b not include - * the value of basevertex. - * - * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_BASE_VERTEX - */ - SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, - - /** - * Value of \c basevertex passed to \c glDrawElementsBaseVertex and similar - * functions. - * - * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE - */ - SYSTEM_VALUE_BASE_VERTEX, - - /** - * Depending on the type of the draw call (indexed or non-indexed), - * is the value of \c basevertex passed to \c glDrawElementsBaseVertex and - * similar, or is the value of \c first passed to \c glDrawArrays and - * similar. - * - * \note - * It can be used to calculate the \c SYSTEM_VALUE_VERTEX_ID as - * \c SYSTEM_VALUE_VERTEX_ID_ZERO_BASE plus \c SYSTEM_VALUE_FIRST_VERTEX. - * - * \sa SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, SYSTEM_VALUE_VERTEX_ID - */ - SYSTEM_VALUE_FIRST_VERTEX, - - /** - * If the Draw command used to start the rendering was an indexed draw - * or not (~0/0). Useful to calculate \c SYSTEM_VALUE_BASE_VERTEX as - * \c SYSTEM_VALUE_IS_INDEXED_DRAW & \c SYSTEM_VALUE_FIRST_VERTEX. - */ - SYSTEM_VALUE_IS_INDEXED_DRAW, - - /** - * Value of \c baseinstance passed to instanced draw entry points - * - * \sa SYSTEM_VALUE_INSTANCE_ID - */ - SYSTEM_VALUE_BASE_INSTANCE, - - /** - * From _ARB_shader_draw_parameters: - * - * "Additionally, this extension adds a further built-in variable, - * gl_DrawID to the shading language. This variable contains the index - * of the draw currently being processed by a Multi* variant of a - * drawing command (such as MultiDrawElements or - * MultiDrawArraysIndirect)." - * - * If GL_ARB_multi_draw_indirect is not supported, this is always 0. - */ - SYSTEM_VALUE_DRAW_ID, - /*@}*/ - - /** - * \name Geometry shader system values - */ - /*@{*/ - SYSTEM_VALUE_INVOCATION_ID, /**< (Also in Tessellation Control shader) */ - /*@}*/ - - /** - * \name Fragment shader system values - */ - /*@{*/ - SYSTEM_VALUE_FRAG_COORD, - SYSTEM_VALUE_POINT_COORD, - SYSTEM_VALUE_FRONT_FACE, - SYSTEM_VALUE_SAMPLE_ID, - SYSTEM_VALUE_SAMPLE_POS, - SYSTEM_VALUE_SAMPLE_MASK_IN, - SYSTEM_VALUE_HELPER_INVOCATION, - SYSTEM_VALUE_COLOR0, - SYSTEM_VALUE_COLOR1, - /*@}*/ - - /** - * \name Tessellation Evaluation shader system values - */ - /*@{*/ - SYSTEM_VALUE_TESS_COORD, - SYSTEM_VALUE_VERTICES_IN, /**< Tessellation vertices in input patch */ - SYSTEM_VALUE_PRIMITIVE_ID, - SYSTEM_VALUE_TESS_LEVEL_OUTER, /**< TES input */ - SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */ - SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT, /**< TCS input for passthru TCS */ - SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT, /**< TCS input for passthru TCS */ - /*@}*/ - - /** - * \name Compute shader system values - */ - /*@{*/ - SYSTEM_VALUE_LOCAL_INVOCATION_ID, - SYSTEM_VALUE_LOCAL_INVOCATION_INDEX, - SYSTEM_VALUE_GLOBAL_INVOCATION_ID, - SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX, - SYSTEM_VALUE_WORK_GROUP_ID, - SYSTEM_VALUE_NUM_WORK_GROUPS, - SYSTEM_VALUE_LOCAL_GROUP_SIZE, - SYSTEM_VALUE_GLOBAL_GROUP_SIZE, - SYSTEM_VALUE_WORK_DIM, - SYSTEM_VALUE_USER_DATA_AMD, - /*@}*/ - - /** Required for VK_KHR_device_group */ - SYSTEM_VALUE_DEVICE_INDEX, - - /** Required for VK_KHX_multiview */ - SYSTEM_VALUE_VIEW_INDEX, - - /** - * Driver internal vertex-count, used (for example) for drivers to - * calculate stride for stream-out outputs. Not externally visible. - */ - SYSTEM_VALUE_VERTEX_CNT, - - /** - * Required for AMD_shader_explicit_vertex_parameter and also used for - * varying-fetch instructions. - * - * The _SIZE value is "primitive size", used to scale i/j in primitive - * space to pixel space. - */ - SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL, - SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE, - SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID, - SYSTEM_VALUE_BARYCENTRIC_PERSP_SIZE, - SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL, - SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID, - SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE, - SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL, - - /** - * IR3 specific geometry shader and tesselation control shader system - * values that packs invocation id, thread id and vertex id. Having this - * as a nir level system value lets us do the unpacking in nir. - */ - SYSTEM_VALUE_GS_HEADER_IR3, - SYSTEM_VALUE_TCS_HEADER_IR3, - - SYSTEM_VALUE_MAX /**< Number of values */ -} gl_system_value; - -const char *gl_system_value_name(gl_system_value sysval); - -/** - * The possible interpolation qualifiers that can be applied to a fragment - * shader input in GLSL. - * - * Note: INTERP_MODE_NONE must be 0 so that memsetting the - * ir_variable data structure to 0 causes the default behavior. - */ -enum glsl_interp_mode -{ - INTERP_MODE_NONE = 0, - INTERP_MODE_SMOOTH, - INTERP_MODE_FLAT, - INTERP_MODE_NOPERSPECTIVE, - INTERP_MODE_EXPLICIT, - INTERP_MODE_COUNT /**< Number of interpolation qualifiers */ -}; - -enum glsl_interface_packing { - GLSL_INTERFACE_PACKING_STD140, - GLSL_INTERFACE_PACKING_SHARED, - GLSL_INTERFACE_PACKING_PACKED, - GLSL_INTERFACE_PACKING_STD430 -}; - -const char *glsl_interp_mode_name(enum glsl_interp_mode qual); - -/** - * Fragment program results - */ -typedef enum -{ - FRAG_RESULT_DEPTH = 0, - FRAG_RESULT_STENCIL = 1, - /* If a single color should be written to all render targets, this - * register is written. No FRAG_RESULT_DATAn will be written. - */ - FRAG_RESULT_COLOR = 2, - FRAG_RESULT_SAMPLE_MASK = 3, - - /* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n] - * or ARB_fragment_program fragment.color[n]) color results. If - * any are written, FRAG_RESULT_COLOR will not be written. - * FRAG_RESULT_DATA1 and up are simply for the benefit of - * gl_frag_result_name() and not to be construed as an upper bound - */ - FRAG_RESULT_DATA0 = 4, - FRAG_RESULT_DATA1, - FRAG_RESULT_DATA2, - FRAG_RESULT_DATA3, - FRAG_RESULT_DATA4, - FRAG_RESULT_DATA5, - FRAG_RESULT_DATA6, - FRAG_RESULT_DATA7, -} gl_frag_result; - -const char *gl_frag_result_name(gl_frag_result result); - -#define FRAG_RESULT_MAX (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) - -/** - * \brief Layout qualifiers for gl_FragDepth. - * - * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with - * a layout qualifier. - * - * \see enum ir_depth_layout - */ -enum gl_frag_depth_layout -{ - FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */ - FRAG_DEPTH_LAYOUT_ANY, - FRAG_DEPTH_LAYOUT_GREATER, - FRAG_DEPTH_LAYOUT_LESS, - FRAG_DEPTH_LAYOUT_UNCHANGED -}; - -/** - * \brief Buffer access qualifiers - */ -enum gl_access_qualifier -{ - ACCESS_COHERENT = (1 << 0), - ACCESS_RESTRICT = (1 << 1), - ACCESS_VOLATILE = (1 << 2), - ACCESS_NON_READABLE = (1 << 3), - ACCESS_NON_WRITEABLE = (1 << 4), - - /** The access may use a non-uniform buffer or image index */ - ACCESS_NON_UNIFORM = (1 << 5), - - /* This has the same semantics as NIR_INTRINSIC_CAN_REORDER, only to be - * used with loads. In other words, it means that the load can be - * arbitrarily reordered, or combined with other loads to the same address. - * It is implied by ACCESS_NON_WRITEABLE together with ACCESS_RESTRICT, and - * a lack of ACCESS_COHERENT and ACCESS_VOLATILE. - */ - ACCESS_CAN_REORDER = (1 << 6), - - /** Use as little cache space as possible. */ - ACCESS_STREAM_CACHE_POLICY = (1 << 7), -}; - -/** - * \brief Blend support qualifiers - */ -enum gl_advanced_blend_mode -{ - BLEND_NONE = 0x0000, - - BLEND_MULTIPLY = 0x0001, - BLEND_SCREEN = 0x0002, - BLEND_OVERLAY = 0x0004, - BLEND_DARKEN = 0x0008, - BLEND_LIGHTEN = 0x0010, - BLEND_COLORDODGE = 0x0020, - BLEND_COLORBURN = 0x0040, - BLEND_HARDLIGHT = 0x0080, - BLEND_SOFTLIGHT = 0x0100, - BLEND_DIFFERENCE = 0x0200, - BLEND_EXCLUSION = 0x0400, - BLEND_HSL_HUE = 0x0800, - BLEND_HSL_SATURATION = 0x1000, - BLEND_HSL_COLOR = 0x2000, - BLEND_HSL_LUMINOSITY = 0x4000, - - BLEND_ALL = 0x7fff, -}; - -enum blend_func -{ - BLEND_FUNC_ADD, - BLEND_FUNC_SUBTRACT, - BLEND_FUNC_REVERSE_SUBTRACT, - BLEND_FUNC_MIN, - BLEND_FUNC_MAX, -}; - -enum blend_factor -{ - BLEND_FACTOR_ZERO, - BLEND_FACTOR_SRC_COLOR, - BLEND_FACTOR_DST_COLOR, - BLEND_FACTOR_SRC_ALPHA, - BLEND_FACTOR_DST_ALPHA, - BLEND_FACTOR_CONSTANT_COLOR, - BLEND_FACTOR_CONSTANT_ALPHA, - BLEND_FACTOR_SRC_ALPHA_SATURATE, -}; - -enum gl_tess_spacing -{ - TESS_SPACING_UNSPECIFIED, - TESS_SPACING_EQUAL, - TESS_SPACING_FRACTIONAL_ODD, - TESS_SPACING_FRACTIONAL_EVEN, -}; - -/** - * A compare function enum for use in compiler lowering passes. This is in - * the same order as GL's compare functions (shifted down by GL_NEVER), and is - * exactly the same as gallium's PIPE_FUNC_*. - */ -enum compare_func -{ - COMPARE_FUNC_NEVER, - COMPARE_FUNC_LESS, - COMPARE_FUNC_EQUAL, - COMPARE_FUNC_LEQUAL, - COMPARE_FUNC_GREATER, - COMPARE_FUNC_NOTEQUAL, - COMPARE_FUNC_GEQUAL, - COMPARE_FUNC_ALWAYS, -}; - -/** - * Arrangements for grouping invocations from NV_compute_shader_derivatives. - * - * The extension provides new layout qualifiers that support two different - * arrangements of compute shader invocations for the purpose of derivative - * computation. When specifying - * - * layout(derivative_group_quadsNV) in; - * - * compute shader invocations are grouped into 2x2x1 arrays whose four local - * invocation ID values follow the pattern: - * - * +-----------------+------------------+ - * | (2x+0, 2y+0, z) | (2x+1, 2y+0, z) | - * +-----------------+------------------+ - * | (2x+0, 2y+1, z) | (2x+1, 2y+1, z) | - * +-----------------+------------------+ - * - * where Y increases from bottom to top. When specifying - * - * layout(derivative_group_linearNV) in; - * - * compute shader invocations are grouped into 2x2x1 arrays whose four local - * invocation index values follow the pattern: - * - * +------+------+ - * | 4n+0 | 4n+1 | - * +------+------+ - * | 4n+2 | 4n+3 | - * +------+------+ - * - * If neither layout qualifier is specified, derivatives in compute shaders - * return zero, which is consistent with the handling of built-in texture - * functions like texture() in GLSL 4.50 compute shaders. - */ -enum gl_derivative_group { - DERIVATIVE_GROUP_NONE = 0, - DERIVATIVE_GROUP_QUADS, - DERIVATIVE_GROUP_LINEAR, -}; - -enum float_controls -{ - FLOAT_CONTROLS_DEFAULT_FLOAT_CONTROL_MODE = 0x0000, - FLOAT_CONTROLS_DENORM_PRESERVE_FP16 = 0x0001, - FLOAT_CONTROLS_DENORM_PRESERVE_FP32 = 0x0002, - FLOAT_CONTROLS_DENORM_PRESERVE_FP64 = 0x0004, - FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 = 0x0008, - FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32 = 0x0010, - FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64 = 0x0020, - FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16 = 0x0040, - FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32 = 0x0080, - FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64 = 0x0100, - FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 = 0x0200, - FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32 = 0x0400, - FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64 = 0x0800, - FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 = 0x1000, - FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32 = 0x2000, - FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64 = 0x4000, -}; - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* SHADER_ENUMS_H */ diff --git a/selfdrive/modeld/thneed/debug/decompiler/util/bitset.h b/selfdrive/modeld/thneed/debug/decompiler/util/bitset.h deleted file mode 100644 index 264144c39b..0000000000 --- a/selfdrive/modeld/thneed/debug/decompiler/util/bitset.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2006 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file bitset.h - * \brief Bitset of arbitrary size definitions. - * \author Michal Krol - */ - -#ifndef BITSET_H -#define BITSET_H - -//#include "util/bitscan.h" -//#include "util/macros.h" - -/**************************************************************************** - * generic bitset implementation - */ - -#define BITSET_WORD unsigned int -#define BITSET_WORDBITS (sizeof (BITSET_WORD) * 8) - -/* bitset declarations - */ -#define BITSET_WORDS(bits) (((bits) + BITSET_WORDBITS - 1) / BITSET_WORDBITS) -#define BITSET_DECLARE(name, bits) BITSET_WORD name[BITSET_WORDS(bits)] - -/* bitset operations - */ -#define BITSET_COPY(x, y) memcpy( (x), (y), sizeof (x) ) -#define BITSET_EQUAL(x, y) (memcmp( (x), (y), sizeof (x) ) == 0) -#define BITSET_ZERO(x) memset( (x), 0, sizeof (x) ) -#define BITSET_ONES(x) memset( (x), 0xff, sizeof (x) ) - -#define BITSET_BITWORD(b) ((b) / BITSET_WORDBITS) -#define BITSET_BIT(b) (1u << ((b) % BITSET_WORDBITS)) - -/* single bit operations - */ -#define BITSET_TEST(x, b) (((x)[BITSET_BITWORD(b)] & BITSET_BIT(b)) != 0) -#define BITSET_SET(x, b) ((x)[BITSET_BITWORD(b)] |= BITSET_BIT(b)) -#define BITSET_CLEAR(x, b) ((x)[BITSET_BITWORD(b)] &= ~BITSET_BIT(b)) - -#define BITSET_MASK(b) (((b) % BITSET_WORDBITS == 0) ? ~0 : BITSET_BIT(b) - 1) -#define BITSET_RANGE(b, e) ((BITSET_MASK((e) + 1)) & ~(BITSET_BIT(b) - 1)) - -/* bit range operations - */ -#define BITSET_TEST_RANGE(x, b, e) \ - (BITSET_BITWORD(b) == BITSET_BITWORD(e) ? \ - (((x)[BITSET_BITWORD(b)] & BITSET_RANGE(b, e)) != 0) : \ - (assert (!"BITSET_TEST_RANGE: bit range crosses word boundary"), 0)) -#define BITSET_SET_RANGE(x, b, e) \ - (BITSET_BITWORD(b) == BITSET_BITWORD(e) ? \ - ((x)[BITSET_BITWORD(b)] |= BITSET_RANGE(b, e)) : \ - (assert (!"BITSET_SET_RANGE: bit range crosses word boundary"), 0)) -#define BITSET_CLEAR_RANGE(x, b, e) \ - (BITSET_BITWORD(b) == BITSET_BITWORD(e) ? \ - ((x)[BITSET_BITWORD(b)] &= ~BITSET_RANGE(b, e)) : \ - (assert (!"BITSET_CLEAR_RANGE: bit range crosses word boundary"), 0)) - -/* Get first bit set in a bitset. - */ -static inline int -__bitset_ffs(const BITSET_WORD *x, int n) -{ - int i; - - for (i = 0; i < n; i++) { - if (x[i]) - return ffs(x[i]) + BITSET_WORDBITS * i; - } - - return 0; -} - -#define BITSET_FFS(x) __bitset_ffs(x, ARRAY_SIZE(x)) - -static inline unsigned -__bitset_next_set(unsigned i, BITSET_WORD *tmp, - const BITSET_WORD *set, unsigned size) -{ - unsigned bit, word; - - /* NOTE: The initial conditions for this function are very specific. At - * the start of the loop, the tmp variable must be set to *set and the - * initial i value set to 0. This way, if there is a bit set in the first - * word, we ignore the i-value and just grab that bit (so 0 is ok, even - * though 0 may be returned). If the first word is 0, then the value of - * `word` will be 0 and we will go on to look at the second word. - */ - word = BITSET_BITWORD(i); - while (*tmp == 0) { - word++; - - if (word >= BITSET_WORDS(size)) - return size; - - *tmp = set[word]; - } - - /* Find the next set bit in the non-zero word */ - bit = ffs(*tmp) - 1; - - /* Unset the bit */ - *tmp &= ~(1ull << bit); - - return word * BITSET_WORDBITS + bit; -} - -/** - * Iterates over each set bit in a set - * - * @param __i iteration variable, bit number - * @param __set the bitset to iterate (will not be modified) - * @param __size number of bits in the set to consider - */ -#define BITSET_FOREACH_SET(__i, __set, __size) \ - for (BITSET_WORD __tmp = *(__set), *__foo = &__tmp; __foo != NULL; __foo = NULL) \ - for (__i = 0; \ - (__i = __bitset_next_set(__i, &__tmp, __set, __size)) < __size;) - -#ifdef __cplusplus - -/** - * Simple C++ wrapper of a bitset type of static size, with value semantics - * and basic bitwise arithmetic operators. The operators defined below are - * expected to have the same semantics as the same operator applied to other - * fundamental integer types. T is the name of the struct to instantiate - * it as, and N is the number of bits in the bitset. - */ -#define DECLARE_BITSET_T(T, N) struct T { \ - EXPLICIT_CONVERSION \ - operator bool() const \ - { \ - for (unsigned i = 0; i < BITSET_WORDS(N); i++) \ - if (words[i]) \ - return true; \ - return false; \ - } \ - \ - T & \ - operator=(int x) \ - { \ - const T c = {{ (BITSET_WORD)x }}; \ - return *this = c; \ - } \ - \ - friend bool \ - operator==(const T &b, const T &c) \ - { \ - return BITSET_EQUAL(b.words, c.words); \ - } \ - \ - friend bool \ - operator!=(const T &b, const T &c) \ - { \ - return !(b == c); \ - } \ - \ - friend bool \ - operator==(const T &b, int x) \ - { \ - const T c = {{ (BITSET_WORD)x }}; \ - return b == c; \ - } \ - \ - friend bool \ - operator!=(const T &b, int x) \ - { \ - return !(b == x); \ - } \ - \ - friend T \ - operator~(const T &b) \ - { \ - T c; \ - for (unsigned i = 0; i < BITSET_WORDS(N); i++) \ - c.words[i] = ~b.words[i]; \ - return c; \ - } \ - \ - T & \ - operator|=(const T &b) \ - { \ - for (unsigned i = 0; i < BITSET_WORDS(N); i++) \ - words[i] |= b.words[i]; \ - return *this; \ - } \ - \ - friend T \ - operator|(const T &b, const T &c) \ - { \ - T d = b; \ - d |= c; \ - return d; \ - } \ - \ - T & \ - operator&=(const T &b) \ - { \ - for (unsigned i = 0; i < BITSET_WORDS(N); i++) \ - words[i] &= b.words[i]; \ - return *this; \ - } \ - \ - friend T \ - operator&(const T &b, const T &c) \ - { \ - T d = b; \ - d &= c; \ - return d; \ - } \ - \ - bool \ - test(unsigned i) const \ - { \ - return BITSET_TEST(words, i); \ - } \ - \ - T & \ - set(unsigned i) \ - { \ - BITSET_SET(words, i); \ - return *this; \ - } \ - \ - T & \ - clear(unsigned i) \ - { \ - BITSET_CLEAR(words, i); \ - return *this; \ - } \ - \ - BITSET_WORD words[BITSET_WORDS(N)]; \ - } - -#endif - -#endif diff --git a/selfdrive/modeld/thneed/debug/decompiler/util/list.h b/selfdrive/modeld/thneed/debug/decompiler/util/list.h deleted file mode 100644 index 7f36e8c39d..0000000000 --- a/selfdrive/modeld/thneed/debug/decompiler/util/list.h +++ /dev/null @@ -1,262 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 VMware, Inc., Bismarck, ND. USA. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - **************************************************************************/ - -/** - * \file - * List macros heavily inspired by the Linux kernel - * list handling. No list looping yet. - * - * Is not threadsafe, so common operations need to - * be protected using an external mutex. - */ - -#ifndef _UTIL_LIST_H_ -#define _UTIL_LIST_H_ - - -#include -#include -#include - -#ifdef DEBUG -# define list_assert(cond, msg) assert(cond && msg) -#else -# define list_assert(cond, msg) (void)(0 && (cond)) -#endif - -struct list_head -{ - struct list_head *prev; - struct list_head *next; -}; - -static inline void list_inithead(struct list_head *item) -{ - item->prev = item; - item->next = item; -} - -static inline void list_add(struct list_head *item, struct list_head *list) -{ - item->prev = list; - item->next = list->next; - list->next->prev = item; - list->next = item; -} - -static inline void list_addtail(struct list_head *item, struct list_head *list) -{ - item->next = list; - item->prev = list->prev; - list->prev->next = item; - list->prev = item; -} - -static inline bool list_is_empty(const struct list_head *list); - -static inline void list_replace(struct list_head *from, struct list_head *to) -{ - if (list_is_empty(from)) { - list_inithead(to); - } else { - to->prev = from->prev; - to->next = from->next; - from->next->prev = to; - from->prev->next = to; - } -} - -static inline void list_del(struct list_head *item) -{ - item->prev->next = item->next; - item->next->prev = item->prev; - item->prev = item->next = NULL; -} - -static inline void list_delinit(struct list_head *item) -{ - item->prev->next = item->next; - item->next->prev = item->prev; - item->next = item; - item->prev = item; -} - -static inline bool list_is_empty(const struct list_head *list) -{ - return list->next == list; -} - -/** - * Returns whether the list has exactly one element. - */ -static inline bool list_is_singular(const struct list_head *list) -{ - return list->next != NULL && list->next != list && list->next->next == list; -} - -static inline unsigned list_length(const struct list_head *list) -{ - struct list_head *node; - unsigned length = 0; - for (node = list->next; node != list; node = node->next) - length++; - return length; -} - -static inline void list_splice(struct list_head *src, struct list_head *dst) -{ - if (list_is_empty(src)) - return; - - src->next->prev = dst; - src->prev->next = dst->next; - dst->next->prev = src->prev; - dst->next = src->next; -} - -static inline void list_splicetail(struct list_head *src, struct list_head *dst) -{ - if (list_is_empty(src)) - return; - - src->prev->next = dst; - src->next->prev = dst->prev; - dst->prev->next = src->next; - dst->prev = src->prev; -} - -static inline void list_validate(const struct list_head *list) -{ - struct list_head *node; - assert(list->next->prev == list && list->prev->next == list); - for (node = list->next; node != list; node = node->next) - assert(node->next->prev == node && node->prev->next == node); -} - -#define LIST_ENTRY(__type, __item, __field) \ - ((__type *)(((char *)(__item)) - offsetof(__type, __field))) - -/** - * Cast from a pointer to a member of a struct back to the containing struct. - * - * 'sample' MUST be initialized, or else the result is undefined! - */ -#ifndef container_of -#define container_of(ptr, sample, member) \ - (void *)((char *)(ptr) \ - - ((char *)&(sample)->member - (char *)(sample))) -#endif - -#define list_first_entry(ptr, type, member) \ - LIST_ENTRY(type, (ptr)->next, member) - -#define list_last_entry(ptr, type, member) \ - LIST_ENTRY(type, (ptr)->prev, member) - - -#define LIST_FOR_EACH_ENTRY(pos, head, member) \ - for (pos = NULL, pos = container_of((head)->next, pos, member); \ - &pos->member != (head); \ - pos = container_of(pos->member.next, pos, member)) - -#define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member) \ - for (pos = NULL, pos = container_of((head)->next, pos, member), \ - storage = container_of(pos->member.next, pos, member); \ - &pos->member != (head); \ - pos = storage, storage = container_of(storage->member.next, storage, member)) - -#define LIST_FOR_EACH_ENTRY_SAFE_REV(pos, storage, head, member) \ - for (pos = NULL, pos = container_of((head)->prev, pos, member), \ - storage = container_of(pos->member.prev, pos, member); \ - &pos->member != (head); \ - pos = storage, storage = container_of(storage->member.prev, storage, member)) - -#define LIST_FOR_EACH_ENTRY_FROM(pos, start, head, member) \ - for (pos = NULL, pos = container_of((start), pos, member); \ - &pos->member != (head); \ - pos = container_of(pos->member.next, pos, member)) - -#define LIST_FOR_EACH_ENTRY_FROM_REV(pos, start, head, member) \ - for (pos = NULL, pos = container_of((start), pos, member); \ - &pos->member != (head); \ - pos = container_of(pos->member.prev, pos, member)) - -#define list_for_each_entry(type, pos, head, member) \ - for (type *pos = LIST_ENTRY(type, (head)->next, member), \ - *__next = LIST_ENTRY(type, pos->member.next, member); \ - &pos->member != (head); \ - pos = LIST_ENTRY(type, pos->member.next, member), \ - list_assert(pos == __next, "use _safe iterator"), \ - __next = LIST_ENTRY(type, __next->member.next, member)) - -#define list_for_each_entry_safe(type, pos, head, member) \ - for (type *pos = LIST_ENTRY(type, (head)->next, member), \ - *__next = LIST_ENTRY(type, pos->member.next, member); \ - &pos->member != (head); \ - pos = __next, \ - __next = LIST_ENTRY(type, __next->member.next, member)) - -#define list_for_each_entry_rev(type, pos, head, member) \ - for (type *pos = LIST_ENTRY(type, (head)->prev, member), \ - *__prev = LIST_ENTRY(type, pos->member.prev, member); \ - &pos->member != (head); \ - pos = LIST_ENTRY(type, pos->member.prev, member), \ - list_assert(pos == __prev, "use _safe iterator"), \ - __prev = LIST_ENTRY(type, __prev->member.prev, member)) - -#define list_for_each_entry_safe_rev(type, pos, head, member) \ - for (type *pos = LIST_ENTRY(type, (head)->prev, member), \ - *__prev = LIST_ENTRY(type, pos->member.prev, member); \ - &pos->member != (head); \ - pos = __prev, \ - __prev = LIST_ENTRY(type, __prev->member.prev, member)) - -#define list_for_each_entry_from(type, pos, start, head, member) \ - for (type *pos = LIST_ENTRY(type, (start), member); \ - &pos->member != (head); \ - pos = LIST_ENTRY(type, pos->member.next, member)) - -#define list_for_each_entry_from_safe(type, pos, start, head, member) \ - for (type *pos = LIST_ENTRY(type, (start), member), \ - *__next = LIST_ENTRY(type, pos->member.next, member); \ - &pos->member != (head); \ - pos = __next, \ - __next = LIST_ENTRY(type, __next->member.next, member)) - -#define list_for_each_entry_from_rev(type, pos, start, head, member) \ - for (type *pos = LIST_ENTRY(type, (start), member); \ - &pos->member != (head); \ - pos = LIST_ENTRY(type, pos->member.prev, member)) - -#define list_pair_for_each_entry(type, pos1, pos2, head1, head2, member) \ - for (type *pos1 = LIST_ENTRY(type, (head1)->next, member), \ - *pos2 = LIST_ENTRY(type, (head2)->next, member); \ - &pos1->member != (head1) && &pos2->member != (head2); \ - pos1 = LIST_ENTRY(type, pos1->member.next, member), \ - pos2 = LIST_ENTRY(type, pos2->member.next, member)) - -#endif /*_UTIL_LIST_H_*/ diff --git a/selfdrive/modeld/thneed/debug/decompiler/util/macros.h b/selfdrive/modeld/thneed/debug/decompiler/util/macros.h deleted file mode 100644 index a36bdd411e..0000000000 --- a/selfdrive/modeld/thneed/debug/decompiler/util/macros.h +++ /dev/null @@ -1,346 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef UTIL_MACROS_H -#define UTIL_MACROS_H - -#include - -/* Compute the size of an array */ -#ifndef ARRAY_SIZE -# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - -/* For compatibility with Clang's __has_builtin() */ -#ifndef __has_builtin -# define __has_builtin(x) 0 -#endif - -/** - * __builtin_expect macros - */ -#if !defined(HAVE___BUILTIN_EXPECT) -# define __builtin_expect(x, y) (x) -#endif - -#ifndef likely -# ifdef HAVE___BUILTIN_EXPECT -# define likely(x) __builtin_expect(!!(x), 1) -# define unlikely(x) __builtin_expect(!!(x), 0) -# else -# define likely(x) (x) -# define unlikely(x) (x) -# endif -#endif - - -/** - * Static (compile-time) assertion. - * Basically, use COND to dimension an array. If COND is false/zero the - * array size will be -1 and we'll get a compilation error. - */ -#define STATIC_ASSERT(COND) \ - do { \ - (void) sizeof(char [1 - 2*!(COND)]); \ - } while (0) - - -/** - * Unreachable macro. Useful for suppressing "control reaches end of non-void - * function" warnings. - */ -#if defined(HAVE___BUILTIN_UNREACHABLE) || __has_builtin(__builtin_unreachable) -#define unreachable(str) \ -do { \ - assert(!str); \ - __builtin_unreachable(); \ -} while (0) -#elif defined (_MSC_VER) -#define unreachable(str) \ -do { \ - assert(!str); \ - __assume(0); \ -} while (0) -#else -#define unreachable(str) assert(!str) -#endif - -/** - * Assume macro. Useful for expressing our assumptions to the compiler, - * typically for purposes of silencing warnings. - */ -#if __has_builtin(__builtin_assume) -#define assume(expr) \ -do { \ - assert(expr); \ - __builtin_assume(expr); \ -} while (0) -#elif defined HAVE___BUILTIN_UNREACHABLE -#define assume(expr) ((expr) ? ((void) 0) \ - : (assert(!"assumption failed"), \ - __builtin_unreachable())) -#elif defined (_MSC_VER) -#define assume(expr) __assume(expr) -#else -#define assume(expr) assert(expr) -#endif - -/* Attribute const is used for functions that have no effects other than their - * return value, and only rely on the argument values to compute the return - * value. As a result, calls to it can be CSEed. Note that using memory - * pointed to by the arguments is not allowed for const functions. - */ -#ifdef HAVE_FUNC_ATTRIBUTE_CONST -#define ATTRIBUTE_CONST __attribute__((__const__)) -#else -#define ATTRIBUTE_CONST -#endif - -#ifdef HAVE_FUNC_ATTRIBUTE_FLATTEN -#define FLATTEN __attribute__((__flatten__)) -#else -#define FLATTEN -#endif - -#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT -#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a))) -#else -#define PRINTFLIKE(f, a) -#endif - -#ifdef HAVE_FUNC_ATTRIBUTE_MALLOC -#define MALLOCLIKE __attribute__((__malloc__)) -#else -#define MALLOCLIKE -#endif - -/* Forced function inlining */ -/* Note: Clang also sets __GNUC__ (see other cases below) */ -#ifndef ALWAYS_INLINE -# if defined(__GNUC__) -# define ALWAYS_INLINE inline __attribute__((always_inline)) -# elif defined(_MSC_VER) -# define ALWAYS_INLINE __forceinline -# else -# define ALWAYS_INLINE inline -# endif -#endif - -/* Used to optionally mark structures with misaligned elements or size as - * packed, to trade off performance for space. - */ -#ifdef HAVE_FUNC_ATTRIBUTE_PACKED -#define PACKED __attribute__((__packed__)) -#else -#define PACKED -#endif - -/* Attribute pure is used for functions that have no effects other than their - * return value. As a result, calls to it can be dead code eliminated. - */ -#ifdef HAVE_FUNC_ATTRIBUTE_PURE -#define ATTRIBUTE_PURE __attribute__((__pure__)) -#else -#define ATTRIBUTE_PURE -#endif - -#ifdef HAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL -#define ATTRIBUTE_RETURNS_NONNULL __attribute__((__returns_nonnull__)) -#else -#define ATTRIBUTE_RETURNS_NONNULL -#endif - -#ifndef NORETURN -# ifdef _MSC_VER -# define NORETURN __declspec(noreturn) -# elif defined HAVE_FUNC_ATTRIBUTE_NORETURN -# define NORETURN __attribute__((__noreturn__)) -# else -# define NORETURN -# endif -#endif - -#ifdef __cplusplus -/** - * Macro function that evaluates to true if T is a trivially - * destructible type -- that is, if its (non-virtual) destructor - * performs no action and all member variables and base classes are - * trivially destructible themselves. - */ -# if (defined(__clang__) && defined(__has_feature)) -# if __has_feature(has_trivial_destructor) -# define HAS_TRIVIAL_DESTRUCTOR(T) __has_trivial_destructor(T) -# endif -# elif defined(__GNUC__) -# if ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))) -# define HAS_TRIVIAL_DESTRUCTOR(T) __has_trivial_destructor(T) -# endif -# elif defined(_MSC_VER) && !defined(__INTEL_COMPILER) -# define HAS_TRIVIAL_DESTRUCTOR(T) __has_trivial_destructor(T) -# endif -# ifndef HAS_TRIVIAL_DESTRUCTOR - /* It's always safe (if inefficient) to assume that a - * destructor is non-trivial. - */ -# define HAS_TRIVIAL_DESTRUCTOR(T) (false) -# endif -#endif - -/** - * PUBLIC/USED macros - * - * If we build the library with gcc's -fvisibility=hidden flag, we'll - * use the PUBLIC macro to mark functions that are to be exported. - * - * We also need to define a USED attribute, so the optimizer doesn't - * inline a static function that we later use in an alias. - ajax - */ -#ifndef PUBLIC -# if defined(__GNUC__) -# define PUBLIC __attribute__((visibility("default"))) -# define USED __attribute__((used)) -# elif defined(_MSC_VER) -# define PUBLIC __declspec(dllexport) -# define USED -# else -# define PUBLIC -# define USED -# endif -#endif - -/** - * UNUSED marks variables (or sometimes functions) that have to be defined, - * but are sometimes (or always) unused beyond that. A common case is for - * a function parameter to be used in some build configurations but not others. - * Another case is fallback vfuncs that don't do anything with their params. - * - * Note that this should not be used for identifiers used in `assert()`; - * see ASSERTED below. - */ -#ifdef HAVE_FUNC_ATTRIBUTE_UNUSED -#define UNUSED __attribute__((unused)) -#else -#define UNUSED -#endif - -/** - * Use ASSERTED to indicate that an identifier is unused outside of an `assert()`, - * so that assert-free builds don't get "unused variable" warnings. - */ -#ifdef NDEBUG -#define ASSERTED UNUSED -#else -#define ASSERTED -#endif - -#ifdef HAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT -#define MUST_CHECK __attribute__((warn_unused_result)) -#else -#define MUST_CHECK -#endif - -#if defined(__GNUC__) -#define ATTRIBUTE_NOINLINE __attribute__((noinline)) -#else -#define ATTRIBUTE_NOINLINE -#endif - - -/** - * Check that STRUCT::FIELD can hold MAXVAL. We use a lot of bitfields - * in Mesa/gallium. We have to be sure they're of sufficient size to - * hold the largest expected value. - * Note that with MSVC, enums are signed and enum bitfields need one extra - * high bit (always zero) to ensure the max value is handled correctly. - * This macro will detect that with MSVC, but not GCC. - */ -#define ASSERT_BITFIELD_SIZE(STRUCT, FIELD, MAXVAL) \ - do { \ - ASSERTED STRUCT s; \ - s.FIELD = (MAXVAL); \ - assert((int) s.FIELD == (MAXVAL) && "Insufficient bitfield size!"); \ - } while (0) - - -/** Compute ceiling of integer quotient of A divided by B. */ -#define DIV_ROUND_UP( A, B ) ( ((A) + (B) - 1) / (B) ) - -/** Clamp X to [MIN,MAX]. Turn NaN into MIN, arbitrarily. */ -#define CLAMP( X, MIN, MAX ) ( (X)>(MIN) ? ((X)>(MAX) ? (MAX) : (X)) : (MIN) ) - -/** Minimum of two values: */ -#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) - -/** Maximum of two values: */ -#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) - -/** Minimum and maximum of three values: */ -#define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C)) -#define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C)) - -/** Align a value to a power of two */ -#define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1)) - -/** - * Macro for declaring an explicit conversion operator. Defaults to an - * implicit conversion if C++11 is not supported. - */ -#if __cplusplus >= 201103L -#define EXPLICIT_CONVERSION explicit -#elif defined(__cplusplus) -#define EXPLICIT_CONVERSION -#endif - -/** Set a single bit */ -#define BITFIELD_BIT(b) (1u << (b)) -/** Set all bits up to excluding bit b */ -#define BITFIELD_MASK(b) \ - ((b) == 32 ? (~0u) : BITFIELD_BIT((b) % 32) - 1) -/** Set count bits starting from bit b */ -#define BITFIELD_RANGE(b, count) \ - (BITFIELD_MASK((b) + (count)) & ~BITFIELD_MASK(b)) - -/** Set a single bit */ -#define BITFIELD64_BIT(b) (1ull << (b)) -/** Set all bits up to excluding bit b */ -#define BITFIELD64_MASK(b) \ - ((b) == 64 ? (~0ull) : BITFIELD64_BIT(b) - 1) -/** Set count bits starting from bit b */ -#define BITFIELD64_RANGE(b, count) \ - (BITFIELD64_MASK((b) + (count)) & ~BITFIELD64_MASK(b)) - -/* TODO: In future we should try to move this to u_debug.h once header - * dependencies are reorganised to allow this. - */ -enum pipe_debug_type -{ - PIPE_DEBUG_TYPE_OUT_OF_MEMORY = 1, - PIPE_DEBUG_TYPE_ERROR, - PIPE_DEBUG_TYPE_SHADER_INFO, - PIPE_DEBUG_TYPE_PERF_INFO, - PIPE_DEBUG_TYPE_INFO, - PIPE_DEBUG_TYPE_FALLBACK, - PIPE_DEBUG_TYPE_CONFORMANCE, -}; - -#endif /* UTIL_MACROS_H */ diff --git a/selfdrive/modeld/thneed/debug/disassembler.cc b/selfdrive/modeld/thneed/debug/disassembler.cc deleted file mode 100644 index c1f7e6332c..0000000000 --- a/selfdrive/modeld/thneed/debug/disassembler.cc +++ /dev/null @@ -1,132 +0,0 @@ -#include "debug/include/adreno_pm4types.h" -#define REG_A5XX_TPL1_CS_TEX_CONST_LO 0x0000e760 -#define REG_A5XX_TPL1_CS_TEX_SAMP_LO 0x0000e75c -#define REG_A5XX_SP_CS_CTRL_REG0 0x0000e5f0 - -std::map regs = { - {0x0000e760, "REG_A5XX_TPL1_CS_TEX_CONST_LO"}, - {0x0000e75c, "REG_A5XX_TPL1_CS_TEX_SAMP_LO"}, - {0x00000e06, "REG_A5XX_HLSQ_MODE_CNTL"}, - {0x00000e91, "REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO"}, - {0x00000ec2, "REG_A5XX_SP_MODE_CNTL"}, - {0x0000e580, "REG_A5XX_SP_SP_CNTL"}, - {0x0000e5f0, "REG_A5XX_SP_CS_CTRL_REG0"}, - {0x0000e796, "REG_A5XX_HLSQ_CS_CNTL"}, - {0x0000e784, "REG_A5XX_HLSQ_CONTROL_0_REG"}, - {0x0000e7b0, "REG_A5XX_HLSQ_CS_NDRANGE_0"}, - {0x0000e7b9, "REG_A5XX_HLSQ_CS_KERNEL_GROUP_X"}, - {0x00000cdd, "REG_A5XX_VSC_RESOLVE_CNTL"}, -}; - -std::map ops = { - {33, "CP_REG_RMW"}, - {62, "CP_REG_TO_MEM"}, - {49, "CP_RUN_OPENCL"}, - {16, "CP_NOP"}, - {38, "CP_WAIT_FOR_IDLE"}, - {110, "CP_COMPUTE_CHECKPOINT"}, - {48, "CP_LOAD_STATE"}, -}; - -void CachedCommand::disassemble() { - uint32_t *src = (uint32_t *)cmds[1].gpuaddr; - int len = cmds[1].size/4; - printf("disassemble %p %d\n", src, len); - - int i = 0; - while (i < len) { - int pktsize; - int pkttype = -1; - - if (pkt_is_type0(src[i])) { - pkttype = 0; - pktsize = type0_pkt_size(src[i]); - } else if (pkt_is_type3(src[i])) { - pkttype = 3; - pktsize = type3_pkt_size(src[i]); - } else if (pkt_is_type4(src[i])) { - pkttype = 4; - pktsize = type4_pkt_size(src[i]); - } else if (pkt_is_type7(src[i])) { - pkttype = 7; - pktsize = type7_pkt_size(src[i]); - } - printf("%3d: type:%d size:%d ", i, pkttype, pktsize); - - if (pkttype == 7) { - int op = cp_type7_opcode(src[i]); - if (ops.find(op) != ops.end()) { - printf("%-40s ", ops[op].c_str()); - } else { - printf("op: %4d ", op); - } - } - - if (pkttype == 4) { - int reg = cp_type4_base_index_one_reg_wr(src[i]); - if (regs.find(reg) != regs.end()) { - printf("%-40s ", regs[reg].c_str()); - } else { - printf("reg: %4x ", reg); - } - } - - for (int j = 0; j < pktsize+1; j++) { - printf("%8.8X ", src[i+j]); - } - printf("\n"); - - uint64_t addr; - if (pkttype == 7) { - switch (cp_type7_opcode(src[i])) { - case CP_LOAD_STATE: - int dst_off = src[i+1] & 0x1FFF; - int state_src = (src[i+1] >> 16) & 3; - int state_block = (src[i+1] >> 18) & 7; - int state_type = src[i+2] & 3; - int num_unit = (src[i+1] & 0xffc00000) >> 22; - printf(" dst_off: %x state_src: %d state_block: %d state_type: %d num_unit: %d\n", - dst_off, state_src, state_block, state_type, num_unit); - addr = (uint64_t)(src[i+2] & 0xfffffffc) | ((uint64_t)(src[i+3]) << 32); - if (state_block == 5 && state_type == 0) { - if (!(addr&0xFFF)) { - int len = 0x1000; - if (num_unit >= 32) len += 0x1000; - //hexdump((uint32_t *)addr, len); - char fn[0x100]; - snprintf(fn, sizeof(fn), "/tmp/0x%lx.shader", addr); - printf("dumping %s\n", fn); - FILE *f = fopen(fn, "wb"); - // groups of 16 instructions - fwrite((void*)addr, 1, len, f); - fclose(f); - } - } - break; - } - } - - /*if (pkttype == 4) { - switch (cp_type4_base_index_one_reg_wr(src[i])) { - case REG_A5XX_SP_CS_CTRL_REG0: - addr = (uint64_t)(src[i+4] & 0xfffffffc) | ((uint64_t)(src[i+5]) << 32); - hexdump((uint32_t *)addr, 0x1000); - break; - } - }*/ - - /*if (pkttype == 4 && cp_type4_base_index_one_reg_wr(src[i]) == REG_A5XX_TPL1_CS_TEX_CONST_LO) { - uint64_t addr = (uint64_t)(src[i+1] & 0xffffffff) | ((uint64_t)(src[i+2]) << 32); - hexdump((uint32_t *)addr, 0x40); - } - - if (pkttype == 4 && cp_type4_base_index_one_reg_wr(src[i]) == REG_A5XX_TPL1_CS_TEX_SAMP_LO) { - uint64_t addr = (uint64_t)(src[i+1] & 0xffffffff) | ((uint64_t)(src[i+2]) << 32); - hexdump((uint32_t *)addr, 0x40); - }*/ - - if (pkttype == -1) break; - i += (1+pktsize); - } - assert(i == len); -} diff --git a/selfdrive/modeld/thneed/debug/include/a5xx.xml.h b/selfdrive/modeld/thneed/debug/include/a5xx.xml.h deleted file mode 100644 index 4a61d4e72c..0000000000 --- a/selfdrive/modeld/thneed/debug/include/a5xx.xml.h +++ /dev/null @@ -1,5201 +0,0 @@ -#ifndef A5XX_XML -#define A5XX_XML - -/* Autogenerated file, DO NOT EDIT manually! - -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/ubuntu/envytools/envytools/rnndb/./adreno.xml ( 501 bytes, from 2019-05-29 01:28:15) -- /home/ubuntu/envytools/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2019-05-29 01:28:15) -- /home/ubuntu/envytools/envytools/rnndb/adreno/a2xx.xml ( 79608 bytes, from 2019-05-29 01:28:15) -- /home/ubuntu/envytools/envytools/rnndb/adreno/adreno_common.xml ( 14239 bytes, from 2019-05-29 01:28:15) -- /home/ubuntu/envytools/envytools/rnndb/adreno/adreno_pm4.xml ( 43155 bytes, from 2019-05-29 01:28:15) -- /home/ubuntu/envytools/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2019-05-29 01:28:15) -- /home/ubuntu/envytools/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2019-05-29 01:28:15) -- /home/ubuntu/envytools/envytools/rnndb/adreno/a5xx.xml ( 147291 bytes, from 2019-05-29 14:51:41) -- /home/ubuntu/envytools/envytools/rnndb/adreno/a6xx.xml ( 148461 bytes, from 2019-05-29 01:28:15) -- /home/ubuntu/envytools/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2019-05-29 01:28:15) -- /home/ubuntu/envytools/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2019-05-29 01:28:15) - -Copyright (C) 2013-2019 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum a5xx_color_fmt { - RB5_A8_UNORM = 2, - RB5_R8_UNORM = 3, - RB5_R8_SNORM = 4, - RB5_R8_UINT = 5, - RB5_R8_SINT = 6, - RB5_R4G4B4A4_UNORM = 8, - RB5_R5G5B5A1_UNORM = 10, - RB5_R5G6B5_UNORM = 14, - RB5_R8G8_UNORM = 15, - RB5_R8G8_SNORM = 16, - RB5_R8G8_UINT = 17, - RB5_R8G8_SINT = 18, - RB5_R16_UNORM = 21, - RB5_R16_SNORM = 22, - RB5_R16_FLOAT = 23, - RB5_R16_UINT = 24, - RB5_R16_SINT = 25, - RB5_R8G8B8A8_UNORM = 48, - RB5_R8G8B8_UNORM = 49, - RB5_R8G8B8A8_SNORM = 50, - RB5_R8G8B8A8_UINT = 51, - RB5_R8G8B8A8_SINT = 52, - RB5_R10G10B10A2_UNORM = 55, - RB5_R10G10B10A2_UINT = 58, - RB5_R11G11B10_FLOAT = 66, - RB5_R16G16_UNORM = 67, - RB5_R16G16_SNORM = 68, - RB5_R16G16_FLOAT = 69, - RB5_R16G16_UINT = 70, - RB5_R16G16_SINT = 71, - RB5_R32_FLOAT = 74, - RB5_R32_UINT = 75, - RB5_R32_SINT = 76, - RB5_R16G16B16A16_UNORM = 96, - RB5_R16G16B16A16_SNORM = 97, - RB5_R16G16B16A16_FLOAT = 98, - RB5_R16G16B16A16_UINT = 99, - RB5_R16G16B16A16_SINT = 100, - RB5_R32G32_FLOAT = 103, - RB5_R32G32_UINT = 104, - RB5_R32G32_SINT = 105, - RB5_R32G32B32A32_FLOAT = 130, - RB5_R32G32B32A32_UINT = 131, - RB5_R32G32B32A32_SINT = 132, -}; - -enum a5xx_tile_mode { - TILE5_LINEAR = 0, - TILE5_2 = 2, - TILE5_3 = 3, -}; - -enum a5xx_vtx_fmt { - VFMT5_8_UNORM = 3, - VFMT5_8_SNORM = 4, - VFMT5_8_UINT = 5, - VFMT5_8_SINT = 6, - VFMT5_8_8_UNORM = 15, - VFMT5_8_8_SNORM = 16, - VFMT5_8_8_UINT = 17, - VFMT5_8_8_SINT = 18, - VFMT5_16_UNORM = 21, - VFMT5_16_SNORM = 22, - VFMT5_16_FLOAT = 23, - VFMT5_16_UINT = 24, - VFMT5_16_SINT = 25, - VFMT5_8_8_8_UNORM = 33, - VFMT5_8_8_8_SNORM = 34, - VFMT5_8_8_8_UINT = 35, - VFMT5_8_8_8_SINT = 36, - VFMT5_8_8_8_8_UNORM = 48, - VFMT5_8_8_8_8_SNORM = 50, - VFMT5_8_8_8_8_UINT = 51, - VFMT5_8_8_8_8_SINT = 52, - VFMT5_10_10_10_2_UNORM = 54, - VFMT5_10_10_10_2_SNORM = 57, - VFMT5_10_10_10_2_UINT = 58, - VFMT5_10_10_10_2_SINT = 59, - VFMT5_11_11_10_FLOAT = 66, - VFMT5_16_16_UNORM = 67, - VFMT5_16_16_SNORM = 68, - VFMT5_16_16_FLOAT = 69, - VFMT5_16_16_UINT = 70, - VFMT5_16_16_SINT = 71, - VFMT5_32_UNORM = 72, - VFMT5_32_SNORM = 73, - VFMT5_32_FLOAT = 74, - VFMT5_32_UINT = 75, - VFMT5_32_SINT = 76, - VFMT5_32_FIXED = 77, - VFMT5_16_16_16_UNORM = 88, - VFMT5_16_16_16_SNORM = 89, - VFMT5_16_16_16_FLOAT = 90, - VFMT5_16_16_16_UINT = 91, - VFMT5_16_16_16_SINT = 92, - VFMT5_16_16_16_16_UNORM = 96, - VFMT5_16_16_16_16_SNORM = 97, - VFMT5_16_16_16_16_FLOAT = 98, - VFMT5_16_16_16_16_UINT = 99, - VFMT5_16_16_16_16_SINT = 100, - VFMT5_32_32_UNORM = 101, - VFMT5_32_32_SNORM = 102, - VFMT5_32_32_FLOAT = 103, - VFMT5_32_32_UINT = 104, - VFMT5_32_32_SINT = 105, - VFMT5_32_32_FIXED = 106, - VFMT5_32_32_32_UNORM = 112, - VFMT5_32_32_32_SNORM = 113, - VFMT5_32_32_32_UINT = 114, - VFMT5_32_32_32_SINT = 115, - VFMT5_32_32_32_FLOAT = 116, - VFMT5_32_32_32_FIXED = 117, - VFMT5_32_32_32_32_UNORM = 128, - VFMT5_32_32_32_32_SNORM = 129, - VFMT5_32_32_32_32_FLOAT = 130, - VFMT5_32_32_32_32_UINT = 131, - VFMT5_32_32_32_32_SINT = 132, - VFMT5_32_32_32_32_FIXED = 133, -}; - -enum a5xx_tex_fmt { - TFMT5_A8_UNORM = 2, - TFMT5_8_UNORM = 3, - TFMT5_8_SNORM = 4, - TFMT5_8_UINT = 5, - TFMT5_8_SINT = 6, - TFMT5_4_4_4_4_UNORM = 8, - TFMT5_5_5_5_1_UNORM = 10, - TFMT5_5_6_5_UNORM = 14, - TFMT5_8_8_UNORM = 15, - TFMT5_8_8_SNORM = 16, - TFMT5_8_8_UINT = 17, - TFMT5_8_8_SINT = 18, - TFMT5_L8_A8_UNORM = 19, - TFMT5_16_UNORM = 21, - TFMT5_16_SNORM = 22, - TFMT5_16_FLOAT = 23, - TFMT5_16_UINT = 24, - TFMT5_16_SINT = 25, - TFMT5_8_8_8_8_UNORM = 48, - TFMT5_8_8_8_UNORM = 49, - TFMT5_8_8_8_8_SNORM = 50, - TFMT5_8_8_8_8_UINT = 51, - TFMT5_8_8_8_8_SINT = 52, - TFMT5_9_9_9_E5_FLOAT = 53, - TFMT5_10_10_10_2_UNORM = 54, - TFMT5_10_10_10_2_UINT = 58, - TFMT5_11_11_10_FLOAT = 66, - TFMT5_16_16_UNORM = 67, - TFMT5_16_16_SNORM = 68, - TFMT5_16_16_FLOAT = 69, - TFMT5_16_16_UINT = 70, - TFMT5_16_16_SINT = 71, - TFMT5_32_FLOAT = 74, - TFMT5_32_UINT = 75, - TFMT5_32_SINT = 76, - TFMT5_16_16_16_16_UNORM = 96, - TFMT5_16_16_16_16_SNORM = 97, - TFMT5_16_16_16_16_FLOAT = 98, - TFMT5_16_16_16_16_UINT = 99, - TFMT5_16_16_16_16_SINT = 100, - TFMT5_32_32_FLOAT = 103, - TFMT5_32_32_UINT = 104, - TFMT5_32_32_SINT = 105, - TFMT5_32_32_32_UINT = 114, - TFMT5_32_32_32_SINT = 115, - TFMT5_32_32_32_FLOAT = 116, - TFMT5_32_32_32_32_FLOAT = 130, - TFMT5_32_32_32_32_UINT = 131, - TFMT5_32_32_32_32_SINT = 132, - TFMT5_X8Z24_UNORM = 160, - TFMT5_ETC2_RG11_UNORM = 171, - TFMT5_ETC2_RG11_SNORM = 172, - TFMT5_ETC2_R11_UNORM = 173, - TFMT5_ETC2_R11_SNORM = 174, - TFMT5_ETC1 = 175, - TFMT5_ETC2_RGB8 = 176, - TFMT5_ETC2_RGBA8 = 177, - TFMT5_ETC2_RGB8A1 = 178, - TFMT5_DXT1 = 179, - TFMT5_DXT3 = 180, - TFMT5_DXT5 = 181, - TFMT5_RGTC1_UNORM = 183, - TFMT5_RGTC1_SNORM = 184, - TFMT5_RGTC2_UNORM = 187, - TFMT5_RGTC2_SNORM = 188, - TFMT5_BPTC_UFLOAT = 190, - TFMT5_BPTC_FLOAT = 191, - TFMT5_BPTC = 192, - TFMT5_ASTC_4x4 = 193, - TFMT5_ASTC_5x4 = 194, - TFMT5_ASTC_5x5 = 195, - TFMT5_ASTC_6x5 = 196, - TFMT5_ASTC_6x6 = 197, - TFMT5_ASTC_8x5 = 198, - TFMT5_ASTC_8x6 = 199, - TFMT5_ASTC_8x8 = 200, - TFMT5_ASTC_10x5 = 201, - TFMT5_ASTC_10x6 = 202, - TFMT5_ASTC_10x8 = 203, - TFMT5_ASTC_10x10 = 204, - TFMT5_ASTC_12x10 = 205, - TFMT5_ASTC_12x12 = 206, -}; - -enum a5xx_tex_fetchsize { - TFETCH5_1_BYTE = 0, - TFETCH5_2_BYTE = 1, - TFETCH5_4_BYTE = 2, - TFETCH5_8_BYTE = 3, - TFETCH5_16_BYTE = 4, -}; - -enum a5xx_depth_format { - DEPTH5_NONE = 0, - DEPTH5_16 = 1, - DEPTH5_24_8 = 2, - DEPTH5_32 = 4, -}; - -enum a5xx_blit_buf { - BLIT_MRT0 = 0, - BLIT_MRT1 = 1, - BLIT_MRT2 = 2, - BLIT_MRT3 = 3, - BLIT_MRT4 = 4, - BLIT_MRT5 = 5, - BLIT_MRT6 = 6, - BLIT_MRT7 = 7, - BLIT_ZS = 8, - BLIT_S = 9, -}; - -enum a5xx_cp_perfcounter_select { - PERF_CP_ALWAYS_COUNT = 0, - PERF_CP_BUSY_GFX_CORE_IDLE = 1, - PERF_CP_BUSY_CYCLES = 2, - PERF_CP_PFP_IDLE = 3, - PERF_CP_PFP_BUSY_WORKING = 4, - PERF_CP_PFP_STALL_CYCLES_ANY = 5, - PERF_CP_PFP_STARVE_CYCLES_ANY = 6, - PERF_CP_PFP_ICACHE_MISS = 7, - PERF_CP_PFP_ICACHE_HIT = 8, - PERF_CP_PFP_MATCH_PM4_PKT_PROFILE = 9, - PERF_CP_ME_BUSY_WORKING = 10, - PERF_CP_ME_IDLE = 11, - PERF_CP_ME_STARVE_CYCLES_ANY = 12, - PERF_CP_ME_FIFO_EMPTY_PFP_IDLE = 13, - PERF_CP_ME_FIFO_EMPTY_PFP_BUSY = 14, - PERF_CP_ME_FIFO_FULL_ME_BUSY = 15, - PERF_CP_ME_FIFO_FULL_ME_NON_WORKING = 16, - PERF_CP_ME_STALL_CYCLES_ANY = 17, - PERF_CP_ME_ICACHE_MISS = 18, - PERF_CP_ME_ICACHE_HIT = 19, - PERF_CP_NUM_PREEMPTIONS = 20, - PERF_CP_PREEMPTION_REACTION_DELAY = 21, - PERF_CP_PREEMPTION_SWITCH_OUT_TIME = 22, - PERF_CP_PREEMPTION_SWITCH_IN_TIME = 23, - PERF_CP_DEAD_DRAWS_IN_BIN_RENDER = 24, - PERF_CP_PREDICATED_DRAWS_KILLED = 25, - PERF_CP_MODE_SWITCH = 26, - PERF_CP_ZPASS_DONE = 27, - PERF_CP_CONTEXT_DONE = 28, - PERF_CP_CACHE_FLUSH = 29, - PERF_CP_LONG_PREEMPTIONS = 30, -}; - -enum a5xx_rbbm_perfcounter_select { - PERF_RBBM_ALWAYS_COUNT = 0, - PERF_RBBM_ALWAYS_ON = 1, - PERF_RBBM_TSE_BUSY = 2, - PERF_RBBM_RAS_BUSY = 3, - PERF_RBBM_PC_DCALL_BUSY = 4, - PERF_RBBM_PC_VSD_BUSY = 5, - PERF_RBBM_STATUS_MASKED = 6, - PERF_RBBM_COM_BUSY = 7, - PERF_RBBM_DCOM_BUSY = 8, - PERF_RBBM_VBIF_BUSY = 9, - PERF_RBBM_VSC_BUSY = 10, - PERF_RBBM_TESS_BUSY = 11, - PERF_RBBM_UCHE_BUSY = 12, - PERF_RBBM_HLSQ_BUSY = 13, -}; - -enum a5xx_pc_perfcounter_select { - PERF_PC_BUSY_CYCLES = 0, - PERF_PC_WORKING_CYCLES = 1, - PERF_PC_STALL_CYCLES_VFD = 2, - PERF_PC_STALL_CYCLES_TSE = 3, - PERF_PC_STALL_CYCLES_VPC = 4, - PERF_PC_STALL_CYCLES_UCHE = 5, - PERF_PC_STALL_CYCLES_TESS = 6, - PERF_PC_STALL_CYCLES_TSE_ONLY = 7, - PERF_PC_STALL_CYCLES_VPC_ONLY = 8, - PERF_PC_PASS1_TF_STALL_CYCLES = 9, - PERF_PC_STARVE_CYCLES_FOR_INDEX = 10, - PERF_PC_STARVE_CYCLES_FOR_TESS_FACTOR = 11, - PERF_PC_STARVE_CYCLES_FOR_VIZ_STREAM = 12, - PERF_PC_STARVE_CYCLES_FOR_POSITION = 13, - PERF_PC_STARVE_CYCLES_DI = 14, - PERF_PC_VIS_STREAMS_LOADED = 15, - PERF_PC_INSTANCES = 16, - PERF_PC_VPC_PRIMITIVES = 17, - PERF_PC_DEAD_PRIM = 18, - PERF_PC_LIVE_PRIM = 19, - PERF_PC_VERTEX_HITS = 20, - PERF_PC_IA_VERTICES = 21, - PERF_PC_IA_PRIMITIVES = 22, - PERF_PC_GS_PRIMITIVES = 23, - PERF_PC_HS_INVOCATIONS = 24, - PERF_PC_DS_INVOCATIONS = 25, - PERF_PC_VS_INVOCATIONS = 26, - PERF_PC_GS_INVOCATIONS = 27, - PERF_PC_DS_PRIMITIVES = 28, - PERF_PC_VPC_POS_DATA_TRANSACTION = 29, - PERF_PC_3D_DRAWCALLS = 30, - PERF_PC_2D_DRAWCALLS = 31, - PERF_PC_NON_DRAWCALL_GLOBAL_EVENTS = 32, - PERF_TESS_BUSY_CYCLES = 33, - PERF_TESS_WORKING_CYCLES = 34, - PERF_TESS_STALL_CYCLES_PC = 35, - PERF_TESS_STARVE_CYCLES_PC = 36, -}; - -enum a5xx_vfd_perfcounter_select { - PERF_VFD_BUSY_CYCLES = 0, - PERF_VFD_STALL_CYCLES_UCHE = 1, - PERF_VFD_STALL_CYCLES_VPC_ALLOC = 2, - PERF_VFD_STALL_CYCLES_MISS_VB = 3, - PERF_VFD_STALL_CYCLES_MISS_Q = 4, - PERF_VFD_STALL_CYCLES_SP_INFO = 5, - PERF_VFD_STALL_CYCLES_SP_ATTR = 6, - PERF_VFD_STALL_CYCLES_VFDP_VB = 7, - PERF_VFD_STALL_CYCLES_VFDP_Q = 8, - PERF_VFD_DECODER_PACKER_STALL = 9, - PERF_VFD_STARVE_CYCLES_UCHE = 10, - PERF_VFD_RBUFFER_FULL = 11, - PERF_VFD_ATTR_INFO_FIFO_FULL = 12, - PERF_VFD_DECODED_ATTRIBUTE_BYTES = 13, - PERF_VFD_NUM_ATTRIBUTES = 14, - PERF_VFD_INSTRUCTIONS = 15, - PERF_VFD_UPPER_SHADER_FIBERS = 16, - PERF_VFD_LOWER_SHADER_FIBERS = 17, - PERF_VFD_MODE_0_FIBERS = 18, - PERF_VFD_MODE_1_FIBERS = 19, - PERF_VFD_MODE_2_FIBERS = 20, - PERF_VFD_MODE_3_FIBERS = 21, - PERF_VFD_MODE_4_FIBERS = 22, - PERF_VFD_TOTAL_VERTICES = 23, - PERF_VFD_NUM_ATTR_MISS = 24, - PERF_VFD_1_BURST_REQ = 25, - PERF_VFDP_STALL_CYCLES_VFD = 26, - PERF_VFDP_STALL_CYCLES_VFD_INDEX = 27, - PERF_VFDP_STALL_CYCLES_VFD_PROG = 28, - PERF_VFDP_STARVE_CYCLES_PC = 29, - PERF_VFDP_VS_STAGE_32_WAVES = 30, -}; - -enum a5xx_hlsq_perfcounter_select { - PERF_HLSQ_BUSY_CYCLES = 0, - PERF_HLSQ_STALL_CYCLES_UCHE = 1, - PERF_HLSQ_STALL_CYCLES_SP_STATE = 2, - PERF_HLSQ_STALL_CYCLES_SP_FS_STAGE = 3, - PERF_HLSQ_UCHE_LATENCY_CYCLES = 4, - PERF_HLSQ_UCHE_LATENCY_COUNT = 5, - PERF_HLSQ_FS_STAGE_32_WAVES = 6, - PERF_HLSQ_FS_STAGE_64_WAVES = 7, - PERF_HLSQ_QUADS = 8, - PERF_HLSQ_SP_STATE_COPY_TRANS_FS_STAGE = 9, - PERF_HLSQ_SP_STATE_COPY_TRANS_VS_STAGE = 10, - PERF_HLSQ_TP_STATE_COPY_TRANS_FS_STAGE = 11, - PERF_HLSQ_TP_STATE_COPY_TRANS_VS_STAGE = 12, - PERF_HLSQ_CS_INVOCATIONS = 13, - PERF_HLSQ_COMPUTE_DRAWCALLS = 14, -}; - -enum a5xx_vpc_perfcounter_select { - PERF_VPC_BUSY_CYCLES = 0, - PERF_VPC_WORKING_CYCLES = 1, - PERF_VPC_STALL_CYCLES_UCHE = 2, - PERF_VPC_STALL_CYCLES_VFD_WACK = 3, - PERF_VPC_STALL_CYCLES_HLSQ_PRIM_ALLOC = 4, - PERF_VPC_STALL_CYCLES_PC = 5, - PERF_VPC_STALL_CYCLES_SP_LM = 6, - PERF_VPC_POS_EXPORT_STALL_CYCLES = 7, - PERF_VPC_STARVE_CYCLES_SP = 8, - PERF_VPC_STARVE_CYCLES_LRZ = 9, - PERF_VPC_PC_PRIMITIVES = 10, - PERF_VPC_SP_COMPONENTS = 11, - PERF_VPC_SP_LM_PRIMITIVES = 12, - PERF_VPC_SP_LM_COMPONENTS = 13, - PERF_VPC_SP_LM_DWORDS = 14, - PERF_VPC_STREAMOUT_COMPONENTS = 15, - PERF_VPC_GRANT_PHASES = 16, -}; - -enum a5xx_tse_perfcounter_select { - PERF_TSE_BUSY_CYCLES = 0, - PERF_TSE_CLIPPING_CYCLES = 1, - PERF_TSE_STALL_CYCLES_RAS = 2, - PERF_TSE_STALL_CYCLES_LRZ_BARYPLANE = 3, - PERF_TSE_STALL_CYCLES_LRZ_ZPLANE = 4, - PERF_TSE_STARVE_CYCLES_PC = 5, - PERF_TSE_INPUT_PRIM = 6, - PERF_TSE_INPUT_NULL_PRIM = 7, - PERF_TSE_TRIVAL_REJ_PRIM = 8, - PERF_TSE_CLIPPED_PRIM = 9, - PERF_TSE_ZERO_AREA_PRIM = 10, - PERF_TSE_FACENESS_CULLED_PRIM = 11, - PERF_TSE_ZERO_PIXEL_PRIM = 12, - PERF_TSE_OUTPUT_NULL_PRIM = 13, - PERF_TSE_OUTPUT_VISIBLE_PRIM = 14, - PERF_TSE_CINVOCATION = 15, - PERF_TSE_CPRIMITIVES = 16, - PERF_TSE_2D_INPUT_PRIM = 17, - PERF_TSE_2D_ALIVE_CLCLES = 18, -}; - -enum a5xx_ras_perfcounter_select { - PERF_RAS_BUSY_CYCLES = 0, - PERF_RAS_SUPERTILE_ACTIVE_CYCLES = 1, - PERF_RAS_STALL_CYCLES_LRZ = 2, - PERF_RAS_STARVE_CYCLES_TSE = 3, - PERF_RAS_SUPER_TILES = 4, - PERF_RAS_8X4_TILES = 5, - PERF_RAS_MASKGEN_ACTIVE = 6, - PERF_RAS_FULLY_COVERED_SUPER_TILES = 7, - PERF_RAS_FULLY_COVERED_8X4_TILES = 8, - PERF_RAS_PRIM_KILLED_INVISILBE = 9, -}; - -enum a5xx_lrz_perfcounter_select { - PERF_LRZ_BUSY_CYCLES = 0, - PERF_LRZ_STARVE_CYCLES_RAS = 1, - PERF_LRZ_STALL_CYCLES_RB = 2, - PERF_LRZ_STALL_CYCLES_VSC = 3, - PERF_LRZ_STALL_CYCLES_VPC = 4, - PERF_LRZ_STALL_CYCLES_FLAG_PREFETCH = 5, - PERF_LRZ_STALL_CYCLES_UCHE = 6, - PERF_LRZ_LRZ_READ = 7, - PERF_LRZ_LRZ_WRITE = 8, - PERF_LRZ_READ_LATENCY = 9, - PERF_LRZ_MERGE_CACHE_UPDATING = 10, - PERF_LRZ_PRIM_KILLED_BY_MASKGEN = 11, - PERF_LRZ_PRIM_KILLED_BY_LRZ = 12, - PERF_LRZ_VISIBLE_PRIM_AFTER_LRZ = 13, - PERF_LRZ_FULL_8X8_TILES = 14, - PERF_LRZ_PARTIAL_8X8_TILES = 15, - PERF_LRZ_TILE_KILLED = 16, - PERF_LRZ_TOTAL_PIXEL = 17, - PERF_LRZ_VISIBLE_PIXEL_AFTER_LRZ = 18, -}; - -enum a5xx_uche_perfcounter_select { - PERF_UCHE_BUSY_CYCLES = 0, - PERF_UCHE_STALL_CYCLES_VBIF = 1, - PERF_UCHE_VBIF_LATENCY_CYCLES = 2, - PERF_UCHE_VBIF_LATENCY_SAMPLES = 3, - PERF_UCHE_VBIF_READ_BEATS_TP = 4, - PERF_UCHE_VBIF_READ_BEATS_VFD = 5, - PERF_UCHE_VBIF_READ_BEATS_HLSQ = 6, - PERF_UCHE_VBIF_READ_BEATS_LRZ = 7, - PERF_UCHE_VBIF_READ_BEATS_SP = 8, - PERF_UCHE_READ_REQUESTS_TP = 9, - PERF_UCHE_READ_REQUESTS_VFD = 10, - PERF_UCHE_READ_REQUESTS_HLSQ = 11, - PERF_UCHE_READ_REQUESTS_LRZ = 12, - PERF_UCHE_READ_REQUESTS_SP = 13, - PERF_UCHE_WRITE_REQUESTS_LRZ = 14, - PERF_UCHE_WRITE_REQUESTS_SP = 15, - PERF_UCHE_WRITE_REQUESTS_VPC = 16, - PERF_UCHE_WRITE_REQUESTS_VSC = 17, - PERF_UCHE_EVICTS = 18, - PERF_UCHE_BANK_REQ0 = 19, - PERF_UCHE_BANK_REQ1 = 20, - PERF_UCHE_BANK_REQ2 = 21, - PERF_UCHE_BANK_REQ3 = 22, - PERF_UCHE_BANK_REQ4 = 23, - PERF_UCHE_BANK_REQ5 = 24, - PERF_UCHE_BANK_REQ6 = 25, - PERF_UCHE_BANK_REQ7 = 26, - PERF_UCHE_VBIF_READ_BEATS_CH0 = 27, - PERF_UCHE_VBIF_READ_BEATS_CH1 = 28, - PERF_UCHE_GMEM_READ_BEATS = 29, - PERF_UCHE_FLAG_COUNT = 30, -}; - -enum a5xx_tp_perfcounter_select { - PERF_TP_BUSY_CYCLES = 0, - PERF_TP_STALL_CYCLES_UCHE = 1, - PERF_TP_LATENCY_CYCLES = 2, - PERF_TP_LATENCY_TRANS = 3, - PERF_TP_FLAG_CACHE_REQUEST_SAMPLES = 4, - PERF_TP_FLAG_CACHE_REQUEST_LATENCY = 5, - PERF_TP_L1_CACHELINE_REQUESTS = 6, - PERF_TP_L1_CACHELINE_MISSES = 7, - PERF_TP_SP_TP_TRANS = 8, - PERF_TP_TP_SP_TRANS = 9, - PERF_TP_OUTPUT_PIXELS = 10, - PERF_TP_FILTER_WORKLOAD_16BIT = 11, - PERF_TP_FILTER_WORKLOAD_32BIT = 12, - PERF_TP_QUADS_RECEIVED = 13, - PERF_TP_QUADS_OFFSET = 14, - PERF_TP_QUADS_SHADOW = 15, - PERF_TP_QUADS_ARRAY = 16, - PERF_TP_QUADS_GRADIENT = 17, - PERF_TP_QUADS_1D = 18, - PERF_TP_QUADS_2D = 19, - PERF_TP_QUADS_BUFFER = 20, - PERF_TP_QUADS_3D = 21, - PERF_TP_QUADS_CUBE = 22, - PERF_TP_STATE_CACHE_REQUESTS = 23, - PERF_TP_STATE_CACHE_MISSES = 24, - PERF_TP_DIVERGENT_QUADS_RECEIVED = 25, - PERF_TP_BINDLESS_STATE_CACHE_REQUESTS = 26, - PERF_TP_BINDLESS_STATE_CACHE_MISSES = 27, - PERF_TP_PRT_NON_RESIDENT_EVENTS = 28, - PERF_TP_OUTPUT_PIXELS_POINT = 29, - PERF_TP_OUTPUT_PIXELS_BILINEAR = 30, - PERF_TP_OUTPUT_PIXELS_MIP = 31, - PERF_TP_OUTPUT_PIXELS_ANISO = 32, - PERF_TP_OUTPUT_PIXELS_ZERO_LOD = 33, - PERF_TP_FLAG_CACHE_REQUESTS = 34, - PERF_TP_FLAG_CACHE_MISSES = 35, - PERF_TP_L1_5_L2_REQUESTS = 36, - PERF_TP_2D_OUTPUT_PIXELS = 37, - PERF_TP_2D_OUTPUT_PIXELS_POINT = 38, - PERF_TP_2D_OUTPUT_PIXELS_BILINEAR = 39, - PERF_TP_2D_FILTER_WORKLOAD_16BIT = 40, - PERF_TP_2D_FILTER_WORKLOAD_32BIT = 41, -}; - -enum a5xx_sp_perfcounter_select { - PERF_SP_BUSY_CYCLES = 0, - PERF_SP_ALU_WORKING_CYCLES = 1, - PERF_SP_EFU_WORKING_CYCLES = 2, - PERF_SP_STALL_CYCLES_VPC = 3, - PERF_SP_STALL_CYCLES_TP = 4, - PERF_SP_STALL_CYCLES_UCHE = 5, - PERF_SP_STALL_CYCLES_RB = 6, - PERF_SP_SCHEDULER_NON_WORKING = 7, - PERF_SP_WAVE_CONTEXTS = 8, - PERF_SP_WAVE_CONTEXT_CYCLES = 9, - PERF_SP_FS_STAGE_WAVE_CYCLES = 10, - PERF_SP_FS_STAGE_WAVE_SAMPLES = 11, - PERF_SP_VS_STAGE_WAVE_CYCLES = 12, - PERF_SP_VS_STAGE_WAVE_SAMPLES = 13, - PERF_SP_FS_STAGE_DURATION_CYCLES = 14, - PERF_SP_VS_STAGE_DURATION_CYCLES = 15, - PERF_SP_WAVE_CTRL_CYCLES = 16, - PERF_SP_WAVE_LOAD_CYCLES = 17, - PERF_SP_WAVE_EMIT_CYCLES = 18, - PERF_SP_WAVE_NOP_CYCLES = 19, - PERF_SP_WAVE_WAIT_CYCLES = 20, - PERF_SP_WAVE_FETCH_CYCLES = 21, - PERF_SP_WAVE_IDLE_CYCLES = 22, - PERF_SP_WAVE_END_CYCLES = 23, - PERF_SP_WAVE_LONG_SYNC_CYCLES = 24, - PERF_SP_WAVE_SHORT_SYNC_CYCLES = 25, - PERF_SP_WAVE_JOIN_CYCLES = 26, - PERF_SP_LM_LOAD_INSTRUCTIONS = 27, - PERF_SP_LM_STORE_INSTRUCTIONS = 28, - PERF_SP_LM_ATOMICS = 29, - PERF_SP_GM_LOAD_INSTRUCTIONS = 30, - PERF_SP_GM_STORE_INSTRUCTIONS = 31, - PERF_SP_GM_ATOMICS = 32, - PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = 33, - PERF_SP_VS_STAGE_CFLOW_INSTRUCTIONS = 34, - PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = 35, - PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 36, - PERF_SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 37, - PERF_SP_FS_STAGE_TEX_INSTRUCTIONS = 38, - PERF_SP_FS_STAGE_CFLOW_INSTRUCTIONS = 39, - PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = 40, - PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 41, - PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 42, - PERF_SP_FS_STAGE_BARY_INSTRUCTIONS = 43, - PERF_SP_VS_INSTRUCTIONS = 44, - PERF_SP_FS_INSTRUCTIONS = 45, - PERF_SP_ADDR_LOCK_COUNT = 46, - PERF_SP_UCHE_READ_TRANS = 47, - PERF_SP_UCHE_WRITE_TRANS = 48, - PERF_SP_EXPORT_VPC_TRANS = 49, - PERF_SP_EXPORT_RB_TRANS = 50, - PERF_SP_PIXELS_KILLED = 51, - PERF_SP_ICL1_REQUESTS = 52, - PERF_SP_ICL1_MISSES = 53, - PERF_SP_ICL0_REQUESTS = 54, - PERF_SP_ICL0_MISSES = 55, - PERF_SP_HS_INSTRUCTIONS = 56, - PERF_SP_DS_INSTRUCTIONS = 57, - PERF_SP_GS_INSTRUCTIONS = 58, - PERF_SP_CS_INSTRUCTIONS = 59, - PERF_SP_GPR_READ = 60, - PERF_SP_GPR_WRITE = 61, - PERF_SP_LM_CH0_REQUESTS = 62, - PERF_SP_LM_CH1_REQUESTS = 63, - PERF_SP_LM_BANK_CONFLICTS = 64, -}; - -enum a5xx_rb_perfcounter_select { - PERF_RB_BUSY_CYCLES = 0, - PERF_RB_STALL_CYCLES_CCU = 1, - PERF_RB_STALL_CYCLES_HLSQ = 2, - PERF_RB_STALL_CYCLES_FIFO0_FULL = 3, - PERF_RB_STALL_CYCLES_FIFO1_FULL = 4, - PERF_RB_STALL_CYCLES_FIFO2_FULL = 5, - PERF_RB_STARVE_CYCLES_SP = 6, - PERF_RB_STARVE_CYCLES_LRZ_TILE = 7, - PERF_RB_STARVE_CYCLES_CCU = 8, - PERF_RB_STARVE_CYCLES_Z_PLANE = 9, - PERF_RB_STARVE_CYCLES_BARY_PLANE = 10, - PERF_RB_Z_WORKLOAD = 11, - PERF_RB_HLSQ_ACTIVE = 12, - PERF_RB_Z_READ = 13, - PERF_RB_Z_WRITE = 14, - PERF_RB_C_READ = 15, - PERF_RB_C_WRITE = 16, - PERF_RB_TOTAL_PASS = 17, - PERF_RB_Z_PASS = 18, - PERF_RB_Z_FAIL = 19, - PERF_RB_S_FAIL = 20, - PERF_RB_BLENDED_FXP_COMPONENTS = 21, - PERF_RB_BLENDED_FP16_COMPONENTS = 22, - RB_RESERVED = 23, - PERF_RB_2D_ALIVE_CYCLES = 24, - PERF_RB_2D_STALL_CYCLES_A2D = 25, - PERF_RB_2D_STARVE_CYCLES_SRC = 26, - PERF_RB_2D_STARVE_CYCLES_SP = 27, - PERF_RB_2D_STARVE_CYCLES_DST = 28, - PERF_RB_2D_VALID_PIXELS = 29, -}; - -enum a5xx_rb_samples_perfcounter_select { - TOTAL_SAMPLES = 0, - ZPASS_SAMPLES = 1, - ZFAIL_SAMPLES = 2, - SFAIL_SAMPLES = 3, -}; - -enum a5xx_vsc_perfcounter_select { - PERF_VSC_BUSY_CYCLES = 0, - PERF_VSC_WORKING_CYCLES = 1, - PERF_VSC_STALL_CYCLES_UCHE = 2, - PERF_VSC_EOT_NUM = 3, -}; - -enum a5xx_ccu_perfcounter_select { - PERF_CCU_BUSY_CYCLES = 0, - PERF_CCU_STALL_CYCLES_RB_DEPTH_RETURN = 1, - PERF_CCU_STALL_CYCLES_RB_COLOR_RETURN = 2, - PERF_CCU_STARVE_CYCLES_FLAG_RETURN = 3, - PERF_CCU_DEPTH_BLOCKS = 4, - PERF_CCU_COLOR_BLOCKS = 5, - PERF_CCU_DEPTH_BLOCK_HIT = 6, - PERF_CCU_COLOR_BLOCK_HIT = 7, - PERF_CCU_PARTIAL_BLOCK_READ = 8, - PERF_CCU_GMEM_READ = 9, - PERF_CCU_GMEM_WRITE = 10, - PERF_CCU_DEPTH_READ_FLAG0_COUNT = 11, - PERF_CCU_DEPTH_READ_FLAG1_COUNT = 12, - PERF_CCU_DEPTH_READ_FLAG2_COUNT = 13, - PERF_CCU_DEPTH_READ_FLAG3_COUNT = 14, - PERF_CCU_DEPTH_READ_FLAG4_COUNT = 15, - PERF_CCU_COLOR_READ_FLAG0_COUNT = 16, - PERF_CCU_COLOR_READ_FLAG1_COUNT = 17, - PERF_CCU_COLOR_READ_FLAG2_COUNT = 18, - PERF_CCU_COLOR_READ_FLAG3_COUNT = 19, - PERF_CCU_COLOR_READ_FLAG4_COUNT = 20, - PERF_CCU_2D_BUSY_CYCLES = 21, - PERF_CCU_2D_RD_REQ = 22, - PERF_CCU_2D_WR_REQ = 23, - PERF_CCU_2D_REORDER_STARVE_CYCLES = 24, - PERF_CCU_2D_PIXELS = 25, -}; - -enum a5xx_cmp_perfcounter_select { - PERF_CMPDECMP_STALL_CYCLES_VBIF = 0, - PERF_CMPDECMP_VBIF_LATENCY_CYCLES = 1, - PERF_CMPDECMP_VBIF_LATENCY_SAMPLES = 2, - PERF_CMPDECMP_VBIF_READ_DATA_CCU = 3, - PERF_CMPDECMP_VBIF_WRITE_DATA_CCU = 4, - PERF_CMPDECMP_VBIF_READ_REQUEST = 5, - PERF_CMPDECMP_VBIF_WRITE_REQUEST = 6, - PERF_CMPDECMP_VBIF_READ_DATA = 7, - PERF_CMPDECMP_VBIF_WRITE_DATA = 8, - PERF_CMPDECMP_FLAG_FETCH_CYCLES = 9, - PERF_CMPDECMP_FLAG_FETCH_SAMPLES = 10, - PERF_CMPDECMP_DEPTH_WRITE_FLAG1_COUNT = 11, - PERF_CMPDECMP_DEPTH_WRITE_FLAG2_COUNT = 12, - PERF_CMPDECMP_DEPTH_WRITE_FLAG3_COUNT = 13, - PERF_CMPDECMP_DEPTH_WRITE_FLAG4_COUNT = 14, - PERF_CMPDECMP_COLOR_WRITE_FLAG1_COUNT = 15, - PERF_CMPDECMP_COLOR_WRITE_FLAG2_COUNT = 16, - PERF_CMPDECMP_COLOR_WRITE_FLAG3_COUNT = 17, - PERF_CMPDECMP_COLOR_WRITE_FLAG4_COUNT = 18, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_REQ = 19, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_WR = 20, - PERF_CMPDECMP_2D_STALL_CYCLES_VBIF_RETURN = 21, - PERF_CMPDECMP_2D_RD_DATA = 22, - PERF_CMPDECMP_2D_WR_DATA = 23, -}; - -enum a5xx_vbif_perfcounter_select { - AXI_READ_REQUESTS_ID_0 = 0, - AXI_READ_REQUESTS_ID_1 = 1, - AXI_READ_REQUESTS_ID_2 = 2, - AXI_READ_REQUESTS_ID_3 = 3, - AXI_READ_REQUESTS_ID_4 = 4, - AXI_READ_REQUESTS_ID_5 = 5, - AXI_READ_REQUESTS_ID_6 = 6, - AXI_READ_REQUESTS_ID_7 = 7, - AXI_READ_REQUESTS_ID_8 = 8, - AXI_READ_REQUESTS_ID_9 = 9, - AXI_READ_REQUESTS_ID_10 = 10, - AXI_READ_REQUESTS_ID_11 = 11, - AXI_READ_REQUESTS_ID_12 = 12, - AXI_READ_REQUESTS_ID_13 = 13, - AXI_READ_REQUESTS_ID_14 = 14, - AXI_READ_REQUESTS_ID_15 = 15, - AXI0_READ_REQUESTS_TOTAL = 16, - AXI1_READ_REQUESTS_TOTAL = 17, - AXI2_READ_REQUESTS_TOTAL = 18, - AXI3_READ_REQUESTS_TOTAL = 19, - AXI_READ_REQUESTS_TOTAL = 20, - AXI_WRITE_REQUESTS_ID_0 = 21, - AXI_WRITE_REQUESTS_ID_1 = 22, - AXI_WRITE_REQUESTS_ID_2 = 23, - AXI_WRITE_REQUESTS_ID_3 = 24, - AXI_WRITE_REQUESTS_ID_4 = 25, - AXI_WRITE_REQUESTS_ID_5 = 26, - AXI_WRITE_REQUESTS_ID_6 = 27, - AXI_WRITE_REQUESTS_ID_7 = 28, - AXI_WRITE_REQUESTS_ID_8 = 29, - AXI_WRITE_REQUESTS_ID_9 = 30, - AXI_WRITE_REQUESTS_ID_10 = 31, - AXI_WRITE_REQUESTS_ID_11 = 32, - AXI_WRITE_REQUESTS_ID_12 = 33, - AXI_WRITE_REQUESTS_ID_13 = 34, - AXI_WRITE_REQUESTS_ID_14 = 35, - AXI_WRITE_REQUESTS_ID_15 = 36, - AXI0_WRITE_REQUESTS_TOTAL = 37, - AXI1_WRITE_REQUESTS_TOTAL = 38, - AXI2_WRITE_REQUESTS_TOTAL = 39, - AXI3_WRITE_REQUESTS_TOTAL = 40, - AXI_WRITE_REQUESTS_TOTAL = 41, - AXI_TOTAL_REQUESTS = 42, - AXI_READ_DATA_BEATS_ID_0 = 43, - AXI_READ_DATA_BEATS_ID_1 = 44, - AXI_READ_DATA_BEATS_ID_2 = 45, - AXI_READ_DATA_BEATS_ID_3 = 46, - AXI_READ_DATA_BEATS_ID_4 = 47, - AXI_READ_DATA_BEATS_ID_5 = 48, - AXI_READ_DATA_BEATS_ID_6 = 49, - AXI_READ_DATA_BEATS_ID_7 = 50, - AXI_READ_DATA_BEATS_ID_8 = 51, - AXI_READ_DATA_BEATS_ID_9 = 52, - AXI_READ_DATA_BEATS_ID_10 = 53, - AXI_READ_DATA_BEATS_ID_11 = 54, - AXI_READ_DATA_BEATS_ID_12 = 55, - AXI_READ_DATA_BEATS_ID_13 = 56, - AXI_READ_DATA_BEATS_ID_14 = 57, - AXI_READ_DATA_BEATS_ID_15 = 58, - AXI0_READ_DATA_BEATS_TOTAL = 59, - AXI1_READ_DATA_BEATS_TOTAL = 60, - AXI2_READ_DATA_BEATS_TOTAL = 61, - AXI3_READ_DATA_BEATS_TOTAL = 62, - AXI_READ_DATA_BEATS_TOTAL = 63, - AXI_WRITE_DATA_BEATS_ID_0 = 64, - AXI_WRITE_DATA_BEATS_ID_1 = 65, - AXI_WRITE_DATA_BEATS_ID_2 = 66, - AXI_WRITE_DATA_BEATS_ID_3 = 67, - AXI_WRITE_DATA_BEATS_ID_4 = 68, - AXI_WRITE_DATA_BEATS_ID_5 = 69, - AXI_WRITE_DATA_BEATS_ID_6 = 70, - AXI_WRITE_DATA_BEATS_ID_7 = 71, - AXI_WRITE_DATA_BEATS_ID_8 = 72, - AXI_WRITE_DATA_BEATS_ID_9 = 73, - AXI_WRITE_DATA_BEATS_ID_10 = 74, - AXI_WRITE_DATA_BEATS_ID_11 = 75, - AXI_WRITE_DATA_BEATS_ID_12 = 76, - AXI_WRITE_DATA_BEATS_ID_13 = 77, - AXI_WRITE_DATA_BEATS_ID_14 = 78, - AXI_WRITE_DATA_BEATS_ID_15 = 79, - AXI0_WRITE_DATA_BEATS_TOTAL = 80, - AXI1_WRITE_DATA_BEATS_TOTAL = 81, - AXI2_WRITE_DATA_BEATS_TOTAL = 82, - AXI3_WRITE_DATA_BEATS_TOTAL = 83, - AXI_WRITE_DATA_BEATS_TOTAL = 84, - AXI_DATA_BEATS_TOTAL = 85, -}; - -enum a5xx_tex_filter { - A5XX_TEX_NEAREST = 0, - A5XX_TEX_LINEAR = 1, - A5XX_TEX_ANISO = 2, -}; - -enum a5xx_tex_clamp { - A5XX_TEX_REPEAT = 0, - A5XX_TEX_CLAMP_TO_EDGE = 1, - A5XX_TEX_MIRROR_REPEAT = 2, - A5XX_TEX_CLAMP_TO_BORDER = 3, - A5XX_TEX_MIRROR_CLAMP = 4, -}; - -enum a5xx_tex_aniso { - A5XX_TEX_ANISO_1 = 0, - A5XX_TEX_ANISO_2 = 1, - A5XX_TEX_ANISO_4 = 2, - A5XX_TEX_ANISO_8 = 3, - A5XX_TEX_ANISO_16 = 4, -}; - -enum a5xx_tex_swiz { - A5XX_TEX_X = 0, - A5XX_TEX_Y = 1, - A5XX_TEX_Z = 2, - A5XX_TEX_W = 3, - A5XX_TEX_ZERO = 4, - A5XX_TEX_ONE = 5, -}; - -enum a5xx_tex_type { - A5XX_TEX_1D = 0, - A5XX_TEX_2D = 1, - A5XX_TEX_CUBE = 2, - A5XX_TEX_3D = 3, -}; - -#define A5XX_INT0_RBBM_GPU_IDLE 0x00000001 -#define A5XX_INT0_RBBM_AHB_ERROR 0x00000002 -#define A5XX_INT0_RBBM_TRANSFER_TIMEOUT 0x00000004 -#define A5XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008 -#define A5XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010 -#define A5XX_INT0_RBBM_ETS_MS_TIMEOUT 0x00000020 -#define A5XX_INT0_RBBM_ATB_ASYNC_OVERFLOW 0x00000040 -#define A5XX_INT0_RBBM_GPC_ERROR 0x00000080 -#define A5XX_INT0_CP_SW 0x00000100 -#define A5XX_INT0_CP_HW_ERROR 0x00000200 -#define A5XX_INT0_CP_CCU_FLUSH_DEPTH_TS 0x00000400 -#define A5XX_INT0_CP_CCU_FLUSH_COLOR_TS 0x00000800 -#define A5XX_INT0_CP_CCU_RESOLVE_TS 0x00001000 -#define A5XX_INT0_CP_IB2 0x00002000 -#define A5XX_INT0_CP_IB1 0x00004000 -#define A5XX_INT0_CP_RB 0x00008000 -#define A5XX_INT0_CP_UNUSED_1 0x00010000 -#define A5XX_INT0_CP_RB_DONE_TS 0x00020000 -#define A5XX_INT0_CP_WT_DONE_TS 0x00040000 -#define A5XX_INT0_UNKNOWN_1 0x00080000 -#define A5XX_INT0_CP_CACHE_FLUSH_TS 0x00100000 -#define A5XX_INT0_UNUSED_2 0x00200000 -#define A5XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00400000 -#define A5XX_INT0_MISC_HANG_DETECT 0x00800000 -#define A5XX_INT0_UCHE_OOB_ACCESS 0x01000000 -#define A5XX_INT0_UCHE_TRAP_INTR 0x02000000 -#define A5XX_INT0_DEBBUS_INTR_0 0x04000000 -#define A5XX_INT0_DEBBUS_INTR_1 0x08000000 -#define A5XX_INT0_GPMU_VOLTAGE_DROOP 0x10000000 -#define A5XX_INT0_GPMU_FIRMWARE 0x20000000 -#define A5XX_INT0_ISDB_CPU_IRQ 0x40000000 -#define A5XX_INT0_ISDB_UNDER_DEBUG 0x80000000 -#define A5XX_CP_INT_CP_OPCODE_ERROR 0x00000001 -#define A5XX_CP_INT_CP_RESERVED_BIT_ERROR 0x00000002 -#define A5XX_CP_INT_CP_HW_FAULT_ERROR 0x00000004 -#define A5XX_CP_INT_CP_DMA_ERROR 0x00000008 -#define A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR 0x00000010 -#define A5XX_CP_INT_CP_AHB_ERROR 0x00000020 -#define REG_A5XX_CP_RB_BASE 0x00000800 - -#define REG_A5XX_CP_RB_BASE_HI 0x00000801 - -#define REG_A5XX_CP_RB_CNTL 0x00000802 - -#define REG_A5XX_CP_RB_RPTR_ADDR 0x00000804 - -#define REG_A5XX_CP_RB_RPTR_ADDR_HI 0x00000805 - -#define REG_A5XX_CP_RB_RPTR 0x00000806 - -#define REG_A5XX_CP_RB_WPTR 0x00000807 - -#define REG_A5XX_CP_PFP_STAT_ADDR 0x00000808 - -#define REG_A5XX_CP_PFP_STAT_DATA 0x00000809 - -#define REG_A5XX_CP_DRAW_STATE_ADDR 0x0000080b - -#define REG_A5XX_CP_DRAW_STATE_DATA 0x0000080c - -#define REG_A5XX_CP_ME_NRT_ADDR_LO 0x0000080d - -#define REG_A5XX_CP_ME_NRT_ADDR_HI 0x0000080e - -#define REG_A5XX_CP_ME_NRT_DATA 0x00000810 - -#define REG_A5XX_CP_CRASH_SCRIPT_BASE_LO 0x00000817 - -#define REG_A5XX_CP_CRASH_SCRIPT_BASE_HI 0x00000818 - -#define REG_A5XX_CP_CRASH_DUMP_CNTL 0x00000819 - -#define REG_A5XX_CP_ME_STAT_ADDR 0x0000081a - -#define REG_A5XX_CP_ROQ_THRESHOLDS_1 0x0000081f - -#define REG_A5XX_CP_ROQ_THRESHOLDS_2 0x00000820 - -#define REG_A5XX_CP_ROQ_DBG_ADDR 0x00000821 - -#define REG_A5XX_CP_ROQ_DBG_DATA 0x00000822 - -#define REG_A5XX_CP_MEQ_DBG_ADDR 0x00000823 - -#define REG_A5XX_CP_MEQ_DBG_DATA 0x00000824 - -#define REG_A5XX_CP_MEQ_THRESHOLDS 0x00000825 - -#define REG_A5XX_CP_MERCIU_SIZE 0x00000826 - -#define REG_A5XX_CP_MERCIU_DBG_ADDR 0x00000827 - -#define REG_A5XX_CP_MERCIU_DBG_DATA_1 0x00000828 - -#define REG_A5XX_CP_MERCIU_DBG_DATA_2 0x00000829 - -#define REG_A5XX_CP_PFP_UCODE_DBG_ADDR 0x0000082a - -#define REG_A5XX_CP_PFP_UCODE_DBG_DATA 0x0000082b - -#define REG_A5XX_CP_ME_UCODE_DBG_ADDR 0x0000082f - -#define REG_A5XX_CP_ME_UCODE_DBG_DATA 0x00000830 - -#define REG_A5XX_CP_CNTL 0x00000831 - -#define REG_A5XX_CP_PFP_ME_CNTL 0x00000832 - -#define REG_A5XX_CP_CHICKEN_DBG 0x00000833 - -#define REG_A5XX_CP_PFP_INSTR_BASE_LO 0x00000835 - -#define REG_A5XX_CP_PFP_INSTR_BASE_HI 0x00000836 - -#define REG_A5XX_CP_ME_INSTR_BASE_LO 0x00000838 - -#define REG_A5XX_CP_ME_INSTR_BASE_HI 0x00000839 - -#define REG_A5XX_CP_CONTEXT_SWITCH_CNTL 0x0000083b - -#define REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO 0x0000083c - -#define REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI 0x0000083d - -#define REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO 0x0000083e - -#define REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI 0x0000083f - -#define REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x00000840 - -#define REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x00000841 - -#define REG_A5XX_CP_ADDR_MODE_CNTL 0x00000860 - -#define REG_A5XX_CP_ME_STAT_DATA 0x00000b14 - -#define REG_A5XX_CP_WFI_PEND_CTR 0x00000b15 - -#define REG_A5XX_CP_INTERRUPT_STATUS 0x00000b18 - -#define REG_A5XX_CP_HW_FAULT 0x00000b1a - -#define REG_A5XX_CP_PROTECT_STATUS 0x00000b1c - -#define REG_A5XX_CP_IB1_BASE 0x00000b1f - -#define REG_A5XX_CP_IB1_BASE_HI 0x00000b20 - -#define REG_A5XX_CP_IB1_BUFSZ 0x00000b21 - -#define REG_A5XX_CP_IB2_BASE 0x00000b22 - -#define REG_A5XX_CP_IB2_BASE_HI 0x00000b23 - -#define REG_A5XX_CP_IB2_BUFSZ 0x00000b24 - -static inline uint32_t REG_A5XX_CP_SCRATCH(uint32_t i0) { return 0x00000b78 + 0x1*i0; } - -static inline uint32_t REG_A5XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000b78 + 0x1*i0; } - -static inline uint32_t REG_A5XX_CP_PROTECT(uint32_t i0) { return 0x00000880 + 0x1*i0; } - -static inline uint32_t REG_A5XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000880 + 0x1*i0; } -#define A5XX_CP_PROTECT_REG_BASE_ADDR__MASK 0x0001ffff -#define A5XX_CP_PROTECT_REG_BASE_ADDR__SHIFT 0 -static inline uint32_t A5XX_CP_PROTECT_REG_BASE_ADDR(uint32_t val) -{ - return ((val) << A5XX_CP_PROTECT_REG_BASE_ADDR__SHIFT) & A5XX_CP_PROTECT_REG_BASE_ADDR__MASK; -} -#define A5XX_CP_PROTECT_REG_MASK_LEN__MASK 0x1f000000 -#define A5XX_CP_PROTECT_REG_MASK_LEN__SHIFT 24 -static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) -{ - return ((val) << A5XX_CP_PROTECT_REG_MASK_LEN__SHIFT) & A5XX_CP_PROTECT_REG_MASK_LEN__MASK; -} -#define A5XX_CP_PROTECT_REG_TRAP_WRITE 0x20000000 -#define A5XX_CP_PROTECT_REG_TRAP_READ 0x40000000 - -#define REG_A5XX_CP_PROTECT_CNTL 0x000008a0 - -#define REG_A5XX_CP_AHB_FAULT 0x00000b1b - -#define REG_A5XX_CP_PERFCTR_CP_SEL_0 0x00000bb0 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_1 0x00000bb1 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_2 0x00000bb2 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_3 0x00000bb3 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_4 0x00000bb4 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_5 0x00000bb5 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_6 0x00000bb6 - -#define REG_A5XX_CP_PERFCTR_CP_SEL_7 0x00000bb7 - -#define REG_A5XX_VSC_ADDR_MODE_CNTL 0x00000bc1 - -#define REG_A5XX_CP_POWERCTR_CP_SEL_0 0x00000bba - -#define REG_A5XX_CP_POWERCTR_CP_SEL_1 0x00000bbb - -#define REG_A5XX_CP_POWERCTR_CP_SEL_2 0x00000bbc - -#define REG_A5XX_CP_POWERCTR_CP_SEL_3 0x00000bbd - -#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_A 0x00000004 - -#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_B 0x00000005 - -#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_C 0x00000006 - -#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_D 0x00000007 - -#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLT 0x00000008 - -#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLM 0x00000009 - -#define REG_A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x00000018 - -#define REG_A5XX_RBBM_CFG_DBGBUS_OPL 0x0000000a - -#define REG_A5XX_RBBM_CFG_DBGBUS_OPE 0x0000000b - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_0 0x0000000c - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_1 0x0000000d - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_2 0x0000000e - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTL_3 0x0000000f - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_0 0x00000010 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_1 0x00000011 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_2 0x00000012 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKL_3 0x00000013 - -#define REG_A5XX_RBBM_CFG_DBGBUS_BYTEL_0 0x00000014 - -#define REG_A5XX_RBBM_CFG_DBGBUS_BYTEL_1 0x00000015 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_0 0x00000016 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_1 0x00000017 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_2 0x00000018 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IVTE_3 0x00000019 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_0 0x0000001a - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_1 0x0000001b - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_2 0x0000001c - -#define REG_A5XX_RBBM_CFG_DBGBUS_MASKE_3 0x0000001d - -#define REG_A5XX_RBBM_CFG_DBGBUS_NIBBLEE 0x0000001e - -#define REG_A5XX_RBBM_CFG_DBGBUS_PTRC0 0x0000001f - -#define REG_A5XX_RBBM_CFG_DBGBUS_PTRC1 0x00000020 - -#define REG_A5XX_RBBM_CFG_DBGBUS_LOADREG 0x00000021 - -#define REG_A5XX_RBBM_CFG_DBGBUS_IDX 0x00000022 - -#define REG_A5XX_RBBM_CFG_DBGBUS_CLRC 0x00000023 - -#define REG_A5XX_RBBM_CFG_DBGBUS_LOADIVT 0x00000024 - -#define REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL 0x0000002f - -#define REG_A5XX_RBBM_INT_CLEAR_CMD 0x00000037 - -#define REG_A5XX_RBBM_INT_0_MASK 0x00000038 -#define A5XX_RBBM_INT_0_MASK_RBBM_GPU_IDLE 0x00000001 -#define A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR 0x00000002 -#define A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT 0x00000004 -#define A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT 0x00000008 -#define A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT 0x00000010 -#define A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT 0x00000020 -#define A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW 0x00000040 -#define A5XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR 0x00000080 -#define A5XX_RBBM_INT_0_MASK_CP_SW 0x00000100 -#define A5XX_RBBM_INT_0_MASK_CP_HW_ERROR 0x00000200 -#define A5XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_DEPTH_TS 0x00000400 -#define A5XX_RBBM_INT_0_MASK_CP_CCU_FLUSH_COLOR_TS 0x00000800 -#define A5XX_RBBM_INT_0_MASK_CP_CCU_RESOLVE_TS 0x00001000 -#define A5XX_RBBM_INT_0_MASK_CP_IB2 0x00002000 -#define A5XX_RBBM_INT_0_MASK_CP_IB1 0x00004000 -#define A5XX_RBBM_INT_0_MASK_CP_RB 0x00008000 -#define A5XX_RBBM_INT_0_MASK_CP_RB_DONE_TS 0x00020000 -#define A5XX_RBBM_INT_0_MASK_CP_WT_DONE_TS 0x00040000 -#define A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS 0x00100000 -#define A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW 0x00400000 -#define A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT 0x00800000 -#define A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS 0x01000000 -#define A5XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR 0x02000000 -#define A5XX_RBBM_INT_0_MASK_DEBBUS_INTR_0 0x04000000 -#define A5XX_RBBM_INT_0_MASK_DEBBUS_INTR_1 0x08000000 -#define A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP 0x10000000 -#define A5XX_RBBM_INT_0_MASK_GPMU_FIRMWARE 0x20000000 -#define A5XX_RBBM_INT_0_MASK_ISDB_CPU_IRQ 0x40000000 -#define A5XX_RBBM_INT_0_MASK_ISDB_UNDER_DEBUG 0x80000000 - -#define REG_A5XX_RBBM_AHB_DBG_CNTL 0x0000003f - -#define REG_A5XX_RBBM_EXT_VBIF_DBG_CNTL 0x00000041 - -#define REG_A5XX_RBBM_SW_RESET_CMD 0x00000043 - -#define REG_A5XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045 - -#define REG_A5XX_RBBM_BLOCK_SW_RESET_CMD2 0x00000046 - -#define REG_A5XX_RBBM_DBG_LO_HI_GPIO 0x00000048 - -#define REG_A5XX_RBBM_EXT_TRACE_BUS_CNTL 0x00000049 - -#define REG_A5XX_RBBM_CLOCK_CNTL_TP0 0x0000004a - -#define REG_A5XX_RBBM_CLOCK_CNTL_TP1 0x0000004b - -#define REG_A5XX_RBBM_CLOCK_CNTL_TP2 0x0000004c - -#define REG_A5XX_RBBM_CLOCK_CNTL_TP3 0x0000004d - -#define REG_A5XX_RBBM_CLOCK_CNTL2_TP0 0x0000004e - -#define REG_A5XX_RBBM_CLOCK_CNTL2_TP1 0x0000004f - -#define REG_A5XX_RBBM_CLOCK_CNTL2_TP2 0x00000050 - -#define REG_A5XX_RBBM_CLOCK_CNTL2_TP3 0x00000051 - -#define REG_A5XX_RBBM_CLOCK_CNTL3_TP0 0x00000052 - -#define REG_A5XX_RBBM_CLOCK_CNTL3_TP1 0x00000053 - -#define REG_A5XX_RBBM_CLOCK_CNTL3_TP2 0x00000054 - -#define REG_A5XX_RBBM_CLOCK_CNTL3_TP3 0x00000055 - -#define REG_A5XX_RBBM_READ_AHB_THROUGH_DBG 0x00000059 - -#define REG_A5XX_RBBM_CLOCK_CNTL_UCHE 0x0000005a - -#define REG_A5XX_RBBM_CLOCK_CNTL2_UCHE 0x0000005b - -#define REG_A5XX_RBBM_CLOCK_CNTL3_UCHE 0x0000005c - -#define REG_A5XX_RBBM_CLOCK_CNTL4_UCHE 0x0000005d - -#define REG_A5XX_RBBM_CLOCK_HYST_UCHE 0x0000005e - -#define REG_A5XX_RBBM_CLOCK_DELAY_UCHE 0x0000005f - -#define REG_A5XX_RBBM_CLOCK_MODE_GPC 0x00000060 - -#define REG_A5XX_RBBM_CLOCK_DELAY_GPC 0x00000061 - -#define REG_A5XX_RBBM_CLOCK_HYST_GPC 0x00000062 - -#define REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x00000063 - -#define REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x00000064 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x00000065 - -#define REG_A5XX_RBBM_CLOCK_DELAY_HLSQ 0x00000066 - -#define REG_A5XX_RBBM_CLOCK_CNTL 0x00000067 - -#define REG_A5XX_RBBM_CLOCK_CNTL_SP0 0x00000068 - -#define REG_A5XX_RBBM_CLOCK_CNTL_SP1 0x00000069 - -#define REG_A5XX_RBBM_CLOCK_CNTL_SP2 0x0000006a - -#define REG_A5XX_RBBM_CLOCK_CNTL_SP3 0x0000006b - -#define REG_A5XX_RBBM_CLOCK_CNTL2_SP0 0x0000006c - -#define REG_A5XX_RBBM_CLOCK_CNTL2_SP1 0x0000006d - -#define REG_A5XX_RBBM_CLOCK_CNTL2_SP2 0x0000006e - -#define REG_A5XX_RBBM_CLOCK_CNTL2_SP3 0x0000006f - -#define REG_A5XX_RBBM_CLOCK_HYST_SP0 0x00000070 - -#define REG_A5XX_RBBM_CLOCK_HYST_SP1 0x00000071 - -#define REG_A5XX_RBBM_CLOCK_HYST_SP2 0x00000072 - -#define REG_A5XX_RBBM_CLOCK_HYST_SP3 0x00000073 - -#define REG_A5XX_RBBM_CLOCK_DELAY_SP0 0x00000074 - -#define REG_A5XX_RBBM_CLOCK_DELAY_SP1 0x00000075 - -#define REG_A5XX_RBBM_CLOCK_DELAY_SP2 0x00000076 - -#define REG_A5XX_RBBM_CLOCK_DELAY_SP3 0x00000077 - -#define REG_A5XX_RBBM_CLOCK_CNTL_RB0 0x00000078 - -#define REG_A5XX_RBBM_CLOCK_CNTL_RB1 0x00000079 - -#define REG_A5XX_RBBM_CLOCK_CNTL_RB2 0x0000007a - -#define REG_A5XX_RBBM_CLOCK_CNTL_RB3 0x0000007b - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RB0 0x0000007c - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RB1 0x0000007d - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RB2 0x0000007e - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RB3 0x0000007f - -#define REG_A5XX_RBBM_CLOCK_HYST_RAC 0x00000080 - -#define REG_A5XX_RBBM_CLOCK_DELAY_RAC 0x00000081 - -#define REG_A5XX_RBBM_CLOCK_CNTL_CCU0 0x00000082 - -#define REG_A5XX_RBBM_CLOCK_CNTL_CCU1 0x00000083 - -#define REG_A5XX_RBBM_CLOCK_CNTL_CCU2 0x00000084 - -#define REG_A5XX_RBBM_CLOCK_CNTL_CCU3 0x00000085 - -#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0 0x00000086 - -#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1 0x00000087 - -#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2 0x00000088 - -#define REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3 0x00000089 - -#define REG_A5XX_RBBM_CLOCK_CNTL_RAC 0x0000008a - -#define REG_A5XX_RBBM_CLOCK_CNTL2_RAC 0x0000008b - -#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0 0x0000008c - -#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1 0x0000008d - -#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2 0x0000008e - -#define REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3 0x0000008f - -#define REG_A5XX_RBBM_CLOCK_HYST_VFD 0x00000090 - -#define REG_A5XX_RBBM_CLOCK_MODE_VFD 0x00000091 - -#define REG_A5XX_RBBM_CLOCK_DELAY_VFD 0x00000092 - -#define REG_A5XX_RBBM_AHB_CNTL0 0x00000093 - -#define REG_A5XX_RBBM_AHB_CNTL1 0x00000094 - -#define REG_A5XX_RBBM_AHB_CNTL2 0x00000095 - -#define REG_A5XX_RBBM_AHB_CMD 0x00000096 - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11 0x0000009c - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12 0x0000009d - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13 0x0000009e - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14 0x0000009f - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15 0x000000a0 - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16 0x000000a1 - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17 0x000000a2 - -#define REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18 0x000000a3 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TP0 0x000000a4 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TP1 0x000000a5 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TP2 0x000000a6 - -#define REG_A5XX_RBBM_CLOCK_DELAY_TP3 0x000000a7 - -#define REG_A5XX_RBBM_CLOCK_DELAY2_TP0 0x000000a8 - -#define REG_A5XX_RBBM_CLOCK_DELAY2_TP1 0x000000a9 - -#define REG_A5XX_RBBM_CLOCK_DELAY2_TP2 0x000000aa - -#define REG_A5XX_RBBM_CLOCK_DELAY2_TP3 0x000000ab - -#define REG_A5XX_RBBM_CLOCK_DELAY3_TP0 0x000000ac - -#define REG_A5XX_RBBM_CLOCK_DELAY3_TP1 0x000000ad - -#define REG_A5XX_RBBM_CLOCK_DELAY3_TP2 0x000000ae - -#define REG_A5XX_RBBM_CLOCK_DELAY3_TP3 0x000000af - -#define REG_A5XX_RBBM_CLOCK_HYST_TP0 0x000000b0 - -#define REG_A5XX_RBBM_CLOCK_HYST_TP1 0x000000b1 - -#define REG_A5XX_RBBM_CLOCK_HYST_TP2 0x000000b2 - -#define REG_A5XX_RBBM_CLOCK_HYST_TP3 0x000000b3 - -#define REG_A5XX_RBBM_CLOCK_HYST2_TP0 0x000000b4 - -#define REG_A5XX_RBBM_CLOCK_HYST2_TP1 0x000000b5 - -#define REG_A5XX_RBBM_CLOCK_HYST2_TP2 0x000000b6 - -#define REG_A5XX_RBBM_CLOCK_HYST2_TP3 0x000000b7 - -#define REG_A5XX_RBBM_CLOCK_HYST3_TP0 0x000000b8 - -#define REG_A5XX_RBBM_CLOCK_HYST3_TP1 0x000000b9 - -#define REG_A5XX_RBBM_CLOCK_HYST3_TP2 0x000000ba - -#define REG_A5XX_RBBM_CLOCK_HYST3_TP3 0x000000bb - -#define REG_A5XX_RBBM_CLOCK_CNTL_GPMU 0x000000c8 - -#define REG_A5XX_RBBM_CLOCK_DELAY_GPMU 0x000000c9 - -#define REG_A5XX_RBBM_CLOCK_HYST_GPMU 0x000000ca - -#define REG_A5XX_RBBM_PERFCTR_CP_0_LO 0x000003a0 - -#define REG_A5XX_RBBM_PERFCTR_CP_0_HI 0x000003a1 - -#define REG_A5XX_RBBM_PERFCTR_CP_1_LO 0x000003a2 - -#define REG_A5XX_RBBM_PERFCTR_CP_1_HI 0x000003a3 - -#define REG_A5XX_RBBM_PERFCTR_CP_2_LO 0x000003a4 - -#define REG_A5XX_RBBM_PERFCTR_CP_2_HI 0x000003a5 - -#define REG_A5XX_RBBM_PERFCTR_CP_3_LO 0x000003a6 - -#define REG_A5XX_RBBM_PERFCTR_CP_3_HI 0x000003a7 - -#define REG_A5XX_RBBM_PERFCTR_CP_4_LO 0x000003a8 - -#define REG_A5XX_RBBM_PERFCTR_CP_4_HI 0x000003a9 - -#define REG_A5XX_RBBM_PERFCTR_CP_5_LO 0x000003aa - -#define REG_A5XX_RBBM_PERFCTR_CP_5_HI 0x000003ab - -#define REG_A5XX_RBBM_PERFCTR_CP_6_LO 0x000003ac - -#define REG_A5XX_RBBM_PERFCTR_CP_6_HI 0x000003ad - -#define REG_A5XX_RBBM_PERFCTR_CP_7_LO 0x000003ae - -#define REG_A5XX_RBBM_PERFCTR_CP_7_HI 0x000003af - -#define REG_A5XX_RBBM_PERFCTR_RBBM_0_LO 0x000003b0 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_0_HI 0x000003b1 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_1_LO 0x000003b2 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_1_HI 0x000003b3 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_2_LO 0x000003b4 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_2_HI 0x000003b5 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_3_LO 0x000003b6 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_3_HI 0x000003b7 - -#define REG_A5XX_RBBM_PERFCTR_PC_0_LO 0x000003b8 - -#define REG_A5XX_RBBM_PERFCTR_PC_0_HI 0x000003b9 - -#define REG_A5XX_RBBM_PERFCTR_PC_1_LO 0x000003ba - -#define REG_A5XX_RBBM_PERFCTR_PC_1_HI 0x000003bb - -#define REG_A5XX_RBBM_PERFCTR_PC_2_LO 0x000003bc - -#define REG_A5XX_RBBM_PERFCTR_PC_2_HI 0x000003bd - -#define REG_A5XX_RBBM_PERFCTR_PC_3_LO 0x000003be - -#define REG_A5XX_RBBM_PERFCTR_PC_3_HI 0x000003bf - -#define REG_A5XX_RBBM_PERFCTR_PC_4_LO 0x000003c0 - -#define REG_A5XX_RBBM_PERFCTR_PC_4_HI 0x000003c1 - -#define REG_A5XX_RBBM_PERFCTR_PC_5_LO 0x000003c2 - -#define REG_A5XX_RBBM_PERFCTR_PC_5_HI 0x000003c3 - -#define REG_A5XX_RBBM_PERFCTR_PC_6_LO 0x000003c4 - -#define REG_A5XX_RBBM_PERFCTR_PC_6_HI 0x000003c5 - -#define REG_A5XX_RBBM_PERFCTR_PC_7_LO 0x000003c6 - -#define REG_A5XX_RBBM_PERFCTR_PC_7_HI 0x000003c7 - -#define REG_A5XX_RBBM_PERFCTR_VFD_0_LO 0x000003c8 - -#define REG_A5XX_RBBM_PERFCTR_VFD_0_HI 0x000003c9 - -#define REG_A5XX_RBBM_PERFCTR_VFD_1_LO 0x000003ca - -#define REG_A5XX_RBBM_PERFCTR_VFD_1_HI 0x000003cb - -#define REG_A5XX_RBBM_PERFCTR_VFD_2_LO 0x000003cc - -#define REG_A5XX_RBBM_PERFCTR_VFD_2_HI 0x000003cd - -#define REG_A5XX_RBBM_PERFCTR_VFD_3_LO 0x000003ce - -#define REG_A5XX_RBBM_PERFCTR_VFD_3_HI 0x000003cf - -#define REG_A5XX_RBBM_PERFCTR_VFD_4_LO 0x000003d0 - -#define REG_A5XX_RBBM_PERFCTR_VFD_4_HI 0x000003d1 - -#define REG_A5XX_RBBM_PERFCTR_VFD_5_LO 0x000003d2 - -#define REG_A5XX_RBBM_PERFCTR_VFD_5_HI 0x000003d3 - -#define REG_A5XX_RBBM_PERFCTR_VFD_6_LO 0x000003d4 - -#define REG_A5XX_RBBM_PERFCTR_VFD_6_HI 0x000003d5 - -#define REG_A5XX_RBBM_PERFCTR_VFD_7_LO 0x000003d6 - -#define REG_A5XX_RBBM_PERFCTR_VFD_7_HI 0x000003d7 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_0_LO 0x000003d8 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_0_HI 0x000003d9 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_1_LO 0x000003da - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_1_HI 0x000003db - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_2_LO 0x000003dc - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_2_HI 0x000003dd - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_3_LO 0x000003de - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_3_HI 0x000003df - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_4_LO 0x000003e0 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_4_HI 0x000003e1 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_5_LO 0x000003e2 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_5_HI 0x000003e3 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_6_LO 0x000003e4 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_6_HI 0x000003e5 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_7_LO 0x000003e6 - -#define REG_A5XX_RBBM_PERFCTR_HLSQ_7_HI 0x000003e7 - -#define REG_A5XX_RBBM_PERFCTR_VPC_0_LO 0x000003e8 - -#define REG_A5XX_RBBM_PERFCTR_VPC_0_HI 0x000003e9 - -#define REG_A5XX_RBBM_PERFCTR_VPC_1_LO 0x000003ea - -#define REG_A5XX_RBBM_PERFCTR_VPC_1_HI 0x000003eb - -#define REG_A5XX_RBBM_PERFCTR_VPC_2_LO 0x000003ec - -#define REG_A5XX_RBBM_PERFCTR_VPC_2_HI 0x000003ed - -#define REG_A5XX_RBBM_PERFCTR_VPC_3_LO 0x000003ee - -#define REG_A5XX_RBBM_PERFCTR_VPC_3_HI 0x000003ef - -#define REG_A5XX_RBBM_PERFCTR_CCU_0_LO 0x000003f0 - -#define REG_A5XX_RBBM_PERFCTR_CCU_0_HI 0x000003f1 - -#define REG_A5XX_RBBM_PERFCTR_CCU_1_LO 0x000003f2 - -#define REG_A5XX_RBBM_PERFCTR_CCU_1_HI 0x000003f3 - -#define REG_A5XX_RBBM_PERFCTR_CCU_2_LO 0x000003f4 - -#define REG_A5XX_RBBM_PERFCTR_CCU_2_HI 0x000003f5 - -#define REG_A5XX_RBBM_PERFCTR_CCU_3_LO 0x000003f6 - -#define REG_A5XX_RBBM_PERFCTR_CCU_3_HI 0x000003f7 - -#define REG_A5XX_RBBM_PERFCTR_TSE_0_LO 0x000003f8 - -#define REG_A5XX_RBBM_PERFCTR_TSE_0_HI 0x000003f9 - -#define REG_A5XX_RBBM_PERFCTR_TSE_1_LO 0x000003fa - -#define REG_A5XX_RBBM_PERFCTR_TSE_1_HI 0x000003fb - -#define REG_A5XX_RBBM_PERFCTR_TSE_2_LO 0x000003fc - -#define REG_A5XX_RBBM_PERFCTR_TSE_2_HI 0x000003fd - -#define REG_A5XX_RBBM_PERFCTR_TSE_3_LO 0x000003fe - -#define REG_A5XX_RBBM_PERFCTR_TSE_3_HI 0x000003ff - -#define REG_A5XX_RBBM_PERFCTR_RAS_0_LO 0x00000400 - -#define REG_A5XX_RBBM_PERFCTR_RAS_0_HI 0x00000401 - -#define REG_A5XX_RBBM_PERFCTR_RAS_1_LO 0x00000402 - -#define REG_A5XX_RBBM_PERFCTR_RAS_1_HI 0x00000403 - -#define REG_A5XX_RBBM_PERFCTR_RAS_2_LO 0x00000404 - -#define REG_A5XX_RBBM_PERFCTR_RAS_2_HI 0x00000405 - -#define REG_A5XX_RBBM_PERFCTR_RAS_3_LO 0x00000406 - -#define REG_A5XX_RBBM_PERFCTR_RAS_3_HI 0x00000407 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_0_LO 0x00000408 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_0_HI 0x00000409 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_1_LO 0x0000040a - -#define REG_A5XX_RBBM_PERFCTR_UCHE_1_HI 0x0000040b - -#define REG_A5XX_RBBM_PERFCTR_UCHE_2_LO 0x0000040c - -#define REG_A5XX_RBBM_PERFCTR_UCHE_2_HI 0x0000040d - -#define REG_A5XX_RBBM_PERFCTR_UCHE_3_LO 0x0000040e - -#define REG_A5XX_RBBM_PERFCTR_UCHE_3_HI 0x0000040f - -#define REG_A5XX_RBBM_PERFCTR_UCHE_4_LO 0x00000410 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_4_HI 0x00000411 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_5_LO 0x00000412 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_5_HI 0x00000413 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_6_LO 0x00000414 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_6_HI 0x00000415 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_7_LO 0x00000416 - -#define REG_A5XX_RBBM_PERFCTR_UCHE_7_HI 0x00000417 - -#define REG_A5XX_RBBM_PERFCTR_TP_0_LO 0x00000418 - -#define REG_A5XX_RBBM_PERFCTR_TP_0_HI 0x00000419 - -#define REG_A5XX_RBBM_PERFCTR_TP_1_LO 0x0000041a - -#define REG_A5XX_RBBM_PERFCTR_TP_1_HI 0x0000041b - -#define REG_A5XX_RBBM_PERFCTR_TP_2_LO 0x0000041c - -#define REG_A5XX_RBBM_PERFCTR_TP_2_HI 0x0000041d - -#define REG_A5XX_RBBM_PERFCTR_TP_3_LO 0x0000041e - -#define REG_A5XX_RBBM_PERFCTR_TP_3_HI 0x0000041f - -#define REG_A5XX_RBBM_PERFCTR_TP_4_LO 0x00000420 - -#define REG_A5XX_RBBM_PERFCTR_TP_4_HI 0x00000421 - -#define REG_A5XX_RBBM_PERFCTR_TP_5_LO 0x00000422 - -#define REG_A5XX_RBBM_PERFCTR_TP_5_HI 0x00000423 - -#define REG_A5XX_RBBM_PERFCTR_TP_6_LO 0x00000424 - -#define REG_A5XX_RBBM_PERFCTR_TP_6_HI 0x00000425 - -#define REG_A5XX_RBBM_PERFCTR_TP_7_LO 0x00000426 - -#define REG_A5XX_RBBM_PERFCTR_TP_7_HI 0x00000427 - -#define REG_A5XX_RBBM_PERFCTR_SP_0_LO 0x00000428 - -#define REG_A5XX_RBBM_PERFCTR_SP_0_HI 0x00000429 - -#define REG_A5XX_RBBM_PERFCTR_SP_1_LO 0x0000042a - -#define REG_A5XX_RBBM_PERFCTR_SP_1_HI 0x0000042b - -#define REG_A5XX_RBBM_PERFCTR_SP_2_LO 0x0000042c - -#define REG_A5XX_RBBM_PERFCTR_SP_2_HI 0x0000042d - -#define REG_A5XX_RBBM_PERFCTR_SP_3_LO 0x0000042e - -#define REG_A5XX_RBBM_PERFCTR_SP_3_HI 0x0000042f - -#define REG_A5XX_RBBM_PERFCTR_SP_4_LO 0x00000430 - -#define REG_A5XX_RBBM_PERFCTR_SP_4_HI 0x00000431 - -#define REG_A5XX_RBBM_PERFCTR_SP_5_LO 0x00000432 - -#define REG_A5XX_RBBM_PERFCTR_SP_5_HI 0x00000433 - -#define REG_A5XX_RBBM_PERFCTR_SP_6_LO 0x00000434 - -#define REG_A5XX_RBBM_PERFCTR_SP_6_HI 0x00000435 - -#define REG_A5XX_RBBM_PERFCTR_SP_7_LO 0x00000436 - -#define REG_A5XX_RBBM_PERFCTR_SP_7_HI 0x00000437 - -#define REG_A5XX_RBBM_PERFCTR_SP_8_LO 0x00000438 - -#define REG_A5XX_RBBM_PERFCTR_SP_8_HI 0x00000439 - -#define REG_A5XX_RBBM_PERFCTR_SP_9_LO 0x0000043a - -#define REG_A5XX_RBBM_PERFCTR_SP_9_HI 0x0000043b - -#define REG_A5XX_RBBM_PERFCTR_SP_10_LO 0x0000043c - -#define REG_A5XX_RBBM_PERFCTR_SP_10_HI 0x0000043d - -#define REG_A5XX_RBBM_PERFCTR_SP_11_LO 0x0000043e - -#define REG_A5XX_RBBM_PERFCTR_SP_11_HI 0x0000043f - -#define REG_A5XX_RBBM_PERFCTR_RB_0_LO 0x00000440 - -#define REG_A5XX_RBBM_PERFCTR_RB_0_HI 0x00000441 - -#define REG_A5XX_RBBM_PERFCTR_RB_1_LO 0x00000442 - -#define REG_A5XX_RBBM_PERFCTR_RB_1_HI 0x00000443 - -#define REG_A5XX_RBBM_PERFCTR_RB_2_LO 0x00000444 - -#define REG_A5XX_RBBM_PERFCTR_RB_2_HI 0x00000445 - -#define REG_A5XX_RBBM_PERFCTR_RB_3_LO 0x00000446 - -#define REG_A5XX_RBBM_PERFCTR_RB_3_HI 0x00000447 - -#define REG_A5XX_RBBM_PERFCTR_RB_4_LO 0x00000448 - -#define REG_A5XX_RBBM_PERFCTR_RB_4_HI 0x00000449 - -#define REG_A5XX_RBBM_PERFCTR_RB_5_LO 0x0000044a - -#define REG_A5XX_RBBM_PERFCTR_RB_5_HI 0x0000044b - -#define REG_A5XX_RBBM_PERFCTR_RB_6_LO 0x0000044c - -#define REG_A5XX_RBBM_PERFCTR_RB_6_HI 0x0000044d - -#define REG_A5XX_RBBM_PERFCTR_RB_7_LO 0x0000044e - -#define REG_A5XX_RBBM_PERFCTR_RB_7_HI 0x0000044f - -#define REG_A5XX_RBBM_PERFCTR_VSC_0_LO 0x00000450 - -#define REG_A5XX_RBBM_PERFCTR_VSC_0_HI 0x00000451 - -#define REG_A5XX_RBBM_PERFCTR_VSC_1_LO 0x00000452 - -#define REG_A5XX_RBBM_PERFCTR_VSC_1_HI 0x00000453 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_0_LO 0x00000454 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_0_HI 0x00000455 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_1_LO 0x00000456 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_1_HI 0x00000457 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_2_LO 0x00000458 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_2_HI 0x00000459 - -#define REG_A5XX_RBBM_PERFCTR_LRZ_3_LO 0x0000045a - -#define REG_A5XX_RBBM_PERFCTR_LRZ_3_HI 0x0000045b - -#define REG_A5XX_RBBM_PERFCTR_CMP_0_LO 0x0000045c - -#define REG_A5XX_RBBM_PERFCTR_CMP_0_HI 0x0000045d - -#define REG_A5XX_RBBM_PERFCTR_CMP_1_LO 0x0000045e - -#define REG_A5XX_RBBM_PERFCTR_CMP_1_HI 0x0000045f - -#define REG_A5XX_RBBM_PERFCTR_CMP_2_LO 0x00000460 - -#define REG_A5XX_RBBM_PERFCTR_CMP_2_HI 0x00000461 - -#define REG_A5XX_RBBM_PERFCTR_CMP_3_LO 0x00000462 - -#define REG_A5XX_RBBM_PERFCTR_CMP_3_HI 0x00000463 - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x0000046b - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x0000046c - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x0000046d - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000046e - -#define REG_A5XX_RBBM_ALWAYSON_COUNTER_LO 0x000004d2 - -#define REG_A5XX_RBBM_ALWAYSON_COUNTER_HI 0x000004d3 - -#define REG_A5XX_RBBM_STATUS 0x000004f5 -#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB 0x80000000 -#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_CP 0x40000000 -#define A5XX_RBBM_STATUS_HLSQ_BUSY 0x20000000 -#define A5XX_RBBM_STATUS_VSC_BUSY 0x10000000 -#define A5XX_RBBM_STATUS_TPL1_BUSY 0x08000000 -#define A5XX_RBBM_STATUS_SP_BUSY 0x04000000 -#define A5XX_RBBM_STATUS_UCHE_BUSY 0x02000000 -#define A5XX_RBBM_STATUS_VPC_BUSY 0x01000000 -#define A5XX_RBBM_STATUS_VFDP_BUSY 0x00800000 -#define A5XX_RBBM_STATUS_VFD_BUSY 0x00400000 -#define A5XX_RBBM_STATUS_TESS_BUSY 0x00200000 -#define A5XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000 -#define A5XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000 -#define A5XX_RBBM_STATUS_GPMU_SLAVE_BUSY 0x00040000 -#define A5XX_RBBM_STATUS_DCOM_BUSY 0x00020000 -#define A5XX_RBBM_STATUS_COM_BUSY 0x00010000 -#define A5XX_RBBM_STATUS_LRZ_BUZY 0x00008000 -#define A5XX_RBBM_STATUS_A2D_DSP_BUSY 0x00004000 -#define A5XX_RBBM_STATUS_CCUFCHE_BUSY 0x00002000 -#define A5XX_RBBM_STATUS_RB_BUSY 0x00001000 -#define A5XX_RBBM_STATUS_RAS_BUSY 0x00000800 -#define A5XX_RBBM_STATUS_TSE_BUSY 0x00000400 -#define A5XX_RBBM_STATUS_VBIF_BUSY 0x00000200 -#define A5XX_RBBM_STATUS_GPU_BUSY_IGN_AHB_HYST 0x00000100 -#define A5XX_RBBM_STATUS_CP_BUSY_IGN_HYST 0x00000080 -#define A5XX_RBBM_STATUS_CP_BUSY 0x00000040 -#define A5XX_RBBM_STATUS_GPMU_MASTER_BUSY 0x00000020 -#define A5XX_RBBM_STATUS_CP_CRASH_BUSY 0x00000010 -#define A5XX_RBBM_STATUS_CP_ETS_BUSY 0x00000008 -#define A5XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004 -#define A5XX_RBBM_STATUS_CP_ME_BUSY 0x00000002 -#define A5XX_RBBM_STATUS_HI_BUSY 0x00000001 - -#define REG_A5XX_RBBM_STATUS3 0x00000530 - -#define REG_A5XX_RBBM_INT_0_STATUS 0x000004e1 - -#define REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS 0x000004f0 - -#define REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS 0x000004f1 - -#define REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS 0x000004f3 - -#define REG_A5XX_RBBM_AHB_ERROR_STATUS 0x000004f4 - -#define REG_A5XX_RBBM_PERFCTR_CNTL 0x00000464 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD0 0x00000465 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD1 0x00000466 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD2 0x00000467 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_CMD3 0x00000468 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000469 - -#define REG_A5XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x0000046a - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x0000046b - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x0000046c - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x0000046d - -#define REG_A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x0000046e - -#define REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x0000046f - -#define REG_A5XX_RBBM_AHB_ERROR 0x000004ed - -#define REG_A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC 0x00000504 - -#define REG_A5XX_RBBM_CFG_DBGBUS_OVER 0x00000505 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT0 0x00000506 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT1 0x00000507 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT2 0x00000508 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT3 0x00000509 - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT4 0x0000050a - -#define REG_A5XX_RBBM_CFG_DBGBUS_COUNT5 0x0000050b - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR 0x0000050c - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 0x0000050d - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 0x0000050e - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 0x0000050f - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 0x00000510 - -#define REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 0x00000511 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MISR0 0x00000512 - -#define REG_A5XX_RBBM_CFG_DBGBUS_MISR1 0x00000513 - -#define REG_A5XX_RBBM_ISDB_CNT 0x00000533 - -#define REG_A5XX_RBBM_SECVID_TRUST_CONFIG 0x0000f000 - -#define REG_A5XX_RBBM_SECVID_TRUST_CNTL 0x0000f400 - -#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0x0000f800 - -#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0x0000f801 - -#define REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0x0000f802 - -#define REG_A5XX_RBBM_SECVID_TSB_CNTL 0x0000f803 - -#define REG_A5XX_RBBM_SECVID_TSB_COMP_STATUS_LO 0x0000f804 - -#define REG_A5XX_RBBM_SECVID_TSB_COMP_STATUS_HI 0x0000f805 - -#define REG_A5XX_RBBM_SECVID_TSB_UCHE_STATUS_LO 0x0000f806 - -#define REG_A5XX_RBBM_SECVID_TSB_UCHE_STATUS_HI 0x0000f807 - -#define REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0x0000f810 - -#define REG_A5XX_VSC_BIN_SIZE 0x00000bc2 -#define A5XX_VSC_BIN_SIZE_WIDTH__MASK 0x000000ff -#define A5XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 -static inline uint32_t A5XX_VSC_BIN_SIZE_WIDTH(uint32_t val) -{ - return ((val >> 5) << A5XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A5XX_VSC_BIN_SIZE_WIDTH__MASK; -} -#define A5XX_VSC_BIN_SIZE_HEIGHT__MASK 0x0001fe00 -#define A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT 9 -static inline uint32_t A5XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) -{ - return ((val >> 5) << A5XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A5XX_VSC_BIN_SIZE_HEIGHT__MASK; -} - -#define REG_A5XX_VSC_SIZE_ADDRESS_LO 0x00000bc3 - -#define REG_A5XX_VSC_SIZE_ADDRESS_HI 0x00000bc4 - -#define REG_A5XX_UNKNOWN_0BC5 0x00000bc5 - -#define REG_A5XX_UNKNOWN_0BC6 0x00000bc6 - -static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000bd0 + 0x1*i0; } -#define A5XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff -#define A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0 -static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_X(uint32_t val) -{ - return ((val) << A5XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_X__MASK; -} -#define A5XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00 -#define A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10 -static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val) -{ - return ((val) << A5XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_Y__MASK; -} -#define A5XX_VSC_PIPE_CONFIG_REG_W__MASK 0x00f00000 -#define A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20 -static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_W(uint32_t val) -{ - return ((val) << A5XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_W__MASK; -} -#define A5XX_VSC_PIPE_CONFIG_REG_H__MASK 0x0f000000 -#define A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT 24 -static inline uint32_t A5XX_VSC_PIPE_CONFIG_REG_H(uint32_t val) -{ - return ((val) << A5XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A5XX_VSC_PIPE_CONFIG_REG_H__MASK; -} - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000be0 + 0x2*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(uint32_t i0) { return 0x00000be0 + 0x2*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_ADDRESS_HI(uint32_t i0) { return 0x00000be1 + 0x2*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c00 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0x00000c00 + 0x1*i0; } - -#define REG_A5XX_VSC_PERFCTR_VSC_SEL_0 0x00000c60 - -#define REG_A5XX_VSC_PERFCTR_VSC_SEL_1 0x00000c61 - -#define REG_A5XX_VSC_RESOLVE_CNTL 0x00000cdd -#define A5XX_VSC_RESOLVE_CNTL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_VSC_RESOLVE_CNTL_X__MASK 0x00007fff -#define A5XX_VSC_RESOLVE_CNTL_X__SHIFT 0 -static inline uint32_t A5XX_VSC_RESOLVE_CNTL_X(uint32_t val) -{ - return ((val) << A5XX_VSC_RESOLVE_CNTL_X__SHIFT) & A5XX_VSC_RESOLVE_CNTL_X__MASK; -} -#define A5XX_VSC_RESOLVE_CNTL_Y__MASK 0x7fff0000 -#define A5XX_VSC_RESOLVE_CNTL_Y__SHIFT 16 -static inline uint32_t A5XX_VSC_RESOLVE_CNTL_Y(uint32_t val) -{ - return ((val) << A5XX_VSC_RESOLVE_CNTL_Y__SHIFT) & A5XX_VSC_RESOLVE_CNTL_Y__MASK; -} - -#define REG_A5XX_GRAS_ADDR_MODE_CNTL 0x00000c81 - -#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c90 - -#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c91 - -#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c92 - -#define REG_A5XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c93 - -#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c94 - -#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c95 - -#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c96 - -#define REG_A5XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c97 - -#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_0 0x00000c98 - -#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_1 0x00000c99 - -#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_2 0x00000c9a - -#define REG_A5XX_GRAS_PERFCTR_LRZ_SEL_3 0x00000c9b - -#define REG_A5XX_RB_DBG_ECO_CNTL 0x00000cc4 - -#define REG_A5XX_RB_ADDR_MODE_CNTL 0x00000cc5 - -#define REG_A5XX_RB_MODE_CNTL 0x00000cc6 - -#define REG_A5XX_RB_CCU_CNTL 0x00000cc7 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_0 0x00000cd0 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_1 0x00000cd1 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_2 0x00000cd2 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_3 0x00000cd3 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_4 0x00000cd4 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_5 0x00000cd5 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_6 0x00000cd6 - -#define REG_A5XX_RB_PERFCTR_RB_SEL_7 0x00000cd7 - -#define REG_A5XX_RB_PERFCTR_CCU_SEL_0 0x00000cd8 - -#define REG_A5XX_RB_PERFCTR_CCU_SEL_1 0x00000cd9 - -#define REG_A5XX_RB_PERFCTR_CCU_SEL_2 0x00000cda - -#define REG_A5XX_RB_PERFCTR_CCU_SEL_3 0x00000cdb - -#define REG_A5XX_RB_POWERCTR_RB_SEL_0 0x00000ce0 - -#define REG_A5XX_RB_POWERCTR_RB_SEL_1 0x00000ce1 - -#define REG_A5XX_RB_POWERCTR_RB_SEL_2 0x00000ce2 - -#define REG_A5XX_RB_POWERCTR_RB_SEL_3 0x00000ce3 - -#define REG_A5XX_RB_POWERCTR_CCU_SEL_0 0x00000ce4 - -#define REG_A5XX_RB_POWERCTR_CCU_SEL_1 0x00000ce5 - -#define REG_A5XX_RB_PERFCTR_CMP_SEL_0 0x00000cec - -#define REG_A5XX_RB_PERFCTR_CMP_SEL_1 0x00000ced - -#define REG_A5XX_RB_PERFCTR_CMP_SEL_2 0x00000cee - -#define REG_A5XX_RB_PERFCTR_CMP_SEL_3 0x00000cef - -#define REG_A5XX_PC_DBG_ECO_CNTL 0x00000d00 -#define A5XX_PC_DBG_ECO_CNTL_TWOPASSUSEWFI 0x00000100 - -#define REG_A5XX_PC_ADDR_MODE_CNTL 0x00000d01 - -#define REG_A5XX_PC_MODE_CNTL 0x00000d02 - -#define REG_A5XX_PC_INDEX_BUF_LO 0x00000d04 - -#define REG_A5XX_PC_INDEX_BUF_HI 0x00000d05 - -#define REG_A5XX_PC_START_INDEX 0x00000d06 - -#define REG_A5XX_PC_MAX_INDEX 0x00000d07 - -#define REG_A5XX_PC_TESSFACTOR_ADDR_LO 0x00000d08 - -#define REG_A5XX_PC_TESSFACTOR_ADDR_HI 0x00000d09 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_0 0x00000d10 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_1 0x00000d11 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_2 0x00000d12 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_3 0x00000d13 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_4 0x00000d14 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_5 0x00000d15 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_6 0x00000d16 - -#define REG_A5XX_PC_PERFCTR_PC_SEL_7 0x00000d17 - -#define REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0 0x00000e00 - -#define REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_1 0x00000e01 - -#define REG_A5XX_HLSQ_DBG_ECO_CNTL 0x00000e04 - -#define REG_A5XX_HLSQ_ADDR_MODE_CNTL 0x00000e05 - -#define REG_A5XX_HLSQ_MODE_CNTL 0x00000e06 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e10 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e11 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e12 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e13 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e14 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e15 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e16 - -#define REG_A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e17 - -#define REG_A5XX_HLSQ_SPTP_RDSEL 0x00000f08 - -#define REG_A5XX_HLSQ_DBG_READ_SEL 0x0000bc00 - -#define REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000a000 - -#define REG_A5XX_VFD_ADDR_MODE_CNTL 0x00000e41 - -#define REG_A5XX_VFD_MODE_CNTL 0x00000e42 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_0 0x00000e50 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_1 0x00000e51 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_2 0x00000e52 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_3 0x00000e53 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_4 0x00000e54 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_5 0x00000e55 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_6 0x00000e56 - -#define REG_A5XX_VFD_PERFCTR_VFD_SEL_7 0x00000e57 - -#define REG_A5XX_VPC_DBG_ECO_CNTL 0x00000e60 - -#define REG_A5XX_VPC_ADDR_MODE_CNTL 0x00000e61 - -#define REG_A5XX_VPC_MODE_CNTL 0x00000e62 -#define A5XX_VPC_MODE_CNTL_BINNING_PASS 0x00000001 - -#define REG_A5XX_VPC_PERFCTR_VPC_SEL_0 0x00000e64 - -#define REG_A5XX_VPC_PERFCTR_VPC_SEL_1 0x00000e65 - -#define REG_A5XX_VPC_PERFCTR_VPC_SEL_2 0x00000e66 - -#define REG_A5XX_VPC_PERFCTR_VPC_SEL_3 0x00000e67 - -#define REG_A5XX_UCHE_ADDR_MODE_CNTL 0x00000e80 - -#define REG_A5XX_UCHE_SVM_CNTL 0x00000e82 - -#define REG_A5XX_UCHE_WRITE_THRU_BASE_LO 0x00000e87 - -#define REG_A5XX_UCHE_WRITE_THRU_BASE_HI 0x00000e88 - -#define REG_A5XX_UCHE_TRAP_BASE_LO 0x00000e89 - -#define REG_A5XX_UCHE_TRAP_BASE_HI 0x00000e8a - -#define REG_A5XX_UCHE_GMEM_RANGE_MIN_LO 0x00000e8b - -#define REG_A5XX_UCHE_GMEM_RANGE_MIN_HI 0x00000e8c - -#define REG_A5XX_UCHE_GMEM_RANGE_MAX_LO 0x00000e8d - -#define REG_A5XX_UCHE_GMEM_RANGE_MAX_HI 0x00000e8e - -#define REG_A5XX_UCHE_DBG_ECO_CNTL_2 0x00000e8f - -#define REG_A5XX_UCHE_DBG_ECO_CNTL 0x00000e90 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO 0x00000e91 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_HI 0x00000e92 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE_MAX_LO 0x00000e93 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE_MAX_HI 0x00000e94 - -#define REG_A5XX_UCHE_CACHE_INVALIDATE 0x00000e95 - -#define REG_A5XX_UCHE_CACHE_WAYS 0x00000e96 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000ea0 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000ea1 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000ea2 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000ea3 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000ea4 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000ea5 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000ea6 - -#define REG_A5XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000ea7 - -#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_0 0x00000ea8 - -#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_1 0x00000ea9 - -#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_2 0x00000eaa - -#define REG_A5XX_UCHE_POWERCTR_UCHE_SEL_3 0x00000eab - -#define REG_A5XX_UCHE_TRAP_LOG_LO 0x00000eb1 - -#define REG_A5XX_UCHE_TRAP_LOG_HI 0x00000eb2 - -#define REG_A5XX_SP_DBG_ECO_CNTL 0x00000ec0 - -#define REG_A5XX_SP_ADDR_MODE_CNTL 0x00000ec1 - -#define REG_A5XX_SP_MODE_CNTL 0x00000ec2 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_0 0x00000ed0 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_1 0x00000ed1 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_2 0x00000ed2 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_3 0x00000ed3 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_4 0x00000ed4 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_5 0x00000ed5 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_6 0x00000ed6 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_7 0x00000ed7 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_8 0x00000ed8 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_9 0x00000ed9 - -#define REG_A5XX_SP_PERFCTR_SP_SEL_10 0x00000eda - -#define REG_A5XX_SP_PERFCTR_SP_SEL_11 0x00000edb - -#define REG_A5XX_SP_POWERCTR_SP_SEL_0 0x00000edc - -#define REG_A5XX_SP_POWERCTR_SP_SEL_1 0x00000edd - -#define REG_A5XX_SP_POWERCTR_SP_SEL_2 0x00000ede - -#define REG_A5XX_SP_POWERCTR_SP_SEL_3 0x00000edf - -#define REG_A5XX_TPL1_ADDR_MODE_CNTL 0x00000f01 - -#define REG_A5XX_TPL1_MODE_CNTL 0x00000f02 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_0 0x00000f10 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_1 0x00000f11 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_2 0x00000f12 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_3 0x00000f13 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_4 0x00000f14 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_5 0x00000f15 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_6 0x00000f16 - -#define REG_A5XX_TPL1_PERFCTR_TP_SEL_7 0x00000f17 - -#define REG_A5XX_TPL1_POWERCTR_TP_SEL_0 0x00000f18 - -#define REG_A5XX_TPL1_POWERCTR_TP_SEL_1 0x00000f19 - -#define REG_A5XX_TPL1_POWERCTR_TP_SEL_2 0x00000f1a - -#define REG_A5XX_TPL1_POWERCTR_TP_SEL_3 0x00000f1b - -#define REG_A5XX_VBIF_VERSION 0x00003000 - -#define REG_A5XX_VBIF_CLKON 0x00003001 - -#define REG_A5XX_VBIF_ABIT_SORT 0x00003028 - -#define REG_A5XX_VBIF_ABIT_SORT_CONF 0x00003029 - -#define REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 - -#define REG_A5XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a - -#define REG_A5XX_VBIF_IN_RD_LIM_CONF0 0x0000302c - -#define REG_A5XX_VBIF_IN_RD_LIM_CONF1 0x0000302d - -#define REG_A5XX_VBIF_XIN_HALT_CTRL0 0x00003080 - -#define REG_A5XX_VBIF_XIN_HALT_CTRL1 0x00003081 - -#define REG_A5XX_VBIF_TEST_BUS_OUT_CTRL 0x00003084 - -#define REG_A5XX_VBIF_TEST_BUS1_CTRL0 0x00003085 - -#define REG_A5XX_VBIF_TEST_BUS1_CTRL1 0x00003086 - -#define REG_A5XX_VBIF_TEST_BUS2_CTRL0 0x00003087 - -#define REG_A5XX_VBIF_TEST_BUS2_CTRL1 0x00003088 - -#define REG_A5XX_VBIF_TEST_BUS_OUT 0x0000308c - -#define REG_A5XX_VBIF_PERF_CNT_EN0 0x000030c0 - -#define REG_A5XX_VBIF_PERF_CNT_EN1 0x000030c1 - -#define REG_A5XX_VBIF_PERF_CNT_EN2 0x000030c2 - -#define REG_A5XX_VBIF_PERF_CNT_EN3 0x000030c3 - -#define REG_A5XX_VBIF_PERF_CNT_CLR0 0x000030c8 - -#define REG_A5XX_VBIF_PERF_CNT_CLR1 0x000030c9 - -#define REG_A5XX_VBIF_PERF_CNT_CLR2 0x000030ca - -#define REG_A5XX_VBIF_PERF_CNT_CLR3 0x000030cb - -#define REG_A5XX_VBIF_PERF_CNT_SEL0 0x000030d0 - -#define REG_A5XX_VBIF_PERF_CNT_SEL1 0x000030d1 - -#define REG_A5XX_VBIF_PERF_CNT_SEL2 0x000030d2 - -#define REG_A5XX_VBIF_PERF_CNT_SEL3 0x000030d3 - -#define REG_A5XX_VBIF_PERF_CNT_LOW0 0x000030d8 - -#define REG_A5XX_VBIF_PERF_CNT_LOW1 0x000030d9 - -#define REG_A5XX_VBIF_PERF_CNT_LOW2 0x000030da - -#define REG_A5XX_VBIF_PERF_CNT_LOW3 0x000030db - -#define REG_A5XX_VBIF_PERF_CNT_HIGH0 0x000030e0 - -#define REG_A5XX_VBIF_PERF_CNT_HIGH1 0x000030e1 - -#define REG_A5XX_VBIF_PERF_CNT_HIGH2 0x000030e2 - -#define REG_A5XX_VBIF_PERF_CNT_HIGH3 0x000030e3 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_EN0 0x00003100 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_EN1 0x00003101 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_EN2 0x00003102 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW0 0x00003110 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW1 0x00003111 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_LOW2 0x00003112 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH0 0x00003118 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH1 0x00003119 - -#define REG_A5XX_VBIF_PERF_PWR_CNT_HIGH2 0x0000311a - -#define REG_A5XX_GPMU_INST_RAM_BASE 0x00008800 - -#define REG_A5XX_GPMU_DATA_RAM_BASE 0x00009800 - -#define REG_A5XX_GPMU_SP_POWER_CNTL 0x0000a881 - -#define REG_A5XX_GPMU_RBCCU_CLOCK_CNTL 0x0000a886 - -#define REG_A5XX_GPMU_RBCCU_POWER_CNTL 0x0000a887 - -#define REG_A5XX_GPMU_SP_PWR_CLK_STATUS 0x0000a88b -#define A5XX_GPMU_SP_PWR_CLK_STATUS_PWR_ON 0x00100000 - -#define REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS 0x0000a88d -#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS_PWR_ON 0x00100000 - -#define REG_A5XX_GPMU_PWR_COL_STAGGER_DELAY 0x0000a891 - -#define REG_A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0x0000a892 - -#define REG_A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0x0000a893 - -#define REG_A5XX_GPMU_PWR_COL_BINNING_CTRL 0x0000a894 - -#define REG_A5XX_GPMU_CLOCK_THROTTLE_CTRL 0x0000a8a3 - -#define REG_A5XX_GPMU_WFI_CONFIG 0x0000a8c1 - -#define REG_A5XX_GPMU_RBBM_INTR_INFO 0x0000a8d6 - -#define REG_A5XX_GPMU_CM3_SYSRESET 0x0000a8d8 - -#define REG_A5XX_GPMU_GENERAL_0 0x0000a8e0 - -#define REG_A5XX_GPMU_GENERAL_1 0x0000a8e1 - -#define REG_A5XX_SP_POWER_COUNTER_0_LO 0x0000a840 - -#define REG_A5XX_SP_POWER_COUNTER_0_HI 0x0000a841 - -#define REG_A5XX_SP_POWER_COUNTER_1_LO 0x0000a842 - -#define REG_A5XX_SP_POWER_COUNTER_1_HI 0x0000a843 - -#define REG_A5XX_SP_POWER_COUNTER_2_LO 0x0000a844 - -#define REG_A5XX_SP_POWER_COUNTER_2_HI 0x0000a845 - -#define REG_A5XX_SP_POWER_COUNTER_3_LO 0x0000a846 - -#define REG_A5XX_SP_POWER_COUNTER_3_HI 0x0000a847 - -#define REG_A5XX_TP_POWER_COUNTER_0_LO 0x0000a848 - -#define REG_A5XX_TP_POWER_COUNTER_0_HI 0x0000a849 - -#define REG_A5XX_TP_POWER_COUNTER_1_LO 0x0000a84a - -#define REG_A5XX_TP_POWER_COUNTER_1_HI 0x0000a84b - -#define REG_A5XX_TP_POWER_COUNTER_2_LO 0x0000a84c - -#define REG_A5XX_TP_POWER_COUNTER_2_HI 0x0000a84d - -#define REG_A5XX_TP_POWER_COUNTER_3_LO 0x0000a84e - -#define REG_A5XX_TP_POWER_COUNTER_3_HI 0x0000a84f - -#define REG_A5XX_RB_POWER_COUNTER_0_LO 0x0000a850 - -#define REG_A5XX_RB_POWER_COUNTER_0_HI 0x0000a851 - -#define REG_A5XX_RB_POWER_COUNTER_1_LO 0x0000a852 - -#define REG_A5XX_RB_POWER_COUNTER_1_HI 0x0000a853 - -#define REG_A5XX_RB_POWER_COUNTER_2_LO 0x0000a854 - -#define REG_A5XX_RB_POWER_COUNTER_2_HI 0x0000a855 - -#define REG_A5XX_RB_POWER_COUNTER_3_LO 0x0000a856 - -#define REG_A5XX_RB_POWER_COUNTER_3_HI 0x0000a857 - -#define REG_A5XX_CCU_POWER_COUNTER_0_LO 0x0000a858 - -#define REG_A5XX_CCU_POWER_COUNTER_0_HI 0x0000a859 - -#define REG_A5XX_CCU_POWER_COUNTER_1_LO 0x0000a85a - -#define REG_A5XX_CCU_POWER_COUNTER_1_HI 0x0000a85b - -#define REG_A5XX_UCHE_POWER_COUNTER_0_LO 0x0000a85c - -#define REG_A5XX_UCHE_POWER_COUNTER_0_HI 0x0000a85d - -#define REG_A5XX_UCHE_POWER_COUNTER_1_LO 0x0000a85e - -#define REG_A5XX_UCHE_POWER_COUNTER_1_HI 0x0000a85f - -#define REG_A5XX_UCHE_POWER_COUNTER_2_LO 0x0000a860 - -#define REG_A5XX_UCHE_POWER_COUNTER_2_HI 0x0000a861 - -#define REG_A5XX_UCHE_POWER_COUNTER_3_LO 0x0000a862 - -#define REG_A5XX_UCHE_POWER_COUNTER_3_HI 0x0000a863 - -#define REG_A5XX_CP_POWER_COUNTER_0_LO 0x0000a864 - -#define REG_A5XX_CP_POWER_COUNTER_0_HI 0x0000a865 - -#define REG_A5XX_CP_POWER_COUNTER_1_LO 0x0000a866 - -#define REG_A5XX_CP_POWER_COUNTER_1_HI 0x0000a867 - -#define REG_A5XX_CP_POWER_COUNTER_2_LO 0x0000a868 - -#define REG_A5XX_CP_POWER_COUNTER_2_HI 0x0000a869 - -#define REG_A5XX_CP_POWER_COUNTER_3_LO 0x0000a86a - -#define REG_A5XX_CP_POWER_COUNTER_3_HI 0x0000a86b - -#define REG_A5XX_GPMU_POWER_COUNTER_0_LO 0x0000a86c - -#define REG_A5XX_GPMU_POWER_COUNTER_0_HI 0x0000a86d - -#define REG_A5XX_GPMU_POWER_COUNTER_1_LO 0x0000a86e - -#define REG_A5XX_GPMU_POWER_COUNTER_1_HI 0x0000a86f - -#define REG_A5XX_GPMU_POWER_COUNTER_2_LO 0x0000a870 - -#define REG_A5XX_GPMU_POWER_COUNTER_2_HI 0x0000a871 - -#define REG_A5XX_GPMU_POWER_COUNTER_3_LO 0x0000a872 - -#define REG_A5XX_GPMU_POWER_COUNTER_3_HI 0x0000a873 - -#define REG_A5XX_GPMU_POWER_COUNTER_4_LO 0x0000a874 - -#define REG_A5XX_GPMU_POWER_COUNTER_4_HI 0x0000a875 - -#define REG_A5XX_GPMU_POWER_COUNTER_5_LO 0x0000a876 - -#define REG_A5XX_GPMU_POWER_COUNTER_5_HI 0x0000a877 - -#define REG_A5XX_GPMU_POWER_COUNTER_ENABLE 0x0000a878 - -#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_LO 0x0000a879 - -#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_HI 0x0000a87a - -#define REG_A5XX_GPMU_ALWAYS_ON_COUNTER_RESET 0x0000a87b - -#define REG_A5XX_GPMU_POWER_COUNTER_SELECT_0 0x0000a87c - -#define REG_A5XX_GPMU_POWER_COUNTER_SELECT_1 0x0000a87d - -#define REG_A5XX_GPMU_CLOCK_THROTTLE_CTRL 0x0000a8a3 - -#define REG_A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL 0x0000a8a8 - -#define REG_A5XX_GPMU_TEMP_SENSOR_ID 0x0000ac00 - -#define REG_A5XX_GPMU_TEMP_SENSOR_CONFIG 0x0000ac01 - -#define REG_A5XX_GPMU_TEMP_VAL 0x0000ac02 - -#define REG_A5XX_GPMU_DELTA_TEMP_THRESHOLD 0x0000ac03 - -#define REG_A5XX_GPMU_TEMP_THRESHOLD_INTR_STATUS 0x0000ac05 - -#define REG_A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK 0x0000ac06 - -#define REG_A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1 0x0000ac40 - -#define REG_A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3 0x0000ac41 - -#define REG_A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1 0x0000ac42 - -#define REG_A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3 0x0000ac43 - -#define REG_A5XX_GPMU_BASE_LEAKAGE 0x0000ac46 - -#define REG_A5XX_GPMU_GPMU_VOLTAGE 0x0000ac60 - -#define REG_A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS 0x0000ac61 - -#define REG_A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK 0x0000ac62 - -#define REG_A5XX_GPMU_GPMU_PWR_THRESHOLD 0x0000ac80 - -#define REG_A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL 0x0000acc4 - -#define REG_A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS 0x0000acc5 - -#define REG_A5XX_GDPM_CONFIG1 0x0000b80c - -#define REG_A5XX_GDPM_CONFIG2 0x0000b80d - -#define REG_A5XX_GDPM_INT_EN 0x0000b80f - -#define REG_A5XX_GDPM_INT_MASK 0x0000b811 - -#define REG_A5XX_GPMU_BEC_ENABLE 0x0000b9a0 - -#define REG_A5XX_GPU_CS_SENSOR_GENERAL_STATUS 0x0000c41a - -#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_0 0x0000c41d - -#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_2 0x0000c41f - -#define REG_A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_4 0x0000c421 - -#define REG_A5XX_GPU_CS_ENABLE_REG 0x0000c520 - -#define REG_A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1 0x0000c557 - -#define REG_A5XX_GRAS_CL_CNTL 0x0000e000 -#define A5XX_GRAS_CL_CNTL_ZERO_GB_SCALE_Z 0x00000040 - -#define REG_A5XX_UNKNOWN_E001 0x0000e001 - -#define REG_A5XX_UNKNOWN_E004 0x0000e004 - -#define REG_A5XX_GRAS_CNTL 0x0000e005 -#define A5XX_GRAS_CNTL_VARYING 0x00000001 -#define A5XX_GRAS_CNTL_UNK3 0x00000008 -#define A5XX_GRAS_CNTL_XCOORD 0x00000040 -#define A5XX_GRAS_CNTL_YCOORD 0x00000080 -#define A5XX_GRAS_CNTL_ZCOORD 0x00000100 -#define A5XX_GRAS_CNTL_WCOORD 0x00000200 - -#define REG_A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ 0x0000e006 -#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK 0x000003ff -#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(uint32_t val) -{ - return ((val) << A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__SHIFT) & A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ__MASK; -} -#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK 0x000ffc00 -#define A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT 10 -static inline uint32_t A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(uint32_t val) -{ - return ((val) << A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__SHIFT) & A5XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_XOFFSET_0 0x0000e010 -#define A5XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_XOFFSET_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_XOFFSET_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_XSCALE_0 0x0000e011 -#define A5XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_XSCALE_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_XSCALE_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_YOFFSET_0 0x0000e012 -#define A5XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_YOFFSET_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_YOFFSET_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_YSCALE_0 0x0000e013 -#define A5XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_YSCALE_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_YSCALE_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_ZOFFSET_0 0x0000e014 -#define A5XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_ZOFFSET_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A5XX_GRAS_CL_VPORT_ZOFFSET_0__MASK; -} - -#define REG_A5XX_GRAS_CL_VPORT_ZSCALE_0 0x0000e015 -#define A5XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff -#define A5XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0 -static inline uint32_t A5XX_GRAS_CL_VPORT_ZSCALE_0(float val) -{ - return ((fui(val)) << A5XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A5XX_GRAS_CL_VPORT_ZSCALE_0__MASK; -} - -#define REG_A5XX_GRAS_SU_CNTL 0x0000e090 -#define A5XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001 -#define A5XX_GRAS_SU_CNTL_CULL_BACK 0x00000002 -#define A5XX_GRAS_SU_CNTL_FRONT_CW 0x00000004 -#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8 -#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3 -static inline uint32_t A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(float val) -{ - return ((((int32_t)(val * 4.0))) << A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT) & A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; -} -#define A5XX_GRAS_SU_CNTL_POLY_OFFSET 0x00000800 -#define A5XX_GRAS_SU_CNTL_MSAA_ENABLE 0x00002000 - -#define REG_A5XX_GRAS_SU_POINT_MINMAX 0x0000e091 -#define A5XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff -#define A5XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POINT_MINMAX_MIN(float val) -{ - return ((((uint32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A5XX_GRAS_SU_POINT_MINMAX_MIN__MASK; -} -#define A5XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000 -#define A5XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16 -static inline uint32_t A5XX_GRAS_SU_POINT_MINMAX_MAX(float val) -{ - return ((((uint32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A5XX_GRAS_SU_POINT_MINMAX_MAX__MASK; -} - -#define REG_A5XX_GRAS_SU_POINT_SIZE 0x0000e092 -#define A5XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff -#define A5XX_GRAS_SU_POINT_SIZE__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POINT_SIZE(float val) -{ - return ((((int32_t)(val * 16.0))) << A5XX_GRAS_SU_POINT_SIZE__SHIFT) & A5XX_GRAS_SU_POINT_SIZE__MASK; -} - -#define REG_A5XX_GRAS_SU_LAYERED 0x0000e093 - -#define REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL 0x0000e094 -#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 -#define A5XX_GRAS_SU_DEPTH_PLANE_CNTL_UNK1 0x00000002 - -#define REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000e095 -#define A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff -#define A5XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_SCALE(float val) -{ - return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_SCALE__MASK; -} - -#define REG_A5XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000e096 -#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff -#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) -{ - return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; -} - -#define REG_A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP 0x0000e097 -#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK 0xffffffff -#define A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(float val) -{ - return ((fui(val)) << A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__SHIFT) & A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP__MASK; -} - -#define REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO 0x0000e098 -#define A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 -#define A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_format val) -{ - return ((val) << A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; -} - -#define REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL 0x0000e099 - -#define REG_A5XX_GRAS_SC_CNTL 0x0000e0a0 -#define A5XX_GRAS_SC_CNTL_BINNING_PASS 0x00000001 -#define A5XX_GRAS_SC_CNTL_SAMPLES_PASSED 0x00008000 - -#define REG_A5XX_GRAS_SC_BIN_CNTL 0x0000e0a1 - -#define REG_A5XX_GRAS_SC_RAS_MSAA_CNTL 0x0000e0a2 -#define A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A5XX_GRAS_SC_DEST_MSAA_CNTL 0x0000e0a3 -#define A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL 0x0000e0a4 - -#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0 0x0000e0aa -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK 0x00007fff -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X__MASK; -} -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0 0x0000e0ab -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK 0x00007fff -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_X__MASK; -} -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__SHIFT) & A5XX_GRAS_SC_SCREEN_SCISSOR_BR_0_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0 0x0000e0ca -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK 0x00007fff -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X__MASK; -} -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0 0x0000e0cb -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK 0x00007fff -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_X__MASK; -} -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__SHIFT) & A5XX_GRAS_SC_VIEWPORT_SCISSOR_BR_0_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL 0x0000e0ea -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; -} -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; -} - -#define REG_A5XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000e0eb -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 -static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; -} -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 -#define A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 -static inline uint32_t A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) -{ - return ((val) << A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; -} - -#define REG_A5XX_GRAS_LRZ_CNTL 0x0000e100 -#define A5XX_GRAS_LRZ_CNTL_ENABLE 0x00000001 -#define A5XX_GRAS_LRZ_CNTL_LRZ_WRITE 0x00000002 -#define A5XX_GRAS_LRZ_CNTL_GREATER 0x00000004 - -#define REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO 0x0000e101 - -#define REG_A5XX_GRAS_LRZ_BUFFER_BASE_HI 0x0000e102 - -#define REG_A5XX_GRAS_LRZ_BUFFER_PITCH 0x0000e103 -#define A5XX_GRAS_LRZ_BUFFER_PITCH__MASK 0xffffffff -#define A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A5XX_GRAS_LRZ_BUFFER_PITCH(uint32_t val) -{ - return ((val >> 5) << A5XX_GRAS_LRZ_BUFFER_PITCH__SHIFT) & A5XX_GRAS_LRZ_BUFFER_PITCH__MASK; -} - -#define REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO 0x0000e104 - -#define REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI 0x0000e105 - -#define REG_A5XX_RB_CNTL 0x0000e140 -#define A5XX_RB_CNTL_WIDTH__MASK 0x000000ff -#define A5XX_RB_CNTL_WIDTH__SHIFT 0 -static inline uint32_t A5XX_RB_CNTL_WIDTH(uint32_t val) -{ - return ((val >> 5) << A5XX_RB_CNTL_WIDTH__SHIFT) & A5XX_RB_CNTL_WIDTH__MASK; -} -#define A5XX_RB_CNTL_HEIGHT__MASK 0x0001fe00 -#define A5XX_RB_CNTL_HEIGHT__SHIFT 9 -static inline uint32_t A5XX_RB_CNTL_HEIGHT(uint32_t val) -{ - return ((val >> 5) << A5XX_RB_CNTL_HEIGHT__SHIFT) & A5XX_RB_CNTL_HEIGHT__MASK; -} -#define A5XX_RB_CNTL_BYPASS 0x00020000 - -#define REG_A5XX_RB_RENDER_CNTL 0x0000e141 -#define A5XX_RB_RENDER_CNTL_BINNING_PASS 0x00000001 -#define A5XX_RB_RENDER_CNTL_SAMPLES_PASSED 0x00000040 -#define A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE 0x00000080 -#define A5XX_RB_RENDER_CNTL_FLAG_DEPTH 0x00004000 -#define A5XX_RB_RENDER_CNTL_FLAG_DEPTH2 0x00008000 -#define A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK 0x00ff0000 -#define A5XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT 16 -static inline uint32_t A5XX_RB_RENDER_CNTL_FLAG_MRTS(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_CNTL_FLAG_MRTS__SHIFT) & A5XX_RB_RENDER_CNTL_FLAG_MRTS__MASK; -} -#define A5XX_RB_RENDER_CNTL_FLAG_MRTS2__MASK 0xff000000 -#define A5XX_RB_RENDER_CNTL_FLAG_MRTS2__SHIFT 24 -static inline uint32_t A5XX_RB_RENDER_CNTL_FLAG_MRTS2(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_CNTL_FLAG_MRTS2__SHIFT) & A5XX_RB_RENDER_CNTL_FLAG_MRTS2__MASK; -} - -#define REG_A5XX_RB_RAS_MSAA_CNTL 0x0000e142 -#define A5XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_RB_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_RB_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_RB_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A5XX_RB_DEST_MSAA_CNTL 0x0000e143 -#define A5XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_RB_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_RB_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_RB_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A5XX_RB_RENDER_CONTROL0 0x0000e144 -#define A5XX_RB_RENDER_CONTROL0_VARYING 0x00000001 -#define A5XX_RB_RENDER_CONTROL0_UNK3 0x00000008 -#define A5XX_RB_RENDER_CONTROL0_XCOORD 0x00000040 -#define A5XX_RB_RENDER_CONTROL0_YCOORD 0x00000080 -#define A5XX_RB_RENDER_CONTROL0_ZCOORD 0x00000100 -#define A5XX_RB_RENDER_CONTROL0_WCOORD 0x00000200 - -#define REG_A5XX_RB_RENDER_CONTROL1 0x0000e145 -#define A5XX_RB_RENDER_CONTROL1_SAMPLEMASK 0x00000001 -#define A5XX_RB_RENDER_CONTROL1_FACENESS 0x00000002 -#define A5XX_RB_RENDER_CONTROL1_SAMPLEID 0x00000004 - -#define REG_A5XX_RB_FS_OUTPUT_CNTL 0x0000e146 -#define A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f -#define A5XX_RB_FS_OUTPUT_CNTL_MRT__SHIFT 0 -static inline uint32_t A5XX_RB_FS_OUTPUT_CNTL_MRT(uint32_t val) -{ - return ((val) << A5XX_RB_FS_OUTPUT_CNTL_MRT__SHIFT) & A5XX_RB_FS_OUTPUT_CNTL_MRT__MASK; -} -#define A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z 0x00000020 - -#define REG_A5XX_RB_RENDER_COMPONENTS 0x0000e147 -#define A5XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f -#define A5XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT0__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 -#define A5XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT1__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 -#define A5XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT2__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 -#define A5XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT3__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 -#define A5XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT4__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 -#define A5XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT5__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 -#define A5XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT6__MASK; -} -#define A5XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 -#define A5XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 -static inline uint32_t A5XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) -{ - return ((val) << A5XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A5XX_RB_RENDER_COMPONENTS_RT7__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT(uint32_t i0) { return 0x0000e150 + 0x7*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_CONTROL(uint32_t i0) { return 0x0000e150 + 0x7*i0; } -#define A5XX_RB_MRT_CONTROL_BLEND 0x00000001 -#define A5XX_RB_MRT_CONTROL_BLEND2 0x00000002 -#define A5XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000004 -#define A5XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000078 -#define A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 3 -static inline uint32_t A5XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) -{ - return ((val) << A5XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A5XX_RB_MRT_CONTROL_ROP_CODE__MASK; -} -#define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x00000780 -#define A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 7 -static inline uint32_t A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) -{ - return ((val) << A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x0000e151 + 0x7*i0; } -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 -#define A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; -} -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 -#define A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 -static inline uint32_t A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) -{ - return ((val) << A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x0000e152 + 0x7*i0; } -#define A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x00000300 -#define A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; -} -#define A5XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK 0x00001800 -#define A5XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT 11 -static inline uint32_t A5XX_RB_MRT_BUF_INFO_DITHER_MODE(enum adreno_rb_dither_mode val) -{ - return ((val) << A5XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT) & A5XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK; -} -#define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00006000 -#define A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 13 -static inline uint32_t A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; -} -#define A5XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00008000 - -static inline uint32_t REG_A5XX_RB_MRT_PITCH(uint32_t i0) { return 0x0000e153 + 0x7*i0; } -#define A5XX_RB_MRT_PITCH__MASK 0xffffffff -#define A5XX_RB_MRT_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_MRT_PITCH__SHIFT) & A5XX_RB_MRT_PITCH__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_ARRAY_PITCH(uint32_t i0) { return 0x0000e154 + 0x7*i0; } -#define A5XX_RB_MRT_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_MRT_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_ARRAY_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_MRT_ARRAY_PITCH__SHIFT) & A5XX_RB_MRT_ARRAY_PITCH__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_BASE_LO(uint32_t i0) { return 0x0000e155 + 0x7*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_BASE_HI(uint32_t i0) { return 0x0000e156 + 0x7*i0; } - -#define REG_A5XX_RB_BLEND_RED 0x0000e1a0 -#define A5XX_RB_BLEND_RED_UINT__MASK 0x000000ff -#define A5XX_RB_BLEND_RED_UINT__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_RED_UINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_RED_UINT__SHIFT) & A5XX_RB_BLEND_RED_UINT__MASK; -} -#define A5XX_RB_BLEND_RED_SINT__MASK 0x0000ff00 -#define A5XX_RB_BLEND_RED_SINT__SHIFT 8 -static inline uint32_t A5XX_RB_BLEND_RED_SINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_RED_SINT__SHIFT) & A5XX_RB_BLEND_RED_SINT__MASK; -} -#define A5XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000 -#define A5XX_RB_BLEND_RED_FLOAT__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_RED_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A5XX_RB_BLEND_RED_FLOAT__SHIFT) & A5XX_RB_BLEND_RED_FLOAT__MASK; -} - -#define REG_A5XX_RB_BLEND_RED_F32 0x0000e1a1 -#define A5XX_RB_BLEND_RED_F32__MASK 0xffffffff -#define A5XX_RB_BLEND_RED_F32__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_RED_F32(float val) -{ - return ((fui(val)) << A5XX_RB_BLEND_RED_F32__SHIFT) & A5XX_RB_BLEND_RED_F32__MASK; -} - -#define REG_A5XX_RB_BLEND_GREEN 0x0000e1a2 -#define A5XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff -#define A5XX_RB_BLEND_GREEN_UINT__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_GREEN_UINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_GREEN_UINT__SHIFT) & A5XX_RB_BLEND_GREEN_UINT__MASK; -} -#define A5XX_RB_BLEND_GREEN_SINT__MASK 0x0000ff00 -#define A5XX_RB_BLEND_GREEN_SINT__SHIFT 8 -static inline uint32_t A5XX_RB_BLEND_GREEN_SINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_GREEN_SINT__SHIFT) & A5XX_RB_BLEND_GREEN_SINT__MASK; -} -#define A5XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000 -#define A5XX_RB_BLEND_GREEN_FLOAT__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_GREEN_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A5XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A5XX_RB_BLEND_GREEN_FLOAT__MASK; -} - -#define REG_A5XX_RB_BLEND_GREEN_F32 0x0000e1a3 -#define A5XX_RB_BLEND_GREEN_F32__MASK 0xffffffff -#define A5XX_RB_BLEND_GREEN_F32__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_GREEN_F32(float val) -{ - return ((fui(val)) << A5XX_RB_BLEND_GREEN_F32__SHIFT) & A5XX_RB_BLEND_GREEN_F32__MASK; -} - -#define REG_A5XX_RB_BLEND_BLUE 0x0000e1a4 -#define A5XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff -#define A5XX_RB_BLEND_BLUE_UINT__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_BLUE_UINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_BLUE_UINT__SHIFT) & A5XX_RB_BLEND_BLUE_UINT__MASK; -} -#define A5XX_RB_BLEND_BLUE_SINT__MASK 0x0000ff00 -#define A5XX_RB_BLEND_BLUE_SINT__SHIFT 8 -static inline uint32_t A5XX_RB_BLEND_BLUE_SINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_BLUE_SINT__SHIFT) & A5XX_RB_BLEND_BLUE_SINT__MASK; -} -#define A5XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000 -#define A5XX_RB_BLEND_BLUE_FLOAT__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_BLUE_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A5XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A5XX_RB_BLEND_BLUE_FLOAT__MASK; -} - -#define REG_A5XX_RB_BLEND_BLUE_F32 0x0000e1a5 -#define A5XX_RB_BLEND_BLUE_F32__MASK 0xffffffff -#define A5XX_RB_BLEND_BLUE_F32__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_BLUE_F32(float val) -{ - return ((fui(val)) << A5XX_RB_BLEND_BLUE_F32__SHIFT) & A5XX_RB_BLEND_BLUE_F32__MASK; -} - -#define REG_A5XX_RB_BLEND_ALPHA 0x0000e1a6 -#define A5XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff -#define A5XX_RB_BLEND_ALPHA_UINT__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_ALPHA_UINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_ALPHA_UINT__SHIFT) & A5XX_RB_BLEND_ALPHA_UINT__MASK; -} -#define A5XX_RB_BLEND_ALPHA_SINT__MASK 0x0000ff00 -#define A5XX_RB_BLEND_ALPHA_SINT__SHIFT 8 -static inline uint32_t A5XX_RB_BLEND_ALPHA_SINT(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_ALPHA_SINT__SHIFT) & A5XX_RB_BLEND_ALPHA_SINT__MASK; -} -#define A5XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000 -#define A5XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_ALPHA_FLOAT(float val) -{ - return ((util_float_to_half(val)) << A5XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A5XX_RB_BLEND_ALPHA_FLOAT__MASK; -} - -#define REG_A5XX_RB_BLEND_ALPHA_F32 0x0000e1a7 -#define A5XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff -#define A5XX_RB_BLEND_ALPHA_F32__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_ALPHA_F32(float val) -{ - return ((fui(val)) << A5XX_RB_BLEND_ALPHA_F32__SHIFT) & A5XX_RB_BLEND_ALPHA_F32__MASK; -} - -#define REG_A5XX_RB_ALPHA_CONTROL 0x0000e1a8 -#define A5XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff -#define A5XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 -static inline uint32_t A5XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val) -{ - return ((val) << A5XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A5XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK; -} -#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100 -#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00 -#define A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9 -static inline uint32_t A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) -{ - return ((val) << A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK; -} - -#define REG_A5XX_RB_BLEND_CNTL 0x0000e1a9 -#define A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK 0x000000ff -#define A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT 0 -static inline uint32_t A5XX_RB_BLEND_CNTL_ENABLE_BLEND(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_CNTL_ENABLE_BLEND__SHIFT) & A5XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; -} -#define A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND 0x00000100 -#define A5XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 -#define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK 0xffff0000 -#define A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT 16 -static inline uint32_t A5XX_RB_BLEND_CNTL_SAMPLE_MASK(uint32_t val) -{ - return ((val) << A5XX_RB_BLEND_CNTL_SAMPLE_MASK__SHIFT) & A5XX_RB_BLEND_CNTL_SAMPLE_MASK__MASK; -} - -#define REG_A5XX_RB_DEPTH_PLANE_CNTL 0x0000e1b0 -#define A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z 0x00000001 -#define A5XX_RB_DEPTH_PLANE_CNTL_UNK1 0x00000002 - -#define REG_A5XX_RB_DEPTH_CNTL 0x0000e1b1 -#define A5XX_RB_DEPTH_CNTL_Z_ENABLE 0x00000001 -#define A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE 0x00000002 -#define A5XX_RB_DEPTH_CNTL_ZFUNC__MASK 0x0000001c -#define A5XX_RB_DEPTH_CNTL_ZFUNC__SHIFT 2 -static inline uint32_t A5XX_RB_DEPTH_CNTL_ZFUNC(enum adreno_compare_func val) -{ - return ((val) << A5XX_RB_DEPTH_CNTL_ZFUNC__SHIFT) & A5XX_RB_DEPTH_CNTL_ZFUNC__MASK; -} -#define A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE 0x00000040 - -#define REG_A5XX_RB_DEPTH_BUFFER_INFO 0x0000e1b2 -#define A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK 0x00000007 -#define A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT 0 -static inline uint32_t A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(enum a5xx_depth_format val) -{ - return ((val) << A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__SHIFT) & A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT__MASK; -} - -#define REG_A5XX_RB_DEPTH_BUFFER_BASE_LO 0x0000e1b3 - -#define REG_A5XX_RB_DEPTH_BUFFER_BASE_HI 0x0000e1b4 - -#define REG_A5XX_RB_DEPTH_BUFFER_PITCH 0x0000e1b5 -#define A5XX_RB_DEPTH_BUFFER_PITCH__MASK 0xffffffff -#define A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_DEPTH_BUFFER_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_PITCH__MASK; -} - -#define REG_A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH 0x0000e1b6 -#define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_STENCIL_CONTROL 0x0000e1c0 -#define A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 -#define A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002 -#define A5XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004 -#define A5XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 -#define A5XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A5XX_RB_STENCIL_CONTROL_FUNC__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 -#define A5XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A5XX_RB_STENCIL_CONTROL_FAIL__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 -#define A5XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZPASS__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 -#define A5XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZFAIL__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 -#define A5XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 -#define A5XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 -#define A5XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; -} -#define A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 -#define A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 -static inline uint32_t A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) -{ - return ((val) << A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A5XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; -} - -#define REG_A5XX_RB_STENCIL_INFO 0x0000e1c1 -#define A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001 - -#define REG_A5XX_RB_STENCIL_BASE_LO 0x0000e1c2 - -#define REG_A5XX_RB_STENCIL_BASE_HI 0x0000e1c3 - -#define REG_A5XX_RB_STENCIL_PITCH 0x0000e1c4 -#define A5XX_RB_STENCIL_PITCH__MASK 0xffffffff -#define A5XX_RB_STENCIL_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_STENCIL_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_STENCIL_PITCH__SHIFT) & A5XX_RB_STENCIL_PITCH__MASK; -} - -#define REG_A5XX_RB_STENCIL_ARRAY_PITCH 0x0000e1c5 -#define A5XX_RB_STENCIL_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_STENCIL_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_STENCIL_ARRAY_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_STENCIL_ARRAY_PITCH__SHIFT) & A5XX_RB_STENCIL_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_STENCILREFMASK 0x0000e1c6 -#define A5XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff -#define A5XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 -static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILREF__MASK; -} -#define A5XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 -#define A5XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 -static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILMASK__MASK; -} -#define A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 -#define A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; -} - -#define REG_A5XX_RB_STENCILREFMASK_BF 0x0000e1c7 -#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff -#define A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 -static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; -} -#define A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 -#define A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 -static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; -} -#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 -#define A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 -static inline uint32_t A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) -{ - return ((val) << A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; -} - -#define REG_A5XX_RB_WINDOW_OFFSET 0x0000e1d0 -#define A5XX_RB_WINDOW_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_RB_WINDOW_OFFSET_X__MASK 0x00007fff -#define A5XX_RB_WINDOW_OFFSET_X__SHIFT 0 -static inline uint32_t A5XX_RB_WINDOW_OFFSET_X(uint32_t val) -{ - return ((val) << A5XX_RB_WINDOW_OFFSET_X__SHIFT) & A5XX_RB_WINDOW_OFFSET_X__MASK; -} -#define A5XX_RB_WINDOW_OFFSET_Y__MASK 0x7fff0000 -#define A5XX_RB_WINDOW_OFFSET_Y__SHIFT 16 -static inline uint32_t A5XX_RB_WINDOW_OFFSET_Y(uint32_t val) -{ - return ((val) << A5XX_RB_WINDOW_OFFSET_Y__SHIFT) & A5XX_RB_WINDOW_OFFSET_Y__MASK; -} - -#define REG_A5XX_RB_SAMPLE_COUNT_CONTROL 0x0000e1d1 -#define A5XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002 - -#define REG_A5XX_RB_BLIT_CNTL 0x0000e210 -#define A5XX_RB_BLIT_CNTL_BUF__MASK 0x0000000f -#define A5XX_RB_BLIT_CNTL_BUF__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_CNTL_BUF(enum a5xx_blit_buf val) -{ - return ((val) << A5XX_RB_BLIT_CNTL_BUF__SHIFT) & A5XX_RB_BLIT_CNTL_BUF__MASK; -} - -#define REG_A5XX_RB_RESOLVE_CNTL_1 0x0000e211 -#define A5XX_RB_RESOLVE_CNTL_1_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_RB_RESOLVE_CNTL_1_X__MASK 0x00007fff -#define A5XX_RB_RESOLVE_CNTL_1_X__SHIFT 0 -static inline uint32_t A5XX_RB_RESOLVE_CNTL_1_X(uint32_t val) -{ - return ((val) << A5XX_RB_RESOLVE_CNTL_1_X__SHIFT) & A5XX_RB_RESOLVE_CNTL_1_X__MASK; -} -#define A5XX_RB_RESOLVE_CNTL_1_Y__MASK 0x7fff0000 -#define A5XX_RB_RESOLVE_CNTL_1_Y__SHIFT 16 -static inline uint32_t A5XX_RB_RESOLVE_CNTL_1_Y(uint32_t val) -{ - return ((val) << A5XX_RB_RESOLVE_CNTL_1_Y__SHIFT) & A5XX_RB_RESOLVE_CNTL_1_Y__MASK; -} - -#define REG_A5XX_RB_RESOLVE_CNTL_2 0x0000e212 -#define A5XX_RB_RESOLVE_CNTL_2_WINDOW_OFFSET_DISABLE 0x80000000 -#define A5XX_RB_RESOLVE_CNTL_2_X__MASK 0x00007fff -#define A5XX_RB_RESOLVE_CNTL_2_X__SHIFT 0 -static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_X(uint32_t val) -{ - return ((val) << A5XX_RB_RESOLVE_CNTL_2_X__SHIFT) & A5XX_RB_RESOLVE_CNTL_2_X__MASK; -} -#define A5XX_RB_RESOLVE_CNTL_2_Y__MASK 0x7fff0000 -#define A5XX_RB_RESOLVE_CNTL_2_Y__SHIFT 16 -static inline uint32_t A5XX_RB_RESOLVE_CNTL_2_Y(uint32_t val) -{ - return ((val) << A5XX_RB_RESOLVE_CNTL_2_Y__SHIFT) & A5XX_RB_RESOLVE_CNTL_2_Y__MASK; -} - -#define REG_A5XX_RB_RESOLVE_CNTL_3 0x0000e213 -#define A5XX_RB_RESOLVE_CNTL_3_TILED 0x00000001 - -#define REG_A5XX_RB_BLIT_DST_LO 0x0000e214 - -#define REG_A5XX_RB_BLIT_DST_HI 0x0000e215 - -#define REG_A5XX_RB_BLIT_DST_PITCH 0x0000e216 -#define A5XX_RB_BLIT_DST_PITCH__MASK 0xffffffff -#define A5XX_RB_BLIT_DST_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_DST_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_BLIT_DST_PITCH__SHIFT) & A5XX_RB_BLIT_DST_PITCH__MASK; -} - -#define REG_A5XX_RB_BLIT_DST_ARRAY_PITCH 0x0000e217 -#define A5XX_RB_BLIT_DST_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_DST_ARRAY_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_BLIT_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_DST_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_CLEAR_COLOR_DW0 0x0000e218 - -#define REG_A5XX_RB_CLEAR_COLOR_DW1 0x0000e219 - -#define REG_A5XX_RB_CLEAR_COLOR_DW2 0x0000e21a - -#define REG_A5XX_RB_CLEAR_COLOR_DW3 0x0000e21b - -#define REG_A5XX_RB_CLEAR_CNTL 0x0000e21c -#define A5XX_RB_CLEAR_CNTL_FAST_CLEAR 0x00000002 -#define A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE 0x00000004 -#define A5XX_RB_CLEAR_CNTL_MASK__MASK 0x000000f0 -#define A5XX_RB_CLEAR_CNTL_MASK__SHIFT 4 -static inline uint32_t A5XX_RB_CLEAR_CNTL_MASK(uint32_t val) -{ - return ((val) << A5XX_RB_CLEAR_CNTL_MASK__SHIFT) & A5XX_RB_CLEAR_CNTL_MASK__MASK; -} - -#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO 0x0000e240 - -#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_HI 0x0000e241 - -#define REG_A5XX_RB_DEPTH_FLAG_BUFFER_PITCH 0x0000e242 - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER(uint32_t i0) { return 0x0000e243 + 0x4*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ADDR_LO(uint32_t i0) { return 0x0000e243 + 0x4*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ADDR_HI(uint32_t i0) { return 0x0000e244 + 0x4*i0; } - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t i0) { return 0x0000e245 + 0x4*i0; } -#define A5XX_RB_MRT_FLAG_BUFFER_PITCH__MASK 0xffffffff -#define A5XX_RB_MRT_FLAG_BUFFER_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_FLAG_BUFFER_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_MRT_FLAG_BUFFER_PITCH__SHIFT) & A5XX_RB_MRT_FLAG_BUFFER_PITCH__MASK; -} - -static inline uint32_t REG_A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(uint32_t i0) { return 0x0000e246 + 0x4*i0; } -#define A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__SHIFT) & A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_BLIT_FLAG_DST_LO 0x0000e263 - -#define REG_A5XX_RB_BLIT_FLAG_DST_HI 0x0000e264 - -#define REG_A5XX_RB_BLIT_FLAG_DST_PITCH 0x0000e265 -#define A5XX_RB_BLIT_FLAG_DST_PITCH__MASK 0xffffffff -#define A5XX_RB_BLIT_FLAG_DST_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_FLAG_DST_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_BLIT_FLAG_DST_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_PITCH__MASK; -} - -#define REG_A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH 0x0000e266 -#define A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK 0xffffffff -#define A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__SHIFT) & A5XX_RB_BLIT_FLAG_DST_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO 0x0000e267 - -#define REG_A5XX_RB_SAMPLE_COUNT_ADDR_HI 0x0000e268 - -#define REG_A5XX_VPC_CNTL_0 0x0000e280 -#define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK 0x0000007f -#define A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A5XX_VPC_CNTL_0_STRIDE_IN_VPC(uint32_t val) -{ - return ((val) << A5XX_VPC_CNTL_0_STRIDE_IN_VPC__SHIFT) & A5XX_VPC_CNTL_0_STRIDE_IN_VPC__MASK; -} -#define A5XX_VPC_CNTL_0_VARYING 0x00000800 - -static inline uint32_t REG_A5XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x0000e282 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x0000e282 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x0000e28a + 0x1*i0; } - -static inline uint32_t REG_A5XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x0000e28a + 0x1*i0; } - -#define REG_A5XX_UNKNOWN_E292 0x0000e292 - -#define REG_A5XX_UNKNOWN_E293 0x0000e293 - -static inline uint32_t REG_A5XX_VPC_VAR(uint32_t i0) { return 0x0000e294 + 0x1*i0; } - -static inline uint32_t REG_A5XX_VPC_VAR_DISABLE(uint32_t i0) { return 0x0000e294 + 0x1*i0; } - -#define REG_A5XX_VPC_GS_SIV_CNTL 0x0000e298 - -#define REG_A5XX_UNKNOWN_E29A 0x0000e29a - -#define REG_A5XX_VPC_PACK 0x0000e29d -#define A5XX_VPC_PACK_NUMNONPOSVAR__MASK 0x000000ff -#define A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT 0 -static inline uint32_t A5XX_VPC_PACK_NUMNONPOSVAR(uint32_t val) -{ - return ((val) << A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & A5XX_VPC_PACK_NUMNONPOSVAR__MASK; -} -#define A5XX_VPC_PACK_PSIZELOC__MASK 0x0000ff00 -#define A5XX_VPC_PACK_PSIZELOC__SHIFT 8 -static inline uint32_t A5XX_VPC_PACK_PSIZELOC(uint32_t val) -{ - return ((val) << A5XX_VPC_PACK_PSIZELOC__SHIFT) & A5XX_VPC_PACK_PSIZELOC__MASK; -} - -#define REG_A5XX_VPC_FS_PRIMITIVEID_CNTL 0x0000e2a0 - -#define REG_A5XX_VPC_SO_BUF_CNTL 0x0000e2a1 -#define A5XX_VPC_SO_BUF_CNTL_BUF0 0x00000001 -#define A5XX_VPC_SO_BUF_CNTL_BUF1 0x00000008 -#define A5XX_VPC_SO_BUF_CNTL_BUF2 0x00000040 -#define A5XX_VPC_SO_BUF_CNTL_BUF3 0x00000200 -#define A5XX_VPC_SO_BUF_CNTL_ENABLE 0x00008000 - -#define REG_A5XX_VPC_SO_OVERRIDE 0x0000e2a2 -#define A5XX_VPC_SO_OVERRIDE_SO_DISABLE 0x00000001 - -#define REG_A5XX_VPC_SO_CNTL 0x0000e2a3 -#define A5XX_VPC_SO_CNTL_ENABLE 0x00010000 - -#define REG_A5XX_VPC_SO_PROG 0x0000e2a4 -#define A5XX_VPC_SO_PROG_A_BUF__MASK 0x00000003 -#define A5XX_VPC_SO_PROG_A_BUF__SHIFT 0 -static inline uint32_t A5XX_VPC_SO_PROG_A_BUF(uint32_t val) -{ - return ((val) << A5XX_VPC_SO_PROG_A_BUF__SHIFT) & A5XX_VPC_SO_PROG_A_BUF__MASK; -} -#define A5XX_VPC_SO_PROG_A_OFF__MASK 0x000007fc -#define A5XX_VPC_SO_PROG_A_OFF__SHIFT 2 -static inline uint32_t A5XX_VPC_SO_PROG_A_OFF(uint32_t val) -{ - return ((val >> 2) << A5XX_VPC_SO_PROG_A_OFF__SHIFT) & A5XX_VPC_SO_PROG_A_OFF__MASK; -} -#define A5XX_VPC_SO_PROG_A_EN 0x00000800 -#define A5XX_VPC_SO_PROG_B_BUF__MASK 0x00003000 -#define A5XX_VPC_SO_PROG_B_BUF__SHIFT 12 -static inline uint32_t A5XX_VPC_SO_PROG_B_BUF(uint32_t val) -{ - return ((val) << A5XX_VPC_SO_PROG_B_BUF__SHIFT) & A5XX_VPC_SO_PROG_B_BUF__MASK; -} -#define A5XX_VPC_SO_PROG_B_OFF__MASK 0x007fc000 -#define A5XX_VPC_SO_PROG_B_OFF__SHIFT 14 -static inline uint32_t A5XX_VPC_SO_PROG_B_OFF(uint32_t val) -{ - return ((val >> 2) << A5XX_VPC_SO_PROG_B_OFF__SHIFT) & A5XX_VPC_SO_PROG_B_OFF__MASK; -} -#define A5XX_VPC_SO_PROG_B_EN 0x00800000 - -static inline uint32_t REG_A5XX_VPC_SO(uint32_t i0) { return 0x0000e2a7 + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_LO(uint32_t i0) { return 0x0000e2a7 + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_BUFFER_BASE_HI(uint32_t i0) { return 0x0000e2a8 + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_BUFFER_SIZE(uint32_t i0) { return 0x0000e2a9 + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_NCOMP(uint32_t i0) { return 0x0000e2aa + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_BUFFER_OFFSET(uint32_t i0) { return 0x0000e2ab + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_LO(uint32_t i0) { return 0x0000e2ac + 0x7*i0; } - -static inline uint32_t REG_A5XX_VPC_SO_FLUSH_BASE_HI(uint32_t i0) { return 0x0000e2ad + 0x7*i0; } - -#define REG_A5XX_PC_PRIMITIVE_CNTL 0x0000e384 -#define A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK 0x0000007f -#define A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT 0 -static inline uint32_t A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(uint32_t val) -{ - return ((val) << A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__SHIFT) & A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC__MASK; -} -#define A5XX_PC_PRIMITIVE_CNTL_PRIMITIVE_RESTART 0x00000100 -#define A5XX_PC_PRIMITIVE_CNTL_COUNT_PRIMITIVES 0x00000200 -#define A5XX_PC_PRIMITIVE_CNTL_PROVOKING_VTX_LAST 0x00000400 - -#define REG_A5XX_PC_PRIM_VTX_CNTL 0x0000e385 -#define A5XX_PC_PRIM_VTX_CNTL_PSIZE 0x00000800 - -#define REG_A5XX_PC_RASTER_CNTL 0x0000e388 -#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x00000007 -#define A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 0 -static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_FRONT_PTYPE__MASK; -} -#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000038 -#define A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT 3 -static inline uint32_t A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A5XX_PC_RASTER_CNTL_POLYMODE_BACK_PTYPE__MASK; -} -#define A5XX_PC_RASTER_CNTL_POLYMODE_ENABLE 0x00000040 - -#define REG_A5XX_UNKNOWN_E389 0x0000e389 - -#define REG_A5XX_PC_RESTART_INDEX 0x0000e38c - -#define REG_A5XX_PC_GS_LAYERED 0x0000e38d - -#define REG_A5XX_PC_GS_PARAM 0x0000e38e -#define A5XX_PC_GS_PARAM_MAX_VERTICES__MASK 0x000003ff -#define A5XX_PC_GS_PARAM_MAX_VERTICES__SHIFT 0 -static inline uint32_t A5XX_PC_GS_PARAM_MAX_VERTICES(uint32_t val) -{ - return ((val) << A5XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) & A5XX_PC_GS_PARAM_MAX_VERTICES__MASK; -} -#define A5XX_PC_GS_PARAM_INVOCATIONS__MASK 0x0000f800 -#define A5XX_PC_GS_PARAM_INVOCATIONS__SHIFT 11 -static inline uint32_t A5XX_PC_GS_PARAM_INVOCATIONS(uint32_t val) -{ - return ((val) << A5XX_PC_GS_PARAM_INVOCATIONS__SHIFT) & A5XX_PC_GS_PARAM_INVOCATIONS__MASK; -} -#define A5XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000 -#define A5XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23 -static inline uint32_t A5XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) -{ - return ((val) << A5XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & A5XX_PC_GS_PARAM_PRIMTYPE__MASK; -} - -#define REG_A5XX_PC_HS_PARAM 0x0000e38f -#define A5XX_PC_HS_PARAM_VERTICES_OUT__MASK 0x0000003f -#define A5XX_PC_HS_PARAM_VERTICES_OUT__SHIFT 0 -static inline uint32_t A5XX_PC_HS_PARAM_VERTICES_OUT(uint32_t val) -{ - return ((val) << A5XX_PC_HS_PARAM_VERTICES_OUT__SHIFT) & A5XX_PC_HS_PARAM_VERTICES_OUT__MASK; -} -#define A5XX_PC_HS_PARAM_SPACING__MASK 0x00600000 -#define A5XX_PC_HS_PARAM_SPACING__SHIFT 21 -static inline uint32_t A5XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) -{ - return ((val) << A5XX_PC_HS_PARAM_SPACING__SHIFT) & A5XX_PC_HS_PARAM_SPACING__MASK; -} -#define A5XX_PC_HS_PARAM_CW 0x00800000 -#define A5XX_PC_HS_PARAM_CONNECTED 0x01000000 - -#define REG_A5XX_PC_POWER_CNTL 0x0000e3b0 - -#define REG_A5XX_VFD_CONTROL_0 0x0000e400 -#define A5XX_VFD_CONTROL_0_VTXCNT__MASK 0x0000003f -#define A5XX_VFD_CONTROL_0_VTXCNT__SHIFT 0 -static inline uint32_t A5XX_VFD_CONTROL_0_VTXCNT(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_0_VTXCNT__SHIFT) & A5XX_VFD_CONTROL_0_VTXCNT__MASK; -} - -#define REG_A5XX_VFD_CONTROL_1 0x0000e401 -#define A5XX_VFD_CONTROL_1_REGID4VTX__MASK 0x000000ff -#define A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT 0 -static inline uint32_t A5XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A5XX_VFD_CONTROL_1_REGID4VTX__MASK; -} -#define A5XX_VFD_CONTROL_1_REGID4INST__MASK 0x0000ff00 -#define A5XX_VFD_CONTROL_1_REGID4INST__SHIFT 8 -static inline uint32_t A5XX_VFD_CONTROL_1_REGID4INST(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A5XX_VFD_CONTROL_1_REGID4INST__MASK; -} -#define A5XX_VFD_CONTROL_1_REGID4PRIMID__MASK 0x00ff0000 -#define A5XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT 16 -static inline uint32_t A5XX_VFD_CONTROL_1_REGID4PRIMID(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_1_REGID4PRIMID__SHIFT) & A5XX_VFD_CONTROL_1_REGID4PRIMID__MASK; -} - -#define REG_A5XX_VFD_CONTROL_2 0x0000e402 -#define A5XX_VFD_CONTROL_2_REGID_PATCHID__MASK 0x000000ff -#define A5XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT 0 -static inline uint32_t A5XX_VFD_CONTROL_2_REGID_PATCHID(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_2_REGID_PATCHID__SHIFT) & A5XX_VFD_CONTROL_2_REGID_PATCHID__MASK; -} - -#define REG_A5XX_VFD_CONTROL_3 0x0000e403 -#define A5XX_VFD_CONTROL_3_REGID_PATCHID__MASK 0x0000ff00 -#define A5XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT 8 -static inline uint32_t A5XX_VFD_CONTROL_3_REGID_PATCHID(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_3_REGID_PATCHID__SHIFT) & A5XX_VFD_CONTROL_3_REGID_PATCHID__MASK; -} -#define A5XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000 -#define A5XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16 -static inline uint32_t A5XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A5XX_VFD_CONTROL_3_REGID_TESSX__MASK; -} -#define A5XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000 -#define A5XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24 -static inline uint32_t A5XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val) -{ - return ((val) << A5XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A5XX_VFD_CONTROL_3_REGID_TESSY__MASK; -} - -#define REG_A5XX_VFD_CONTROL_4 0x0000e404 - -#define REG_A5XX_VFD_CONTROL_5 0x0000e405 - -#define REG_A5XX_VFD_INDEX_OFFSET 0x0000e408 - -#define REG_A5XX_VFD_INSTANCE_START_OFFSET 0x0000e409 - -static inline uint32_t REG_A5XX_VFD_FETCH(uint32_t i0) { return 0x0000e40a + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_FETCH_BASE_LO(uint32_t i0) { return 0x0000e40a + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_FETCH_BASE_HI(uint32_t i0) { return 0x0000e40b + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_FETCH_SIZE(uint32_t i0) { return 0x0000e40c + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_FETCH_STRIDE(uint32_t i0) { return 0x0000e40d + 0x4*i0; } - -static inline uint32_t REG_A5XX_VFD_DECODE(uint32_t i0) { return 0x0000e48a + 0x2*i0; } - -static inline uint32_t REG_A5XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000e48a + 0x2*i0; } -#define A5XX_VFD_DECODE_INSTR_IDX__MASK 0x0000001f -#define A5XX_VFD_DECODE_INSTR_IDX__SHIFT 0 -static inline uint32_t A5XX_VFD_DECODE_INSTR_IDX(uint32_t val) -{ - return ((val) << A5XX_VFD_DECODE_INSTR_IDX__SHIFT) & A5XX_VFD_DECODE_INSTR_IDX__MASK; -} -#define A5XX_VFD_DECODE_INSTR_INSTANCED 0x00020000 -#define A5XX_VFD_DECODE_INSTR_FORMAT__MASK 0x0ff00000 -#define A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20 -static inline uint32_t A5XX_VFD_DECODE_INSTR_FORMAT(enum a5xx_vtx_fmt val) -{ - return ((val) << A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A5XX_VFD_DECODE_INSTR_FORMAT__MASK; -} -#define A5XX_VFD_DECODE_INSTR_SWAP__MASK 0x30000000 -#define A5XX_VFD_DECODE_INSTR_SWAP__SHIFT 28 -static inline uint32_t A5XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A5XX_VFD_DECODE_INSTR_SWAP__MASK; -} -#define A5XX_VFD_DECODE_INSTR_UNK30 0x40000000 -#define A5XX_VFD_DECODE_INSTR_FLOAT 0x80000000 - -static inline uint32_t REG_A5XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000e48b + 0x2*i0; } - -static inline uint32_t REG_A5XX_VFD_DEST_CNTL(uint32_t i0) { return 0x0000e4ca + 0x1*i0; } - -static inline uint32_t REG_A5XX_VFD_DEST_CNTL_INSTR(uint32_t i0) { return 0x0000e4ca + 0x1*i0; } -#define A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK 0x0000000f -#define A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT 0 -static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(uint32_t val) -{ - return ((val) << A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__SHIFT) & A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK__MASK; -} -#define A5XX_VFD_DEST_CNTL_INSTR_REGID__MASK 0x00000ff0 -#define A5XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT 4 -static inline uint32_t A5XX_VFD_DEST_CNTL_INSTR_REGID(uint32_t val) -{ - return ((val) << A5XX_VFD_DEST_CNTL_INSTR_REGID__SHIFT) & A5XX_VFD_DEST_CNTL_INSTR_REGID__MASK; -} - -#define REG_A5XX_VFD_POWER_CNTL 0x0000e4f0 - -#define REG_A5XX_SP_SP_CNTL 0x0000e580 - -#define REG_A5XX_SP_VS_CONFIG 0x0000e584 -#define A5XX_SP_VS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_VS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_FS_CONFIG 0x0000e585 -#define A5XX_SP_FS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_FS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_HS_CONFIG 0x0000e586 -#define A5XX_SP_HS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_HS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_DS_CONFIG 0x0000e587 -#define A5XX_SP_DS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_DS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_GS_CONFIG 0x0000e588 -#define A5XX_SP_GS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_GS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_CS_CONFIG 0x0000e589 -#define A5XX_SP_CS_CONFIG_ENABLED 0x00000001 -#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_SP_CS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_SP_VS_CONFIG_MAX_CONST 0x0000e58a - -#define REG_A5XX_SP_FS_CONFIG_MAX_CONST 0x0000e58b - -#define REG_A5XX_SP_VS_CTRL_REG0 0x0000e590 -#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_VS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_VS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_SP_PRIMITIVE_CNTL 0x0000e592 -#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK 0x0000001f -#define A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT 0 -static inline uint32_t A5XX_SP_PRIMITIVE_CNTL_VSOUT(uint32_t val) -{ - return ((val) << A5XX_SP_PRIMITIVE_CNTL_VSOUT__SHIFT) & A5XX_SP_PRIMITIVE_CNTL_VSOUT__MASK; -} - -static inline uint32_t REG_A5XX_SP_VS_OUT(uint32_t i0) { return 0x0000e593 + 0x1*i0; } - -static inline uint32_t REG_A5XX_SP_VS_OUT_REG(uint32_t i0) { return 0x0000e593 + 0x1*i0; } -#define A5XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff -#define A5XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 -static inline uint32_t A5XX_SP_VS_OUT_REG_A_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A5XX_SP_VS_OUT_REG_A_REGID__MASK; -} -#define A5XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00000f00 -#define A5XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 8 -static inline uint32_t A5XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) -{ - return ((val) << A5XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A5XX_SP_VS_OUT_REG_A_COMPMASK__MASK; -} -#define A5XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 -#define A5XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 -static inline uint32_t A5XX_SP_VS_OUT_REG_B_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A5XX_SP_VS_OUT_REG_B_REGID__MASK; -} -#define A5XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x0f000000 -#define A5XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 24 -static inline uint32_t A5XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) -{ - return ((val) << A5XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A5XX_SP_VS_OUT_REG_B_COMPMASK__MASK; -} - -static inline uint32_t REG_A5XX_SP_VS_VPC_DST(uint32_t i0) { return 0x0000e5a3 + 0x1*i0; } - -static inline uint32_t REG_A5XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x0000e5a3 + 0x1*i0; } -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 -static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) -{ - return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; -} -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 -static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) -{ - return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; -} -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 -static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) -{ - return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; -} -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 -#define A5XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 -static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) -{ - return ((val) << A5XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A5XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; -} - -#define REG_A5XX_UNKNOWN_E5AB 0x0000e5ab - -#define REG_A5XX_SP_VS_OBJ_START_LO 0x0000e5ac - -#define REG_A5XX_SP_VS_OBJ_START_HI 0x0000e5ad - -#define REG_A5XX_SP_FS_CTRL_REG0 0x0000e5c0 -#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_FS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_FS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E5C2 0x0000e5c2 - -#define REG_A5XX_SP_FS_OBJ_START_LO 0x0000e5c3 - -#define REG_A5XX_SP_FS_OBJ_START_HI 0x0000e5c4 - -#define REG_A5XX_SP_BLEND_CNTL 0x0000e5c9 -#define A5XX_SP_BLEND_CNTL_ENABLED 0x00000001 -#define A5XX_SP_BLEND_CNTL_UNK8 0x00000100 -#define A5XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE 0x00000400 - -#define REG_A5XX_SP_FS_OUTPUT_CNTL 0x0000e5ca -#define A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK 0x0000000f -#define A5XX_SP_FS_OUTPUT_CNTL_MRT__SHIFT 0 -static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_MRT(uint32_t val) -{ - return ((val) << A5XX_SP_FS_OUTPUT_CNTL_MRT__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_MRT__MASK; -} -#define A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__MASK 0x00001fe0 -#define A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__SHIFT 5 -static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID__MASK; -} -#define A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__MASK 0x001fe000 -#define A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__SHIFT 13 -static inline uint32_t A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID__MASK; -} - -static inline uint32_t REG_A5XX_SP_FS_OUTPUT(uint32_t i0) { return 0x0000e5cb + 0x1*i0; } - -static inline uint32_t REG_A5XX_SP_FS_OUTPUT_REG(uint32_t i0) { return 0x0000e5cb + 0x1*i0; } -#define A5XX_SP_FS_OUTPUT_REG_REGID__MASK 0x000000ff -#define A5XX_SP_FS_OUTPUT_REG_REGID__SHIFT 0 -static inline uint32_t A5XX_SP_FS_OUTPUT_REG_REGID(uint32_t val) -{ - return ((val) << A5XX_SP_FS_OUTPUT_REG_REGID__SHIFT) & A5XX_SP_FS_OUTPUT_REG_REGID__MASK; -} -#define A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION 0x00000100 - -static inline uint32_t REG_A5XX_SP_FS_MRT(uint32_t i0) { return 0x0000e5d3 + 0x1*i0; } - -static inline uint32_t REG_A5XX_SP_FS_MRT_REG(uint32_t i0) { return 0x0000e5d3 + 0x1*i0; } -#define A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK; -} -#define A5XX_SP_FS_MRT_REG_COLOR_SINT 0x00000100 -#define A5XX_SP_FS_MRT_REG_COLOR_UINT 0x00000200 -#define A5XX_SP_FS_MRT_REG_COLOR_SRGB 0x00000400 - -#define REG_A5XX_UNKNOWN_E5DB 0x0000e5db - -#define REG_A5XX_SP_CS_CTRL_REG0 0x0000e5f0 -#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_CS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_CS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_CS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_CS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_CS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_CS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E5F2 0x0000e5f2 - -#define REG_A5XX_SP_CS_OBJ_START_LO 0x0000e5f3 - -#define REG_A5XX_SP_CS_OBJ_START_HI 0x0000e5f4 - -#define REG_A5XX_SP_HS_CTRL_REG0 0x0000e600 -#define A5XX_SP_HS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_HS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_HS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_HS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_HS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_HS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_HS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_HS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_HS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E602 0x0000e602 - -#define REG_A5XX_SP_HS_OBJ_START_LO 0x0000e603 - -#define REG_A5XX_SP_HS_OBJ_START_HI 0x0000e604 - -#define REG_A5XX_SP_DS_CTRL_REG0 0x0000e610 -#define A5XX_SP_DS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_DS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_DS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_DS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_DS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_DS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_DS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_DS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_DS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E62B 0x0000e62b - -#define REG_A5XX_SP_DS_OBJ_START_LO 0x0000e62c - -#define REG_A5XX_SP_DS_OBJ_START_HI 0x0000e62d - -#define REG_A5XX_SP_GS_CTRL_REG0 0x0000e640 -#define A5XX_SP_GS_CTRL_REG0_THREADSIZE__MASK 0x00000008 -#define A5XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT 3 -static inline uint32_t A5XX_SP_GS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_SP_GS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_GS_CTRL_REG0_THREADSIZE__MASK; -} -#define A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 -#define A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 -static inline uint32_t A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A5XX_SP_GS_CTRL_REG0_HALFREGFOOTPRINT__MASK; -} -#define A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 -#define A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 -static inline uint32_t A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A5XX_SP_GS_CTRL_REG0_FULLREGFOOTPRINT__MASK; -} -#define A5XX_SP_GS_CTRL_REG0_VARYING 0x00010000 -#define A5XX_SP_GS_CTRL_REG0_PIXLODENABLE 0x00100000 -#define A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK 0xfe000000 -#define A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT 25 -static inline uint32_t A5XX_SP_GS_CTRL_REG0_BRANCHSTACK(uint32_t val) -{ - return ((val) << A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__SHIFT) & A5XX_SP_GS_CTRL_REG0_BRANCHSTACK__MASK; -} - -#define REG_A5XX_UNKNOWN_E65B 0x0000e65b - -#define REG_A5XX_SP_GS_OBJ_START_LO 0x0000e65c - -#define REG_A5XX_SP_GS_OBJ_START_HI 0x0000e65d - -#define REG_A5XX_TPL1_TP_RAS_MSAA_CNTL 0x0000e704 -#define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES__MASK; -} - -#define REG_A5XX_TPL1_TP_DEST_MSAA_CNTL 0x0000e705 -#define A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__MASK 0x00000003 -#define A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT 0 -static inline uint32_t A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__SHIFT) & A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES__MASK; -} -#define A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE 0x00000004 - -#define REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_LO 0x0000e706 - -#define REG_A5XX_TPL1_TP_BORDER_COLOR_BASE_ADDR_HI 0x0000e707 - -#define REG_A5XX_TPL1_VS_TEX_COUNT 0x0000e700 - -#define REG_A5XX_TPL1_HS_TEX_COUNT 0x0000e701 - -#define REG_A5XX_TPL1_DS_TEX_COUNT 0x0000e702 - -#define REG_A5XX_TPL1_GS_TEX_COUNT 0x0000e703 - -#define REG_A5XX_TPL1_VS_TEX_SAMP_LO 0x0000e722 - -#define REG_A5XX_TPL1_VS_TEX_SAMP_HI 0x0000e723 - -#define REG_A5XX_TPL1_HS_TEX_SAMP_LO 0x0000e724 - -#define REG_A5XX_TPL1_HS_TEX_SAMP_HI 0x0000e725 - -#define REG_A5XX_TPL1_DS_TEX_SAMP_LO 0x0000e726 - -#define REG_A5XX_TPL1_DS_TEX_SAMP_HI 0x0000e727 - -#define REG_A5XX_TPL1_GS_TEX_SAMP_LO 0x0000e728 - -#define REG_A5XX_TPL1_GS_TEX_SAMP_HI 0x0000e729 - -#define REG_A5XX_TPL1_VS_TEX_CONST_LO 0x0000e72a - -#define REG_A5XX_TPL1_VS_TEX_CONST_HI 0x0000e72b - -#define REG_A5XX_TPL1_HS_TEX_CONST_LO 0x0000e72c - -#define REG_A5XX_TPL1_HS_TEX_CONST_HI 0x0000e72d - -#define REG_A5XX_TPL1_DS_TEX_CONST_LO 0x0000e72e - -#define REG_A5XX_TPL1_DS_TEX_CONST_HI 0x0000e72f - -#define REG_A5XX_TPL1_GS_TEX_CONST_LO 0x0000e730 - -#define REG_A5XX_TPL1_GS_TEX_CONST_HI 0x0000e731 - -#define REG_A5XX_TPL1_FS_TEX_COUNT 0x0000e750 - -#define REG_A5XX_TPL1_CS_TEX_COUNT 0x0000e751 - -#define REG_A5XX_TPL1_FS_TEX_SAMP_LO 0x0000e75a - -#define REG_A5XX_TPL1_FS_TEX_SAMP_HI 0x0000e75b - -#define REG_A5XX_TPL1_CS_TEX_SAMP_LO 0x0000e75c - -#define REG_A5XX_TPL1_CS_TEX_SAMP_HI 0x0000e75d - -#define REG_A5XX_TPL1_FS_TEX_CONST_LO 0x0000e75e - -#define REG_A5XX_TPL1_FS_TEX_CONST_HI 0x0000e75f - -#define REG_A5XX_TPL1_CS_TEX_CONST_LO 0x0000e760 - -#define REG_A5XX_TPL1_CS_TEX_CONST_HI 0x0000e761 - -#define REG_A5XX_TPL1_TP_FS_ROTATION_CNTL 0x0000e764 - -#define REG_A5XX_HLSQ_CONTROL_0_REG 0x0000e784 -#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000001 -#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; -} -#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK 0x00000004 -#define A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT 2 -static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(enum a3xx_threadsize val) -{ - return ((val) << A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE__MASK; -} - -#define REG_A5XX_HLSQ_CONTROL_1_REG 0x0000e785 -#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK 0x0000003f -#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__SHIFT) & A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK; -} - -#define REG_A5XX_HLSQ_CONTROL_2_REG 0x0000e786 -#define A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000000ff -#define A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; -} -#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK 0x0000ff00 -#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT 8 -static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_SAMPLEID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_SAMPLEID__MASK; -} -#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK 0x00ff0000 -#define A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT 16 -static inline uint32_t A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__SHIFT) & A5XX_HLSQ_CONTROL_2_REG_SAMPLEMASK__MASK; -} - -#define REG_A5XX_HLSQ_CONTROL_3_REG 0x0000e787 -#define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK 0x000000ff -#define A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__SHIFT) & A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID__MASK; -} - -#define REG_A5XX_HLSQ_CONTROL_4_REG 0x0000e788 -#define A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK 0x00ff0000 -#define A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT 16 -static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__SHIFT) & A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID__MASK; -} -#define A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK 0xff000000 -#define A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT 24 -static inline uint32_t A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__SHIFT) & A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID__MASK; -} - -#define REG_A5XX_HLSQ_UPDATE_CNTL 0x0000e78a - -#define REG_A5XX_HLSQ_VS_CONFIG 0x0000e78b -#define A5XX_HLSQ_VS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_FS_CONFIG 0x0000e78c -#define A5XX_HLSQ_FS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_HS_CONFIG 0x0000e78d -#define A5XX_HLSQ_HS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_DS_CONFIG 0x0000e78e -#define A5XX_HLSQ_DS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_GS_CONFIG 0x0000e78f -#define A5XX_HLSQ_GS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_CS_CONFIG 0x0000e790 -#define A5XX_HLSQ_CS_CONFIG_ENABLED 0x00000001 -#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK 0x000000fe -#define A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT 1 -static inline uint32_t A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET__MASK; -} -#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK 0x00007f00 -#define A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT 8 -static inline uint32_t A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__SHIFT) & A5XX_HLSQ_CS_CONFIG_SHADEROBJOFFSET__MASK; -} - -#define REG_A5XX_HLSQ_VS_CNTL 0x0000e791 -#define A5XX_HLSQ_VS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_VS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_VS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_VS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_FS_CNTL 0x0000e792 -#define A5XX_HLSQ_FS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_FS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_FS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_FS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_HS_CNTL 0x0000e793 -#define A5XX_HLSQ_HS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_HS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_HS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_HS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_DS_CNTL 0x0000e794 -#define A5XX_HLSQ_DS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_DS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_DS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_DS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_GS_CNTL 0x0000e795 -#define A5XX_HLSQ_GS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_GS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_GS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_GS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_CS_CNTL 0x0000e796 -#define A5XX_HLSQ_CS_CNTL_SSBO_ENABLE 0x00000001 -#define A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK 0xfffffffe -#define A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT 1 -static inline uint32_t A5XX_HLSQ_CS_CNTL_INSTRLEN(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_INSTRLEN__SHIFT) & A5XX_HLSQ_CS_CNTL_INSTRLEN__MASK; -} - -#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_X 0x0000e7b9 - -#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Y 0x0000e7ba - -#define REG_A5XX_HLSQ_CS_KERNEL_GROUP_Z 0x0000e7bb - -#define REG_A5XX_HLSQ_CS_NDRANGE_0 0x0000e7b0 -#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK 0x00000003 -#define A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK; -} -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK 0x00000ffc -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT 2 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK; -} -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK 0x003ff000 -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT 12 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK; -} -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK 0xffc00000 -#define A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__SHIFT) & A5XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_1 0x0000e7b1 -#define A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_2 0x0000e7b2 -#define A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__SHIFT) & A5XX_HLSQ_CS_NDRANGE_2_GLOBALOFF_X__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_3 0x0000e7b3 -#define A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_4 0x0000e7b4 -#define A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__SHIFT) & A5XX_HLSQ_CS_NDRANGE_4_GLOBALOFF_Y__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_5 0x0000e7b5 -#define A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z__MASK; -} - -#define REG_A5XX_HLSQ_CS_NDRANGE_6 0x0000e7b6 -#define A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK 0xffffffff -#define A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__SHIFT) & A5XX_HLSQ_CS_NDRANGE_6_GLOBALOFF_Z__MASK; -} - -#define REG_A5XX_HLSQ_CS_CNTL_0 0x0000e7b7 -#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK 0x000000ff -#define A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT 0 -static inline uint32_t A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_WGIDCONSTID__MASK; -} -#define A5XX_HLSQ_CS_CNTL_0_UNK0__MASK 0x0000ff00 -#define A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT 8 -static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK0(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_0_UNK0__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK0__MASK; -} -#define A5XX_HLSQ_CS_CNTL_0_UNK1__MASK 0x00ff0000 -#define A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT 16 -static inline uint32_t A5XX_HLSQ_CS_CNTL_0_UNK1(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_0_UNK1__SHIFT) & A5XX_HLSQ_CS_CNTL_0_UNK1__MASK; -} -#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK 0xff000000 -#define A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT 24 -static inline uint32_t A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID(uint32_t val) -{ - return ((val) << A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__SHIFT) & A5XX_HLSQ_CS_CNTL_0_LOCALIDREGID__MASK; -} - -#define REG_A5XX_HLSQ_CS_CNTL_1 0x0000e7b8 - -#define REG_A5XX_UNKNOWN_E7C0 0x0000e7c0 - -#define REG_A5XX_HLSQ_VS_CONSTLEN 0x0000e7c3 - -#define REG_A5XX_HLSQ_VS_INSTRLEN 0x0000e7c4 - -#define REG_A5XX_UNKNOWN_E7C5 0x0000e7c5 - -#define REG_A5XX_HLSQ_HS_CONSTLEN 0x0000e7c8 - -#define REG_A5XX_HLSQ_HS_INSTRLEN 0x0000e7c9 - -#define REG_A5XX_UNKNOWN_E7CA 0x0000e7ca - -#define REG_A5XX_HLSQ_DS_CONSTLEN 0x0000e7cd - -#define REG_A5XX_HLSQ_DS_INSTRLEN 0x0000e7ce - -#define REG_A5XX_UNKNOWN_E7CF 0x0000e7cf - -#define REG_A5XX_HLSQ_GS_CONSTLEN 0x0000e7d2 - -#define REG_A5XX_HLSQ_GS_INSTRLEN 0x0000e7d3 - -#define REG_A5XX_UNKNOWN_E7D4 0x0000e7d4 - -#define REG_A5XX_HLSQ_FS_CONSTLEN 0x0000e7d7 - -#define REG_A5XX_HLSQ_FS_INSTRLEN 0x0000e7d8 - -#define REG_A5XX_UNKNOWN_E7D9 0x0000e7d9 - -#define REG_A5XX_HLSQ_CS_CONSTLEN 0x0000e7dc - -#define REG_A5XX_HLSQ_CS_INSTRLEN 0x0000e7dd - -#define REG_A5XX_RB_2D_BLIT_CNTL 0x00002100 - -#define REG_A5XX_RB_2D_SRC_SOLID_DW0 0x00002101 - -#define REG_A5XX_RB_2D_SRC_SOLID_DW1 0x00002102 - -#define REG_A5XX_RB_2D_SRC_SOLID_DW2 0x00002103 - -#define REG_A5XX_RB_2D_SRC_SOLID_DW3 0x00002104 - -#define REG_A5XX_RB_2D_SRC_INFO 0x00002107 -#define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_RB_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 -#define A5XX_RB_2D_SRC_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_RB_2D_SRC_INFO_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_RB_2D_SRC_INFO_TILE_MODE__SHIFT) & A5XX_RB_2D_SRC_INFO_TILE_MODE__MASK; -} -#define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A5XX_RB_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_RB_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_SRC_INFO_COLOR_SWAP__MASK; -} -#define A5XX_RB_2D_SRC_INFO_FLAGS 0x00001000 - -#define REG_A5XX_RB_2D_SRC_LO 0x00002108 - -#define REG_A5XX_RB_2D_SRC_HI 0x00002109 - -#define REG_A5XX_RB_2D_SRC_SIZE 0x0000210a -#define A5XX_RB_2D_SRC_SIZE_PITCH__MASK 0x0000ffff -#define A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_2D_SRC_SIZE_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_PITCH__MASK; -} -#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK 0xffff0000 -#define A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT 16 -static inline uint32_t A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_2D_DST_INFO 0x00002110 -#define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_RB_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_RB_2D_DST_INFO_TILE_MODE__MASK 0x00000300 -#define A5XX_RB_2D_DST_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_RB_2D_DST_INFO_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_RB_2D_DST_INFO_TILE_MODE__SHIFT) & A5XX_RB_2D_DST_INFO_TILE_MODE__MASK; -} -#define A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A5XX_RB_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_RB_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_RB_2D_DST_INFO_COLOR_SWAP__MASK; -} -#define A5XX_RB_2D_DST_INFO_FLAGS 0x00001000 - -#define REG_A5XX_RB_2D_DST_LO 0x00002111 - -#define REG_A5XX_RB_2D_DST_HI 0x00002112 - -#define REG_A5XX_RB_2D_DST_SIZE 0x00002113 -#define A5XX_RB_2D_DST_SIZE_PITCH__MASK 0x0000ffff -#define A5XX_RB_2D_DST_SIZE_PITCH__SHIFT 0 -static inline uint32_t A5XX_RB_2D_DST_SIZE_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_2D_DST_SIZE_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_PITCH__MASK; -} -#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK 0xffff0000 -#define A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT 16 -static inline uint32_t A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(uint32_t val) -{ - return ((val >> 6) << A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__SHIFT) & A5XX_RB_2D_DST_SIZE_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_RB_2D_SRC_FLAGS_LO 0x00002140 - -#define REG_A5XX_RB_2D_SRC_FLAGS_HI 0x00002141 - -#define REG_A5XX_RB_2D_DST_FLAGS_LO 0x00002143 - -#define REG_A5XX_RB_2D_DST_FLAGS_HI 0x00002144 - -#define REG_A5XX_GRAS_2D_BLIT_CNTL 0x00002180 - -#define REG_A5XX_GRAS_2D_SRC_INFO 0x00002181 -#define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_GRAS_2D_SRC_INFO_TILE_MODE__MASK 0x00000300 -#define A5XX_GRAS_2D_SRC_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_GRAS_2D_SRC_INFO_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_GRAS_2D_SRC_INFO_TILE_MODE__SHIFT) & A5XX_GRAS_2D_SRC_INFO_TILE_MODE__MASK; -} -#define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP__MASK; -} -#define A5XX_GRAS_2D_SRC_INFO_FLAGS 0x00001000 - -#define REG_A5XX_GRAS_2D_DST_INFO 0x00002182 -#define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK 0x000000ff -#define A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT 0 -static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(enum a5xx_color_fmt val) -{ - return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT__MASK; -} -#define A5XX_GRAS_2D_DST_INFO_TILE_MODE__MASK 0x00000300 -#define A5XX_GRAS_2D_DST_INFO_TILE_MODE__SHIFT 8 -static inline uint32_t A5XX_GRAS_2D_DST_INFO_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_GRAS_2D_DST_INFO_TILE_MODE__SHIFT) & A5XX_GRAS_2D_DST_INFO_TILE_MODE__MASK; -} -#define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK 0x00000c00 -#define A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT 10 -static inline uint32_t A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__SHIFT) & A5XX_GRAS_2D_DST_INFO_COLOR_SWAP__MASK; -} -#define A5XX_GRAS_2D_DST_INFO_FLAGS 0x00001000 - -#define REG_A5XX_UNKNOWN_2100 0x00002100 - -#define REG_A5XX_UNKNOWN_2180 0x00002180 - -#define REG_A5XX_UNKNOWN_2184 0x00002184 - -#define REG_A5XX_TEX_SAMP_0 0x00000000 -#define A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 -#define A5XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 -#define A5XX_TEX_SAMP_0_XY_MAG__SHIFT 1 -static inline uint32_t A5XX_TEX_SAMP_0_XY_MAG(enum a5xx_tex_filter val) -{ - return ((val) << A5XX_TEX_SAMP_0_XY_MAG__SHIFT) & A5XX_TEX_SAMP_0_XY_MAG__MASK; -} -#define A5XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018 -#define A5XX_TEX_SAMP_0_XY_MIN__SHIFT 3 -static inline uint32_t A5XX_TEX_SAMP_0_XY_MIN(enum a5xx_tex_filter val) -{ - return ((val) << A5XX_TEX_SAMP_0_XY_MIN__SHIFT) & A5XX_TEX_SAMP_0_XY_MIN__MASK; -} -#define A5XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0 -#define A5XX_TEX_SAMP_0_WRAP_S__SHIFT 5 -static inline uint32_t A5XX_TEX_SAMP_0_WRAP_S(enum a5xx_tex_clamp val) -{ - return ((val) << A5XX_TEX_SAMP_0_WRAP_S__SHIFT) & A5XX_TEX_SAMP_0_WRAP_S__MASK; -} -#define A5XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700 -#define A5XX_TEX_SAMP_0_WRAP_T__SHIFT 8 -static inline uint32_t A5XX_TEX_SAMP_0_WRAP_T(enum a5xx_tex_clamp val) -{ - return ((val) << A5XX_TEX_SAMP_0_WRAP_T__SHIFT) & A5XX_TEX_SAMP_0_WRAP_T__MASK; -} -#define A5XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800 -#define A5XX_TEX_SAMP_0_WRAP_R__SHIFT 11 -static inline uint32_t A5XX_TEX_SAMP_0_WRAP_R(enum a5xx_tex_clamp val) -{ - return ((val) << A5XX_TEX_SAMP_0_WRAP_R__SHIFT) & A5XX_TEX_SAMP_0_WRAP_R__MASK; -} -#define A5XX_TEX_SAMP_0_ANISO__MASK 0x0001c000 -#define A5XX_TEX_SAMP_0_ANISO__SHIFT 14 -static inline uint32_t A5XX_TEX_SAMP_0_ANISO(enum a5xx_tex_aniso val) -{ - return ((val) << A5XX_TEX_SAMP_0_ANISO__SHIFT) & A5XX_TEX_SAMP_0_ANISO__MASK; -} -#define A5XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 -#define A5XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 -static inline uint32_t A5XX_TEX_SAMP_0_LOD_BIAS(float val) -{ - return ((((int32_t)(val * 256.0))) << A5XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A5XX_TEX_SAMP_0_LOD_BIAS__MASK; -} - -#define REG_A5XX_TEX_SAMP_1 0x00000001 -#define A5XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e -#define A5XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1 -static inline uint32_t A5XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val) -{ - return ((val) << A5XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A5XX_TEX_SAMP_1_COMPARE_FUNC__MASK; -} -#define A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 -#define A5XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 -#define A5XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 -#define A5XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 -#define A5XX_TEX_SAMP_1_MAX_LOD__SHIFT 8 -static inline uint32_t A5XX_TEX_SAMP_1_MAX_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A5XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A5XX_TEX_SAMP_1_MAX_LOD__MASK; -} -#define A5XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000 -#define A5XX_TEX_SAMP_1_MIN_LOD__SHIFT 20 -static inline uint32_t A5XX_TEX_SAMP_1_MIN_LOD(float val) -{ - return ((((uint32_t)(val * 256.0))) << A5XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A5XX_TEX_SAMP_1_MIN_LOD__MASK; -} - -#define REG_A5XX_TEX_SAMP_2 0x00000002 -#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK 0xfffffff0 -#define A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT 4 -static inline uint32_t A5XX_TEX_SAMP_2_BCOLOR_OFFSET(uint32_t val) -{ - return ((val) << A5XX_TEX_SAMP_2_BCOLOR_OFFSET__SHIFT) & A5XX_TEX_SAMP_2_BCOLOR_OFFSET__MASK; -} - -#define REG_A5XX_TEX_SAMP_3 0x00000003 - -#define REG_A5XX_TEX_CONST_0 0x00000000 -#define A5XX_TEX_CONST_0_TILE_MODE__MASK 0x00000003 -#define A5XX_TEX_CONST_0_TILE_MODE__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_0_TILE_MODE(enum a5xx_tile_mode val) -{ - return ((val) << A5XX_TEX_CONST_0_TILE_MODE__SHIFT) & A5XX_TEX_CONST_0_TILE_MODE__MASK; -} -#define A5XX_TEX_CONST_0_SRGB 0x00000004 -#define A5XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 -#define A5XX_TEX_CONST_0_SWIZ_X__SHIFT 4 -static inline uint32_t A5XX_TEX_CONST_0_SWIZ_X(enum a5xx_tex_swiz val) -{ - return ((val) << A5XX_TEX_CONST_0_SWIZ_X__SHIFT) & A5XX_TEX_CONST_0_SWIZ_X__MASK; -} -#define A5XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 -#define A5XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 -static inline uint32_t A5XX_TEX_CONST_0_SWIZ_Y(enum a5xx_tex_swiz val) -{ - return ((val) << A5XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A5XX_TEX_CONST_0_SWIZ_Y__MASK; -} -#define A5XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 -#define A5XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 -static inline uint32_t A5XX_TEX_CONST_0_SWIZ_Z(enum a5xx_tex_swiz val) -{ - return ((val) << A5XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A5XX_TEX_CONST_0_SWIZ_Z__MASK; -} -#define A5XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 -#define A5XX_TEX_CONST_0_SWIZ_W__SHIFT 13 -static inline uint32_t A5XX_TEX_CONST_0_SWIZ_W(enum a5xx_tex_swiz val) -{ - return ((val) << A5XX_TEX_CONST_0_SWIZ_W__SHIFT) & A5XX_TEX_CONST_0_SWIZ_W__MASK; -} -#define A5XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000 -#define A5XX_TEX_CONST_0_MIPLVLS__SHIFT 16 -static inline uint32_t A5XX_TEX_CONST_0_MIPLVLS(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_0_MIPLVLS__SHIFT) & A5XX_TEX_CONST_0_MIPLVLS__MASK; -} -#define A5XX_TEX_CONST_0_SAMPLES__MASK 0x00300000 -#define A5XX_TEX_CONST_0_SAMPLES__SHIFT 20 -static inline uint32_t A5XX_TEX_CONST_0_SAMPLES(enum a3xx_msaa_samples val) -{ - return ((val) << A5XX_TEX_CONST_0_SAMPLES__SHIFT) & A5XX_TEX_CONST_0_SAMPLES__MASK; -} -#define A5XX_TEX_CONST_0_FMT__MASK 0x3fc00000 -#define A5XX_TEX_CONST_0_FMT__SHIFT 22 -static inline uint32_t A5XX_TEX_CONST_0_FMT(enum a5xx_tex_fmt val) -{ - return ((val) << A5XX_TEX_CONST_0_FMT__SHIFT) & A5XX_TEX_CONST_0_FMT__MASK; -} -#define A5XX_TEX_CONST_0_SWAP__MASK 0xc0000000 -#define A5XX_TEX_CONST_0_SWAP__SHIFT 30 -static inline uint32_t A5XX_TEX_CONST_0_SWAP(enum a3xx_color_swap val) -{ - return ((val) << A5XX_TEX_CONST_0_SWAP__SHIFT) & A5XX_TEX_CONST_0_SWAP__MASK; -} - -#define REG_A5XX_TEX_CONST_1 0x00000001 -#define A5XX_TEX_CONST_1_WIDTH__MASK 0x00007fff -#define A5XX_TEX_CONST_1_WIDTH__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_1_WIDTH(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_1_WIDTH__SHIFT) & A5XX_TEX_CONST_1_WIDTH__MASK; -} -#define A5XX_TEX_CONST_1_HEIGHT__MASK 0x3fff8000 -#define A5XX_TEX_CONST_1_HEIGHT__SHIFT 15 -static inline uint32_t A5XX_TEX_CONST_1_HEIGHT(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_1_HEIGHT__SHIFT) & A5XX_TEX_CONST_1_HEIGHT__MASK; -} - -#define REG_A5XX_TEX_CONST_2 0x00000002 -#define A5XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f -#define A5XX_TEX_CONST_2_FETCHSIZE__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_2_FETCHSIZE(enum a5xx_tex_fetchsize val) -{ - return ((val) << A5XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A5XX_TEX_CONST_2_FETCHSIZE__MASK; -} -#define A5XX_TEX_CONST_2_PITCH__MASK 0x1fffff80 -#define A5XX_TEX_CONST_2_PITCH__SHIFT 7 -static inline uint32_t A5XX_TEX_CONST_2_PITCH(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_2_PITCH__SHIFT) & A5XX_TEX_CONST_2_PITCH__MASK; -} -#define A5XX_TEX_CONST_2_TYPE__MASK 0x60000000 -#define A5XX_TEX_CONST_2_TYPE__SHIFT 29 -static inline uint32_t A5XX_TEX_CONST_2_TYPE(enum a5xx_tex_type val) -{ - return ((val) << A5XX_TEX_CONST_2_TYPE__SHIFT) & A5XX_TEX_CONST_2_TYPE__MASK; -} - -#define REG_A5XX_TEX_CONST_3 0x00000003 -#define A5XX_TEX_CONST_3_ARRAY_PITCH__MASK 0x00003fff -#define A5XX_TEX_CONST_3_ARRAY_PITCH__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_3_ARRAY_PITCH(uint32_t val) -{ - return ((val >> 12) << A5XX_TEX_CONST_3_ARRAY_PITCH__SHIFT) & A5XX_TEX_CONST_3_ARRAY_PITCH__MASK; -} -#define A5XX_TEX_CONST_3_FLAG 0x10000000 - -#define REG_A5XX_TEX_CONST_4 0x00000004 -#define A5XX_TEX_CONST_4_BASE_LO__MASK 0xffffffe0 -#define A5XX_TEX_CONST_4_BASE_LO__SHIFT 5 -static inline uint32_t A5XX_TEX_CONST_4_BASE_LO(uint32_t val) -{ - return ((val >> 5) << A5XX_TEX_CONST_4_BASE_LO__SHIFT) & A5XX_TEX_CONST_4_BASE_LO__MASK; -} - -#define REG_A5XX_TEX_CONST_5 0x00000005 -#define A5XX_TEX_CONST_5_BASE_HI__MASK 0x0001ffff -#define A5XX_TEX_CONST_5_BASE_HI__SHIFT 0 -static inline uint32_t A5XX_TEX_CONST_5_BASE_HI(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_5_BASE_HI__SHIFT) & A5XX_TEX_CONST_5_BASE_HI__MASK; -} -#define A5XX_TEX_CONST_5_DEPTH__MASK 0x3ffe0000 -#define A5XX_TEX_CONST_5_DEPTH__SHIFT 17 -static inline uint32_t A5XX_TEX_CONST_5_DEPTH(uint32_t val) -{ - return ((val) << A5XX_TEX_CONST_5_DEPTH__SHIFT) & A5XX_TEX_CONST_5_DEPTH__MASK; -} - -#define REG_A5XX_TEX_CONST_6 0x00000006 - -#define REG_A5XX_TEX_CONST_7 0x00000007 - -#define REG_A5XX_TEX_CONST_8 0x00000008 - -#define REG_A5XX_TEX_CONST_9 0x00000009 - -#define REG_A5XX_TEX_CONST_10 0x0000000a - -#define REG_A5XX_TEX_CONST_11 0x0000000b - -#define REG_A5XX_SSBO_0_0 0x00000000 -#define A5XX_SSBO_0_0_BASE_LO__MASK 0xffffffe0 -#define A5XX_SSBO_0_0_BASE_LO__SHIFT 5 -static inline uint32_t A5XX_SSBO_0_0_BASE_LO(uint32_t val) -{ - return ((val >> 5) << A5XX_SSBO_0_0_BASE_LO__SHIFT) & A5XX_SSBO_0_0_BASE_LO__MASK; -} - -#define REG_A5XX_SSBO_0_1 0x00000001 -#define A5XX_SSBO_0_1_PITCH__MASK 0x003fffff -#define A5XX_SSBO_0_1_PITCH__SHIFT 0 -static inline uint32_t A5XX_SSBO_0_1_PITCH(uint32_t val) -{ - return ((val) << A5XX_SSBO_0_1_PITCH__SHIFT) & A5XX_SSBO_0_1_PITCH__MASK; -} - -#define REG_A5XX_SSBO_0_2 0x00000002 -#define A5XX_SSBO_0_2_ARRAY_PITCH__MASK 0x03fff000 -#define A5XX_SSBO_0_2_ARRAY_PITCH__SHIFT 12 -static inline uint32_t A5XX_SSBO_0_2_ARRAY_PITCH(uint32_t val) -{ - return ((val >> 12) << A5XX_SSBO_0_2_ARRAY_PITCH__SHIFT) & A5XX_SSBO_0_2_ARRAY_PITCH__MASK; -} - -#define REG_A5XX_SSBO_0_3 0x00000003 -#define A5XX_SSBO_0_3_CPP__MASK 0x0000003f -#define A5XX_SSBO_0_3_CPP__SHIFT 0 -static inline uint32_t A5XX_SSBO_0_3_CPP(uint32_t val) -{ - return ((val) << A5XX_SSBO_0_3_CPP__SHIFT) & A5XX_SSBO_0_3_CPP__MASK; -} - -#define REG_A5XX_SSBO_1_0 0x00000000 -#define A5XX_SSBO_1_0_FMT__MASK 0x0000ff00 -#define A5XX_SSBO_1_0_FMT__SHIFT 8 -static inline uint32_t A5XX_SSBO_1_0_FMT(enum a5xx_tex_fmt val) -{ - return ((val) << A5XX_SSBO_1_0_FMT__SHIFT) & A5XX_SSBO_1_0_FMT__MASK; -} -#define A5XX_SSBO_1_0_WIDTH__MASK 0xffff0000 -#define A5XX_SSBO_1_0_WIDTH__SHIFT 16 -static inline uint32_t A5XX_SSBO_1_0_WIDTH(uint32_t val) -{ - return ((val) << A5XX_SSBO_1_0_WIDTH__SHIFT) & A5XX_SSBO_1_0_WIDTH__MASK; -} - -#define REG_A5XX_SSBO_1_1 0x00000001 -#define A5XX_SSBO_1_1_HEIGHT__MASK 0x0000ffff -#define A5XX_SSBO_1_1_HEIGHT__SHIFT 0 -static inline uint32_t A5XX_SSBO_1_1_HEIGHT(uint32_t val) -{ - return ((val) << A5XX_SSBO_1_1_HEIGHT__SHIFT) & A5XX_SSBO_1_1_HEIGHT__MASK; -} -#define A5XX_SSBO_1_1_DEPTH__MASK 0xffff0000 -#define A5XX_SSBO_1_1_DEPTH__SHIFT 16 -static inline uint32_t A5XX_SSBO_1_1_DEPTH(uint32_t val) -{ - return ((val) << A5XX_SSBO_1_1_DEPTH__SHIFT) & A5XX_SSBO_1_1_DEPTH__MASK; -} - -#define REG_A5XX_SSBO_2_0 0x00000000 -#define A5XX_SSBO_2_0_BASE_LO__MASK 0xffffffff -#define A5XX_SSBO_2_0_BASE_LO__SHIFT 0 -static inline uint32_t A5XX_SSBO_2_0_BASE_LO(uint32_t val) -{ - return ((val) << A5XX_SSBO_2_0_BASE_LO__SHIFT) & A5XX_SSBO_2_0_BASE_LO__MASK; -} - -#define REG_A5XX_SSBO_2_1 0x00000001 -#define A5XX_SSBO_2_1_BASE_HI__MASK 0xffffffff -#define A5XX_SSBO_2_1_BASE_HI__SHIFT 0 -static inline uint32_t A5XX_SSBO_2_1_BASE_HI(uint32_t val) -{ - return ((val) << A5XX_SSBO_2_1_BASE_HI__SHIFT) & A5XX_SSBO_2_1_BASE_HI__MASK; -} - - -#endif /* A5XX_XML */ diff --git a/selfdrive/modeld/thneed/debug/include/adreno_pm4.xml.h b/selfdrive/modeld/thneed/debug/include/adreno_pm4.xml.h deleted file mode 100644 index 08f8ff2682..0000000000 --- a/selfdrive/modeld/thneed/debug/include/adreno_pm4.xml.h +++ /dev/null @@ -1,1344 +0,0 @@ -#ifndef ADRENO_PM4_XML -#define ADRENO_PM4_XML - -/* Autogenerated file, DO NOT EDIT manually! - -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/freedreno/envytools/ -git clone https://github.com/freedreno/envytools.git - -The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 501 bytes, from 2018-01-31 18:26:32) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2018-01-08 14:56:24) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 36805 bytes, from 2018-05-20 19:03:35) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 13634 bytes, from 2018-06-10 17:35:36) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 41584 bytes, from 2018-06-18 14:25:44) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2018-01-10 16:21:40) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 112086 bytes, from 2018-01-08 14:56:24) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 147158 bytes, from 2018-06-18 14:25:44) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a6xx.xml ( 88437 bytes, from 2018-06-18 14:25:44) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a6xx_gmu.xml ( 10431 bytes, from 2018-06-10 17:37:04) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2018-01-08 14:56:24) - -Copyright (C) 2013-2018 by the following authors: -- Rob Clark (robclark) -- Ilia Mirkin (imirkin) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -enum vgt_event_type { - VS_DEALLOC = 0, - PS_DEALLOC = 1, - VS_DONE_TS = 2, - PS_DONE_TS = 3, - CACHE_FLUSH_TS = 4, - CONTEXT_DONE = 5, - CACHE_FLUSH = 6, - HLSQ_FLUSH = 7, - VIZQUERY_START = 7, - VIZQUERY_END = 8, - SC_WAIT_WC = 9, - RST_PIX_CNT = 13, - RST_VTX_CNT = 14, - TILE_FLUSH = 15, - STAT_EVENT = 16, - CACHE_FLUSH_AND_INV_TS_EVENT = 20, - ZPASS_DONE = 21, - CACHE_FLUSH_AND_INV_EVENT = 22, - PERFCOUNTER_START = 23, - PERFCOUNTER_STOP = 24, - VS_FETCH_DONE = 27, - FACENESS_FLUSH = 28, - FLUSH_SO_0 = 17, - FLUSH_SO_1 = 18, - FLUSH_SO_2 = 19, - FLUSH_SO_3 = 20, - PC_CCU_INVALIDATE_DEPTH = 24, - PC_CCU_INVALIDATE_COLOR = 25, - UNK_1C = 28, - UNK_1D = 29, - BLIT = 30, - UNK_25 = 37, - LRZ_FLUSH = 38, - UNK_2C = 44, - UNK_2D = 45, -}; - -enum pc_di_primtype { - DI_PT_NONE = 0, - DI_PT_POINTLIST_PSIZE = 1, - DI_PT_LINELIST = 2, - DI_PT_LINESTRIP = 3, - DI_PT_TRILIST = 4, - DI_PT_TRIFAN = 5, - DI_PT_TRISTRIP = 6, - DI_PT_LINELOOP = 7, - DI_PT_RECTLIST = 8, - DI_PT_POINTLIST = 9, - DI_PT_LINE_ADJ = 10, - DI_PT_LINESTRIP_ADJ = 11, - DI_PT_TRI_ADJ = 12, - DI_PT_TRISTRIP_ADJ = 13, -}; - -enum pc_di_src_sel { - DI_SRC_SEL_DMA = 0, - DI_SRC_SEL_IMMEDIATE = 1, - DI_SRC_SEL_AUTO_INDEX = 2, - DI_SRC_SEL_RESERVED = 3, -}; - -enum pc_di_index_size { - INDEX_SIZE_IGN = 0, - INDEX_SIZE_16_BIT = 0, - INDEX_SIZE_32_BIT = 1, - INDEX_SIZE_8_BIT = 2, - INDEX_SIZE_INVALID = 0, -}; - -enum pc_di_vis_cull_mode { - IGNORE_VISIBILITY = 0, - USE_VISIBILITY = 1, -}; - -enum adreno_pm4_packet_type { - CP_TYPE0_PKT = 0, - CP_TYPE1_PKT = 0x40000000, - CP_TYPE2_PKT = 0x80000000, - CP_TYPE3_PKT = 0xc0000000, - CP_TYPE4_PKT = 0x40000000, - CP_TYPE7_PKT = 0x70000000, -}; - -enum adreno_pm4_type3_packets { - CP_ME_INIT = 72, - CP_NOP = 16, - CP_PREEMPT_ENABLE = 28, - CP_PREEMPT_TOKEN = 30, - CP_INDIRECT_BUFFER = 63, - CP_INDIRECT_BUFFER_PFD = 55, - CP_WAIT_FOR_IDLE = 38, - CP_WAIT_REG_MEM = 60, - CP_WAIT_REG_EQ = 82, - CP_WAIT_REG_GTE = 83, - CP_WAIT_UNTIL_READ = 92, - CP_WAIT_IB_PFD_COMPLETE = 93, - CP_REG_RMW = 33, - CP_SET_BIN_DATA = 47, - CP_SET_BIN_DATA5 = 47, - CP_REG_TO_MEM = 62, - CP_MEM_WRITE = 61, - CP_MEM_WRITE_CNTR = 79, - CP_COND_EXEC = 68, - CP_COND_WRITE = 69, - CP_COND_WRITE5 = 69, - CP_EVENT_WRITE = 70, - CP_EVENT_WRITE_SHD = 88, - CP_EVENT_WRITE_CFL = 89, - CP_EVENT_WRITE_ZPD = 91, - CP_RUN_OPENCL = 49, - CP_DRAW_INDX = 34, - CP_DRAW_INDX_2 = 54, - CP_DRAW_INDX_BIN = 52, - CP_DRAW_INDX_2_BIN = 53, - CP_VIZ_QUERY = 35, - CP_SET_STATE = 37, - CP_SET_CONSTANT = 45, - CP_IM_LOAD = 39, - CP_IM_LOAD_IMMEDIATE = 43, - CP_LOAD_CONSTANT_CONTEXT = 46, - CP_INVALIDATE_STATE = 59, - CP_SET_SHADER_BASES = 74, - CP_SET_BIN_MASK = 80, - CP_SET_BIN_SELECT = 81, - CP_CONTEXT_UPDATE = 94, - CP_INTERRUPT = 64, - CP_IM_STORE = 44, - CP_SET_DRAW_INIT_FLAGS = 75, - CP_SET_PROTECTED_MODE = 95, - CP_BOOTSTRAP_UCODE = 111, - CP_LOAD_STATE = 48, - CP_LOAD_STATE4 = 48, - CP_COND_INDIRECT_BUFFER_PFE = 58, - CP_COND_INDIRECT_BUFFER_PFD = 50, - CP_INDIRECT_BUFFER_PFE = 63, - CP_SET_BIN = 76, - CP_TEST_TWO_MEMS = 113, - CP_REG_WR_NO_CTXT = 120, - CP_RECORD_PFP_TIMESTAMP = 17, - CP_SET_SECURE_MODE = 102, - CP_WAIT_FOR_ME = 19, - CP_SET_DRAW_STATE = 67, - CP_DRAW_INDX_OFFSET = 56, - CP_DRAW_INDIRECT = 40, - CP_DRAW_INDX_INDIRECT = 41, - CP_DRAW_AUTO = 36, - CP_UNKNOWN_19 = 25, - CP_UNKNOWN_1A = 26, - CP_UNKNOWN_4E = 78, - CP_WIDE_REG_WRITE = 116, - CP_SCRATCH_TO_REG = 77, - CP_REG_TO_SCRATCH = 74, - CP_WAIT_MEM_WRITES = 18, - CP_COND_REG_EXEC = 71, - CP_MEM_TO_REG = 66, - CP_EXEC_CS_INDIRECT = 65, - CP_EXEC_CS = 51, - CP_PERFCOUNTER_ACTION = 80, - CP_SMMU_TABLE_UPDATE = 83, - CP_SET_MARKER = 101, - CP_SET_PSEUDO_REG = 86, - CP_CONTEXT_REG_BUNCH = 92, - CP_YIELD_ENABLE = 28, - CP_SKIP_IB2_ENABLE_GLOBAL = 29, - CP_SKIP_IB2_ENABLE_LOCAL = 35, - CP_SET_SUBDRAW_SIZE = 53, - CP_SET_VISIBILITY_OVERRIDE = 100, - CP_PREEMPT_ENABLE_GLOBAL = 105, - CP_PREEMPT_ENABLE_LOCAL = 106, - CP_CONTEXT_SWITCH_YIELD = 107, - CP_SET_RENDER_MODE = 108, - CP_COMPUTE_CHECKPOINT = 110, - CP_MEM_TO_MEM = 115, - CP_BLIT = 44, - CP_REG_TEST = 57, - IN_IB_PREFETCH_END = 23, - IN_SUBBLK_PREFETCH = 31, - IN_INSTR_PREFETCH = 32, - IN_INSTR_MATCH = 71, - IN_CONST_PREFETCH = 73, - IN_INCR_UPDT_STATE = 85, - IN_INCR_UPDT_CONST = 86, - IN_INCR_UPDT_INSTR = 87, - PKT4 = 4, -}; - -enum adreno_state_block { - SB_VERT_TEX = 0, - SB_VERT_MIPADDR = 1, - SB_FRAG_TEX = 2, - SB_FRAG_MIPADDR = 3, - SB_VERT_SHADER = 4, - SB_GEOM_SHADER = 5, - SB_FRAG_SHADER = 6, - SB_COMPUTE_SHADER = 7, -}; - -enum adreno_state_type { - ST_SHADER = 0, - ST_CONSTANTS = 1, -}; - -enum adreno_state_src { - SS_DIRECT = 0, - SS_INVALID_ALL_IC = 2, - SS_INVALID_PART_IC = 3, - SS_INDIRECT = 4, - SS_INDIRECT_TCM = 5, - SS_INDIRECT_STM = 6, -}; - -enum a4xx_state_block { - SB4_VS_TEX = 0, - SB4_HS_TEX = 1, - SB4_DS_TEX = 2, - SB4_GS_TEX = 3, - SB4_FS_TEX = 4, - SB4_CS_TEX = 5, - SB4_VS_SHADER = 8, - SB4_HS_SHADER = 9, - SB4_DS_SHADER = 10, - SB4_GS_SHADER = 11, - SB4_FS_SHADER = 12, - SB4_CS_SHADER = 13, - SB4_SSBO = 14, - SB4_CS_SSBO = 15, -}; - -enum a4xx_state_type { - ST4_SHADER = 0, - ST4_CONSTANTS = 1, -}; - -enum a4xx_state_src { - SS4_DIRECT = 0, - SS4_INDIRECT = 2, -}; - -enum a4xx_index_size { - INDEX4_SIZE_8_BIT = 0, - INDEX4_SIZE_16_BIT = 1, - INDEX4_SIZE_32_BIT = 2, -}; - -enum cp_cond_function { - WRITE_ALWAYS = 0, - WRITE_LT = 1, - WRITE_LE = 2, - WRITE_EQ = 3, - WRITE_NE = 4, - WRITE_GE = 5, - WRITE_GT = 6, -}; - -enum render_mode_cmd { - BYPASS = 1, - BINNING = 2, - GMEM = 3, - BLIT2D = 5, - BLIT2DSCALE = 7, - END2D = 8, -}; - -enum cp_blit_cmd { - BLIT_OP_FILL = 0, - BLIT_OP_COPY = 1, - BLIT_OP_SCALE = 3, -}; - -#define REG_CP_LOAD_STATE_0 0x00000000 -#define CP_LOAD_STATE_0_DST_OFF__MASK 0x0000ffff -#define CP_LOAD_STATE_0_DST_OFF__SHIFT 0 -static inline uint32_t CP_LOAD_STATE_0_DST_OFF(uint32_t val) -{ - return ((val) << CP_LOAD_STATE_0_DST_OFF__SHIFT) & CP_LOAD_STATE_0_DST_OFF__MASK; -} -#define CP_LOAD_STATE_0_STATE_SRC__MASK 0x00070000 -#define CP_LOAD_STATE_0_STATE_SRC__SHIFT 16 -static inline uint32_t CP_LOAD_STATE_0_STATE_SRC(enum adreno_state_src val) -{ - return ((val) << CP_LOAD_STATE_0_STATE_SRC__SHIFT) & CP_LOAD_STATE_0_STATE_SRC__MASK; -} -#define CP_LOAD_STATE_0_STATE_BLOCK__MASK 0x00380000 -#define CP_LOAD_STATE_0_STATE_BLOCK__SHIFT 19 -static inline uint32_t CP_LOAD_STATE_0_STATE_BLOCK(enum adreno_state_block val) -{ - return ((val) << CP_LOAD_STATE_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE_0_STATE_BLOCK__MASK; -} -#define CP_LOAD_STATE_0_NUM_UNIT__MASK 0xffc00000 -#define CP_LOAD_STATE_0_NUM_UNIT__SHIFT 22 -static inline uint32_t CP_LOAD_STATE_0_NUM_UNIT(uint32_t val) -{ - return ((val) << CP_LOAD_STATE_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE_0_NUM_UNIT__MASK; -} - -#define REG_CP_LOAD_STATE_1 0x00000001 -#define CP_LOAD_STATE_1_STATE_TYPE__MASK 0x00000003 -#define CP_LOAD_STATE_1_STATE_TYPE__SHIFT 0 -static inline uint32_t CP_LOAD_STATE_1_STATE_TYPE(enum adreno_state_type val) -{ - return ((val) << CP_LOAD_STATE_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE_1_STATE_TYPE__MASK; -} -#define CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK 0xfffffffc -#define CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT 2 -static inline uint32_t CP_LOAD_STATE_1_EXT_SRC_ADDR(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK; -} - -#define REG_CP_LOAD_STATE4_0 0x00000000 -#define CP_LOAD_STATE4_0_DST_OFF__MASK 0x00003fff -#define CP_LOAD_STATE4_0_DST_OFF__SHIFT 0 -static inline uint32_t CP_LOAD_STATE4_0_DST_OFF(uint32_t val) -{ - return ((val) << CP_LOAD_STATE4_0_DST_OFF__SHIFT) & CP_LOAD_STATE4_0_DST_OFF__MASK; -} -#define CP_LOAD_STATE4_0_STATE_SRC__MASK 0x00030000 -#define CP_LOAD_STATE4_0_STATE_SRC__SHIFT 16 -static inline uint32_t CP_LOAD_STATE4_0_STATE_SRC(enum a4xx_state_src val) -{ - return ((val) << CP_LOAD_STATE4_0_STATE_SRC__SHIFT) & CP_LOAD_STATE4_0_STATE_SRC__MASK; -} -#define CP_LOAD_STATE4_0_STATE_BLOCK__MASK 0x003c0000 -#define CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT 18 -static inline uint32_t CP_LOAD_STATE4_0_STATE_BLOCK(enum a4xx_state_block val) -{ - return ((val) << CP_LOAD_STATE4_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE4_0_STATE_BLOCK__MASK; -} -#define CP_LOAD_STATE4_0_NUM_UNIT__MASK 0xffc00000 -#define CP_LOAD_STATE4_0_NUM_UNIT__SHIFT 22 -static inline uint32_t CP_LOAD_STATE4_0_NUM_UNIT(uint32_t val) -{ - return ((val) << CP_LOAD_STATE4_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE4_0_NUM_UNIT__MASK; -} - -#define REG_CP_LOAD_STATE4_1 0x00000001 -#define CP_LOAD_STATE4_1_STATE_TYPE__MASK 0x00000003 -#define CP_LOAD_STATE4_1_STATE_TYPE__SHIFT 0 -static inline uint32_t CP_LOAD_STATE4_1_STATE_TYPE(enum a4xx_state_type val) -{ - return ((val) << CP_LOAD_STATE4_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE4_1_STATE_TYPE__MASK; -} -#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK 0xfffffffc -#define CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT 2 -static inline uint32_t CP_LOAD_STATE4_1_EXT_SRC_ADDR(uint32_t val) -{ - assert(!(val & 0x3)); - return ((val >> 2) << CP_LOAD_STATE4_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE4_1_EXT_SRC_ADDR__MASK; -} - -#define REG_CP_LOAD_STATE4_2 0x00000002 -#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK 0xffffffff -#define CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT 0 -static inline uint32_t CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(uint32_t val) -{ - return ((val) << CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__SHIFT) & CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI__MASK; -} - -#define REG_CP_DRAW_INDX_0 0x00000000 -#define CP_DRAW_INDX_0_VIZ_QUERY__MASK 0xffffffff -#define CP_DRAW_INDX_0_VIZ_QUERY__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_0_VIZ_QUERY(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_0_VIZ_QUERY__SHIFT) & CP_DRAW_INDX_0_VIZ_QUERY__MASK; -} - -#define REG_CP_DRAW_INDX_1 0x00000001 -#define CP_DRAW_INDX_1_PRIM_TYPE__MASK 0x0000003f -#define CP_DRAW_INDX_1_PRIM_TYPE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_1_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << CP_DRAW_INDX_1_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_1_PRIM_TYPE__MASK; -} -#define CP_DRAW_INDX_1_SOURCE_SELECT__MASK 0x000000c0 -#define CP_DRAW_INDX_1_SOURCE_SELECT__SHIFT 6 -static inline uint32_t CP_DRAW_INDX_1_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << CP_DRAW_INDX_1_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_1_SOURCE_SELECT__MASK; -} -#define CP_DRAW_INDX_1_VIS_CULL__MASK 0x00000600 -#define CP_DRAW_INDX_1_VIS_CULL__SHIFT 9 -static inline uint32_t CP_DRAW_INDX_1_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << CP_DRAW_INDX_1_VIS_CULL__SHIFT) & CP_DRAW_INDX_1_VIS_CULL__MASK; -} -#define CP_DRAW_INDX_1_INDEX_SIZE__MASK 0x00000800 -#define CP_DRAW_INDX_1_INDEX_SIZE__SHIFT 11 -static inline uint32_t CP_DRAW_INDX_1_INDEX_SIZE(enum pc_di_index_size val) -{ - return ((val) << CP_DRAW_INDX_1_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_1_INDEX_SIZE__MASK; -} -#define CP_DRAW_INDX_1_NOT_EOP 0x00001000 -#define CP_DRAW_INDX_1_SMALL_INDEX 0x00002000 -#define CP_DRAW_INDX_1_PRE_DRAW_INITIATOR_ENABLE 0x00004000 -#define CP_DRAW_INDX_1_NUM_INSTANCES__MASK 0xff000000 -#define CP_DRAW_INDX_1_NUM_INSTANCES__SHIFT 24 -static inline uint32_t CP_DRAW_INDX_1_NUM_INSTANCES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_1_NUM_INSTANCES__MASK; -} - -#define REG_CP_DRAW_INDX_2 0x00000002 -#define CP_DRAW_INDX_2_NUM_INDICES__MASK 0xffffffff -#define CP_DRAW_INDX_2_NUM_INDICES__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_2_NUM_INDICES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_NUM_INDICES__MASK; -} - -#define REG_CP_DRAW_INDX_3 0x00000003 -#define CP_DRAW_INDX_3_INDX_BASE__MASK 0xffffffff -#define CP_DRAW_INDX_3_INDX_BASE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_3_INDX_BASE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_3_INDX_BASE__SHIFT) & CP_DRAW_INDX_3_INDX_BASE__MASK; -} - -#define REG_CP_DRAW_INDX_4 0x00000004 -#define CP_DRAW_INDX_4_INDX_SIZE__MASK 0xffffffff -#define CP_DRAW_INDX_4_INDX_SIZE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_4_INDX_SIZE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_4_INDX_SIZE__SHIFT) & CP_DRAW_INDX_4_INDX_SIZE__MASK; -} - -#define REG_CP_DRAW_INDX_2_0 0x00000000 -#define CP_DRAW_INDX_2_0_VIZ_QUERY__MASK 0xffffffff -#define CP_DRAW_INDX_2_0_VIZ_QUERY__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_2_0_VIZ_QUERY(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_2_0_VIZ_QUERY__SHIFT) & CP_DRAW_INDX_2_0_VIZ_QUERY__MASK; -} - -#define REG_CP_DRAW_INDX_2_1 0x00000001 -#define CP_DRAW_INDX_2_1_PRIM_TYPE__MASK 0x0000003f -#define CP_DRAW_INDX_2_1_PRIM_TYPE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_2_1_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << CP_DRAW_INDX_2_1_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_2_1_PRIM_TYPE__MASK; -} -#define CP_DRAW_INDX_2_1_SOURCE_SELECT__MASK 0x000000c0 -#define CP_DRAW_INDX_2_1_SOURCE_SELECT__SHIFT 6 -static inline uint32_t CP_DRAW_INDX_2_1_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << CP_DRAW_INDX_2_1_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_2_1_SOURCE_SELECT__MASK; -} -#define CP_DRAW_INDX_2_1_VIS_CULL__MASK 0x00000600 -#define CP_DRAW_INDX_2_1_VIS_CULL__SHIFT 9 -static inline uint32_t CP_DRAW_INDX_2_1_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << CP_DRAW_INDX_2_1_VIS_CULL__SHIFT) & CP_DRAW_INDX_2_1_VIS_CULL__MASK; -} -#define CP_DRAW_INDX_2_1_INDEX_SIZE__MASK 0x00000800 -#define CP_DRAW_INDX_2_1_INDEX_SIZE__SHIFT 11 -static inline uint32_t CP_DRAW_INDX_2_1_INDEX_SIZE(enum pc_di_index_size val) -{ - return ((val) << CP_DRAW_INDX_2_1_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_2_1_INDEX_SIZE__MASK; -} -#define CP_DRAW_INDX_2_1_NOT_EOP 0x00001000 -#define CP_DRAW_INDX_2_1_SMALL_INDEX 0x00002000 -#define CP_DRAW_INDX_2_1_PRE_DRAW_INITIATOR_ENABLE 0x00004000 -#define CP_DRAW_INDX_2_1_NUM_INSTANCES__MASK 0xff000000 -#define CP_DRAW_INDX_2_1_NUM_INSTANCES__SHIFT 24 -static inline uint32_t CP_DRAW_INDX_2_1_NUM_INSTANCES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_2_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_2_1_NUM_INSTANCES__MASK; -} - -#define REG_CP_DRAW_INDX_2_2 0x00000002 -#define CP_DRAW_INDX_2_2_NUM_INDICES__MASK 0xffffffff -#define CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_2_2_NUM_INDICES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_2_NUM_INDICES__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_0 0x00000000 -#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK 0x0000003f -#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK; -} -#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK 0x000000c0 -#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT 6 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK; -} -#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK 0x00000300 -#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT 8 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT) & CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK; -} -#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK 0x00000c00 -#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT 10 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(enum a4xx_index_size val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK; -} -#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK 0x01f00000 -#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT 20 -static inline uint32_t CP_DRAW_INDX_OFFSET_0_TESS_MODE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT) & CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_1 0x00000001 -#define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK 0xffffffff -#define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002 -#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK 0xffffffff -#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_2_NUM_INDICES(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_3 0x00000003 - -#define REG_CP_DRAW_INDX_OFFSET_4 0x00000004 -#define CP_DRAW_INDX_OFFSET_4_INDX_BASE__MASK 0xffffffff -#define CP_DRAW_INDX_OFFSET_4_INDX_BASE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_4_INDX_BASE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_4_INDX_BASE__SHIFT) & CP_DRAW_INDX_OFFSET_4_INDX_BASE__MASK; -} - -#define REG_CP_DRAW_INDX_OFFSET_5 0x00000005 -#define CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK 0xffffffff -#define CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT 0 -static inline uint32_t CP_DRAW_INDX_OFFSET_5_INDX_SIZE(uint32_t val) -{ - return ((val) << CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK; -} - -#define REG_A4XX_CP_DRAW_INDIRECT_0 0x00000000 -#define A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__MASK 0x0000003f -#define A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_PRIM_TYPE__MASK; -} -#define A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__MASK 0x000000c0 -#define A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__SHIFT 6 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_SOURCE_SELECT__MASK; -} -#define A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__MASK 0x00000300 -#define A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__SHIFT 8 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_VIS_CULL__MASK; -} -#define A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__MASK 0x00000c00 -#define A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__SHIFT 10 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE(enum a4xx_index_size val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_INDEX_SIZE__MASK; -} -#define A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__MASK 0x01f00000 -#define A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__SHIFT 20 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_0_TESS_MODE(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__SHIFT) & A4XX_CP_DRAW_INDIRECT_0_TESS_MODE__MASK; -} - -#define REG_A4XX_CP_DRAW_INDIRECT_1 0x00000001 -#define A4XX_CP_DRAW_INDIRECT_1_INDIRECT__MASK 0xffffffff -#define A4XX_CP_DRAW_INDIRECT_1_INDIRECT__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDIRECT_1_INDIRECT(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDIRECT_1_INDIRECT__SHIFT) & A4XX_CP_DRAW_INDIRECT_1_INDIRECT__MASK; -} - - -#define REG_A5XX_CP_DRAW_INDIRECT_2 0x00000002 -#define A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__MASK 0xffffffff -#define A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__SHIFT) & A5XX_CP_DRAW_INDIRECT_2_INDIRECT_HI__MASK; -} - -#define REG_A4XX_CP_DRAW_INDX_INDIRECT_0 0x00000000 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__MASK 0x0000003f -#define A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE(enum pc_di_primtype val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_PRIM_TYPE__MASK; -} -#define A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__MASK 0x000000c0 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__SHIFT 6 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT(enum pc_di_src_sel val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_SOURCE_SELECT__MASK; -} -#define A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__MASK 0x00000300 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__SHIFT 8 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL(enum pc_di_vis_cull_mode val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_VIS_CULL__MASK; -} -#define A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__MASK 0x00000c00 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__SHIFT 10 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE(enum a4xx_index_size val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_INDEX_SIZE__MASK; -} -#define A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__MASK 0x01f00000 -#define A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__SHIFT 20 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_0_TESS_MODE__MASK; -} - - -#define REG_A4XX_CP_DRAW_INDX_INDIRECT_1 0x00000001 -#define A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__MASK 0xffffffff -#define A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE__MASK; -} - -#define REG_A4XX_CP_DRAW_INDX_INDIRECT_2 0x00000002 -#define A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__MASK 0xffffffff -#define A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_2_INDX_SIZE__MASK; -} - -#define REG_A4XX_CP_DRAW_INDX_INDIRECT_3 0x00000003 -#define A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__MASK 0xffffffff -#define A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__SHIFT 0 -static inline uint32_t A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT(uint32_t val) -{ - return ((val) << A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__SHIFT) & A4XX_CP_DRAW_INDX_INDIRECT_3_INDIRECT__MASK; -} - - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_1 0x00000001 -#define A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_1_INDX_BASE_LO__MASK; -} - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_2 0x00000002 -#define A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_2_INDX_BASE_HI__MASK; -} - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_3 0x00000003 -#define A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_3_MAX_INDICES__MASK; -} - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_4 0x00000004 -#define A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_4_INDIRECT_LO__MASK; -} - -#define REG_A5XX_CP_DRAW_INDX_INDIRECT_5 0x00000005 -#define A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__MASK 0xffffffff -#define A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__SHIFT 0 -static inline uint32_t A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI(uint32_t val) -{ - return ((val) << A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__SHIFT) & A5XX_CP_DRAW_INDX_INDIRECT_5_INDIRECT_HI__MASK; -} - -static inline uint32_t REG_CP_SET_DRAW_STATE_(uint32_t i0) { return 0x00000000 + 0x3*i0; } - -static inline uint32_t REG_CP_SET_DRAW_STATE__0(uint32_t i0) { return 0x00000000 + 0x3*i0; } -#define CP_SET_DRAW_STATE__0_COUNT__MASK 0x0000ffff -#define CP_SET_DRAW_STATE__0_COUNT__SHIFT 0 -static inline uint32_t CP_SET_DRAW_STATE__0_COUNT(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__0_COUNT__SHIFT) & CP_SET_DRAW_STATE__0_COUNT__MASK; -} -#define CP_SET_DRAW_STATE__0_DIRTY 0x00010000 -#define CP_SET_DRAW_STATE__0_DISABLE 0x00020000 -#define CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS 0x00040000 -#define CP_SET_DRAW_STATE__0_LOAD_IMMED 0x00080000 -#define CP_SET_DRAW_STATE__0_GROUP_ID__MASK 0x1f000000 -#define CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT 24 -static inline uint32_t CP_SET_DRAW_STATE__0_GROUP_ID(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__0_GROUP_ID__SHIFT) & CP_SET_DRAW_STATE__0_GROUP_ID__MASK; -} - -static inline uint32_t REG_CP_SET_DRAW_STATE__1(uint32_t i0) { return 0x00000001 + 0x3*i0; } -#define CP_SET_DRAW_STATE__1_ADDR_LO__MASK 0xffffffff -#define CP_SET_DRAW_STATE__1_ADDR_LO__SHIFT 0 -static inline uint32_t CP_SET_DRAW_STATE__1_ADDR_LO(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__1_ADDR_LO__SHIFT) & CP_SET_DRAW_STATE__1_ADDR_LO__MASK; -} - -static inline uint32_t REG_CP_SET_DRAW_STATE__2(uint32_t i0) { return 0x00000002 + 0x3*i0; } -#define CP_SET_DRAW_STATE__2_ADDR_HI__MASK 0xffffffff -#define CP_SET_DRAW_STATE__2_ADDR_HI__SHIFT 0 -static inline uint32_t CP_SET_DRAW_STATE__2_ADDR_HI(uint32_t val) -{ - return ((val) << CP_SET_DRAW_STATE__2_ADDR_HI__SHIFT) & CP_SET_DRAW_STATE__2_ADDR_HI__MASK; -} - -#define REG_CP_SET_BIN_0 0x00000000 - -#define REG_CP_SET_BIN_1 0x00000001 -#define CP_SET_BIN_1_X1__MASK 0x0000ffff -#define CP_SET_BIN_1_X1__SHIFT 0 -static inline uint32_t CP_SET_BIN_1_X1(uint32_t val) -{ - return ((val) << CP_SET_BIN_1_X1__SHIFT) & CP_SET_BIN_1_X1__MASK; -} -#define CP_SET_BIN_1_Y1__MASK 0xffff0000 -#define CP_SET_BIN_1_Y1__SHIFT 16 -static inline uint32_t CP_SET_BIN_1_Y1(uint32_t val) -{ - return ((val) << CP_SET_BIN_1_Y1__SHIFT) & CP_SET_BIN_1_Y1__MASK; -} - -#define REG_CP_SET_BIN_2 0x00000002 -#define CP_SET_BIN_2_X2__MASK 0x0000ffff -#define CP_SET_BIN_2_X2__SHIFT 0 -static inline uint32_t CP_SET_BIN_2_X2(uint32_t val) -{ - return ((val) << CP_SET_BIN_2_X2__SHIFT) & CP_SET_BIN_2_X2__MASK; -} -#define CP_SET_BIN_2_Y2__MASK 0xffff0000 -#define CP_SET_BIN_2_Y2__SHIFT 16 -static inline uint32_t CP_SET_BIN_2_Y2(uint32_t val) -{ - return ((val) << CP_SET_BIN_2_Y2__SHIFT) & CP_SET_BIN_2_Y2__MASK; -} - -#define REG_CP_SET_BIN_DATA_0 0x00000000 -#define CP_SET_BIN_DATA_0_BIN_DATA_ADDR__MASK 0xffffffff -#define CP_SET_BIN_DATA_0_BIN_DATA_ADDR__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA_0_BIN_DATA_ADDR(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA_0_BIN_DATA_ADDR__SHIFT) & CP_SET_BIN_DATA_0_BIN_DATA_ADDR__MASK; -} - -#define REG_CP_SET_BIN_DATA_1 0x00000001 -#define CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK 0xffffffff -#define CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK; -} - -#define REG_CP_SET_BIN_DATA5_0 0x00000000 -#define CP_SET_BIN_DATA5_0_VSC_SIZE__MASK 0x003f0000 -#define CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT 16 -static inline uint32_t CP_SET_BIN_DATA5_0_VSC_SIZE(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_0_VSC_SIZE__SHIFT) & CP_SET_BIN_DATA5_0_VSC_SIZE__MASK; -} -#define CP_SET_BIN_DATA5_0_VSC_N__MASK 0x07c00000 -#define CP_SET_BIN_DATA5_0_VSC_N__SHIFT 22 -static inline uint32_t CP_SET_BIN_DATA5_0_VSC_N(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_0_VSC_N__SHIFT) & CP_SET_BIN_DATA5_0_VSC_N__MASK; -} - -#define REG_CP_SET_BIN_DATA5_1 0x00000001 -#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK 0xffffffff -#define CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__SHIFT) & CP_SET_BIN_DATA5_1_BIN_DATA_ADDR_LO__MASK; -} - -#define REG_CP_SET_BIN_DATA5_2 0x00000002 -#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK 0xffffffff -#define CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__SHIFT) & CP_SET_BIN_DATA5_2_BIN_DATA_ADDR_HI__MASK; -} - -#define REG_CP_SET_BIN_DATA5_3 0x00000003 -#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK 0xffffffff -#define CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__SHIFT) & CP_SET_BIN_DATA5_3_BIN_SIZE_ADDRESS_LO__MASK; -} - -#define REG_CP_SET_BIN_DATA5_4 0x00000004 -#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK 0xffffffff -#define CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT 0 -static inline uint32_t CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI(uint32_t val) -{ - return ((val) << CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__SHIFT) & CP_SET_BIN_DATA5_4_BIN_SIZE_ADDRESS_HI__MASK; -} - -#define REG_CP_REG_TO_MEM_0 0x00000000 -#define CP_REG_TO_MEM_0_REG__MASK 0x0000ffff -#define CP_REG_TO_MEM_0_REG__SHIFT 0 -static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val) -{ - return ((val) << CP_REG_TO_MEM_0_REG__SHIFT) & CP_REG_TO_MEM_0_REG__MASK; -} -#define CP_REG_TO_MEM_0_CNT__MASK 0x3ff80000 -#define CP_REG_TO_MEM_0_CNT__SHIFT 19 -static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val) -{ - return ((val) << CP_REG_TO_MEM_0_CNT__SHIFT) & CP_REG_TO_MEM_0_CNT__MASK; -} -#define CP_REG_TO_MEM_0_64B 0x40000000 -#define CP_REG_TO_MEM_0_ACCUMULATE 0x80000000 - -#define REG_CP_REG_TO_MEM_1 0x00000001 -#define CP_REG_TO_MEM_1_DEST__MASK 0xffffffff -#define CP_REG_TO_MEM_1_DEST__SHIFT 0 -static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val) -{ - return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK; -} - -#define REG_CP_MEM_TO_MEM_0 0x00000000 -#define CP_MEM_TO_MEM_0_NEG_A 0x00000001 -#define CP_MEM_TO_MEM_0_NEG_B 0x00000002 -#define CP_MEM_TO_MEM_0_NEG_C 0x00000004 -#define CP_MEM_TO_MEM_0_DOUBLE 0x20000000 - -#define REG_CP_COND_WRITE_0 0x00000000 -#define CP_COND_WRITE_0_FUNCTION__MASK 0x00000007 -#define CP_COND_WRITE_0_FUNCTION__SHIFT 0 -static inline uint32_t CP_COND_WRITE_0_FUNCTION(enum cp_cond_function val) -{ - return ((val) << CP_COND_WRITE_0_FUNCTION__SHIFT) & CP_COND_WRITE_0_FUNCTION__MASK; -} -#define CP_COND_WRITE_0_POLL_MEMORY 0x00000010 -#define CP_COND_WRITE_0_WRITE_MEMORY 0x00000100 - -#define REG_CP_COND_WRITE_1 0x00000001 -#define CP_COND_WRITE_1_POLL_ADDR__MASK 0xffffffff -#define CP_COND_WRITE_1_POLL_ADDR__SHIFT 0 -static inline uint32_t CP_COND_WRITE_1_POLL_ADDR(uint32_t val) -{ - return ((val) << CP_COND_WRITE_1_POLL_ADDR__SHIFT) & CP_COND_WRITE_1_POLL_ADDR__MASK; -} - -#define REG_CP_COND_WRITE_2 0x00000002 -#define CP_COND_WRITE_2_REF__MASK 0xffffffff -#define CP_COND_WRITE_2_REF__SHIFT 0 -static inline uint32_t CP_COND_WRITE_2_REF(uint32_t val) -{ - return ((val) << CP_COND_WRITE_2_REF__SHIFT) & CP_COND_WRITE_2_REF__MASK; -} - -#define REG_CP_COND_WRITE_3 0x00000003 -#define CP_COND_WRITE_3_MASK__MASK 0xffffffff -#define CP_COND_WRITE_3_MASK__SHIFT 0 -static inline uint32_t CP_COND_WRITE_3_MASK(uint32_t val) -{ - return ((val) << CP_COND_WRITE_3_MASK__SHIFT) & CP_COND_WRITE_3_MASK__MASK; -} - -#define REG_CP_COND_WRITE_4 0x00000004 -#define CP_COND_WRITE_4_WRITE_ADDR__MASK 0xffffffff -#define CP_COND_WRITE_4_WRITE_ADDR__SHIFT 0 -static inline uint32_t CP_COND_WRITE_4_WRITE_ADDR(uint32_t val) -{ - return ((val) << CP_COND_WRITE_4_WRITE_ADDR__SHIFT) & CP_COND_WRITE_4_WRITE_ADDR__MASK; -} - -#define REG_CP_COND_WRITE_5 0x00000005 -#define CP_COND_WRITE_5_WRITE_DATA__MASK 0xffffffff -#define CP_COND_WRITE_5_WRITE_DATA__SHIFT 0 -static inline uint32_t CP_COND_WRITE_5_WRITE_DATA(uint32_t val) -{ - return ((val) << CP_COND_WRITE_5_WRITE_DATA__SHIFT) & CP_COND_WRITE_5_WRITE_DATA__MASK; -} - -#define REG_CP_COND_WRITE5_0 0x00000000 -#define CP_COND_WRITE5_0_FUNCTION__MASK 0x00000007 -#define CP_COND_WRITE5_0_FUNCTION__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_0_FUNCTION(enum cp_cond_function val) -{ - return ((val) << CP_COND_WRITE5_0_FUNCTION__SHIFT) & CP_COND_WRITE5_0_FUNCTION__MASK; -} -#define CP_COND_WRITE5_0_POLL_MEMORY 0x00000010 -#define CP_COND_WRITE5_0_WRITE_MEMORY 0x00000100 - -#define REG_CP_COND_WRITE5_1 0x00000001 -#define CP_COND_WRITE5_1_POLL_ADDR_LO__MASK 0xffffffff -#define CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_1_POLL_ADDR_LO(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_1_POLL_ADDR_LO__SHIFT) & CP_COND_WRITE5_1_POLL_ADDR_LO__MASK; -} - -#define REG_CP_COND_WRITE5_2 0x00000002 -#define CP_COND_WRITE5_2_POLL_ADDR_HI__MASK 0xffffffff -#define CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_2_POLL_ADDR_HI(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_2_POLL_ADDR_HI__SHIFT) & CP_COND_WRITE5_2_POLL_ADDR_HI__MASK; -} - -#define REG_CP_COND_WRITE5_3 0x00000003 -#define CP_COND_WRITE5_3_REF__MASK 0xffffffff -#define CP_COND_WRITE5_3_REF__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_3_REF(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_3_REF__SHIFT) & CP_COND_WRITE5_3_REF__MASK; -} - -#define REG_CP_COND_WRITE5_4 0x00000004 -#define CP_COND_WRITE5_4_MASK__MASK 0xffffffff -#define CP_COND_WRITE5_4_MASK__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_4_MASK(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_4_MASK__SHIFT) & CP_COND_WRITE5_4_MASK__MASK; -} - -#define REG_CP_COND_WRITE5_5 0x00000005 -#define CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK 0xffffffff -#define CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_5_WRITE_ADDR_LO(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_5_WRITE_ADDR_LO__SHIFT) & CP_COND_WRITE5_5_WRITE_ADDR_LO__MASK; -} - -#define REG_CP_COND_WRITE5_6 0x00000006 -#define CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK 0xffffffff -#define CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_6_WRITE_ADDR_HI(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_6_WRITE_ADDR_HI__SHIFT) & CP_COND_WRITE5_6_WRITE_ADDR_HI__MASK; -} - -#define REG_CP_COND_WRITE5_7 0x00000007 -#define CP_COND_WRITE5_7_WRITE_DATA__MASK 0xffffffff -#define CP_COND_WRITE5_7_WRITE_DATA__SHIFT 0 -static inline uint32_t CP_COND_WRITE5_7_WRITE_DATA(uint32_t val) -{ - return ((val) << CP_COND_WRITE5_7_WRITE_DATA__SHIFT) & CP_COND_WRITE5_7_WRITE_DATA__MASK; -} - -#define REG_CP_DISPATCH_COMPUTE_0 0x00000000 - -#define REG_CP_DISPATCH_COMPUTE_1 0x00000001 -#define CP_DISPATCH_COMPUTE_1_X__MASK 0xffffffff -#define CP_DISPATCH_COMPUTE_1_X__SHIFT 0 -static inline uint32_t CP_DISPATCH_COMPUTE_1_X(uint32_t val) -{ - return ((val) << CP_DISPATCH_COMPUTE_1_X__SHIFT) & CP_DISPATCH_COMPUTE_1_X__MASK; -} - -#define REG_CP_DISPATCH_COMPUTE_2 0x00000002 -#define CP_DISPATCH_COMPUTE_2_Y__MASK 0xffffffff -#define CP_DISPATCH_COMPUTE_2_Y__SHIFT 0 -static inline uint32_t CP_DISPATCH_COMPUTE_2_Y(uint32_t val) -{ - return ((val) << CP_DISPATCH_COMPUTE_2_Y__SHIFT) & CP_DISPATCH_COMPUTE_2_Y__MASK; -} - -#define REG_CP_DISPATCH_COMPUTE_3 0x00000003 -#define CP_DISPATCH_COMPUTE_3_Z__MASK 0xffffffff -#define CP_DISPATCH_COMPUTE_3_Z__SHIFT 0 -static inline uint32_t CP_DISPATCH_COMPUTE_3_Z(uint32_t val) -{ - return ((val) << CP_DISPATCH_COMPUTE_3_Z__SHIFT) & CP_DISPATCH_COMPUTE_3_Z__MASK; -} - -#define REG_CP_SET_RENDER_MODE_0 0x00000000 -#define CP_SET_RENDER_MODE_0_MODE__MASK 0x000001ff -#define CP_SET_RENDER_MODE_0_MODE__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_0_MODE(enum render_mode_cmd val) -{ - return ((val) << CP_SET_RENDER_MODE_0_MODE__SHIFT) & CP_SET_RENDER_MODE_0_MODE__MASK; -} - -#define REG_CP_SET_RENDER_MODE_1 0x00000001 -#define CP_SET_RENDER_MODE_1_ADDR_0_LO__MASK 0xffffffff -#define CP_SET_RENDER_MODE_1_ADDR_0_LO__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_1_ADDR_0_LO(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_1_ADDR_0_LO__SHIFT) & CP_SET_RENDER_MODE_1_ADDR_0_LO__MASK; -} - -#define REG_CP_SET_RENDER_MODE_2 0x00000002 -#define CP_SET_RENDER_MODE_2_ADDR_0_HI__MASK 0xffffffff -#define CP_SET_RENDER_MODE_2_ADDR_0_HI__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_2_ADDR_0_HI(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_2_ADDR_0_HI__SHIFT) & CP_SET_RENDER_MODE_2_ADDR_0_HI__MASK; -} - -#define REG_CP_SET_RENDER_MODE_3 0x00000003 -#define CP_SET_RENDER_MODE_3_VSC_ENABLE 0x00000008 -#define CP_SET_RENDER_MODE_3_GMEM_ENABLE 0x00000010 - -#define REG_CP_SET_RENDER_MODE_4 0x00000004 - -#define REG_CP_SET_RENDER_MODE_5 0x00000005 -#define CP_SET_RENDER_MODE_5_ADDR_1_LEN__MASK 0xffffffff -#define CP_SET_RENDER_MODE_5_ADDR_1_LEN__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_5_ADDR_1_LEN(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_5_ADDR_1_LEN__SHIFT) & CP_SET_RENDER_MODE_5_ADDR_1_LEN__MASK; -} - -#define REG_CP_SET_RENDER_MODE_6 0x00000006 -#define CP_SET_RENDER_MODE_6_ADDR_1_LO__MASK 0xffffffff -#define CP_SET_RENDER_MODE_6_ADDR_1_LO__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_6_ADDR_1_LO(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_6_ADDR_1_LO__SHIFT) & CP_SET_RENDER_MODE_6_ADDR_1_LO__MASK; -} - -#define REG_CP_SET_RENDER_MODE_7 0x00000007 -#define CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK 0xffffffff -#define CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT 0 -static inline uint32_t CP_SET_RENDER_MODE_7_ADDR_1_HI(uint32_t val) -{ - return ((val) << CP_SET_RENDER_MODE_7_ADDR_1_HI__SHIFT) & CP_SET_RENDER_MODE_7_ADDR_1_HI__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_0 0x00000000 -#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_0_ADDR_0_LO__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_1 0x00000001 -#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_1_ADDR_0_HI__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_2 0x00000002 - -#define REG_CP_COMPUTE_CHECKPOINT_3 0x00000003 -#define CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__SHIFT) & CP_COMPUTE_CHECKPOINT_3_ADDR_1_LEN__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_4 0x00000004 - -#define REG_CP_COMPUTE_CHECKPOINT_5 0x00000005 -#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__SHIFT) & CP_COMPUTE_CHECKPOINT_5_ADDR_1_LO__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_6 0x00000006 -#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK 0xffffffff -#define CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT 0 -static inline uint32_t CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI(uint32_t val) -{ - return ((val) << CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__SHIFT) & CP_COMPUTE_CHECKPOINT_6_ADDR_1_HI__MASK; -} - -#define REG_CP_COMPUTE_CHECKPOINT_7 0x00000007 - -#define REG_CP_PERFCOUNTER_ACTION_0 0x00000000 - -#define REG_CP_PERFCOUNTER_ACTION_1 0x00000001 -#define CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__MASK 0xffffffff -#define CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__SHIFT 0 -static inline uint32_t CP_PERFCOUNTER_ACTION_1_ADDR_0_LO(uint32_t val) -{ - return ((val) << CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__SHIFT) & CP_PERFCOUNTER_ACTION_1_ADDR_0_LO__MASK; -} - -#define REG_CP_PERFCOUNTER_ACTION_2 0x00000002 -#define CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__MASK 0xffffffff -#define CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__SHIFT 0 -static inline uint32_t CP_PERFCOUNTER_ACTION_2_ADDR_0_HI(uint32_t val) -{ - return ((val) << CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__SHIFT) & CP_PERFCOUNTER_ACTION_2_ADDR_0_HI__MASK; -} - -#define REG_CP_EVENT_WRITE_0 0x00000000 -#define CP_EVENT_WRITE_0_EVENT__MASK 0x000000ff -#define CP_EVENT_WRITE_0_EVENT__SHIFT 0 -static inline uint32_t CP_EVENT_WRITE_0_EVENT(enum vgt_event_type val) -{ - return ((val) << CP_EVENT_WRITE_0_EVENT__SHIFT) & CP_EVENT_WRITE_0_EVENT__MASK; -} -#define CP_EVENT_WRITE_0_TIMESTAMP 0x40000000 - -#define REG_CP_EVENT_WRITE_1 0x00000001 -#define CP_EVENT_WRITE_1_ADDR_0_LO__MASK 0xffffffff -#define CP_EVENT_WRITE_1_ADDR_0_LO__SHIFT 0 -static inline uint32_t CP_EVENT_WRITE_1_ADDR_0_LO(uint32_t val) -{ - return ((val) << CP_EVENT_WRITE_1_ADDR_0_LO__SHIFT) & CP_EVENT_WRITE_1_ADDR_0_LO__MASK; -} - -#define REG_CP_EVENT_WRITE_2 0x00000002 -#define CP_EVENT_WRITE_2_ADDR_0_HI__MASK 0xffffffff -#define CP_EVENT_WRITE_2_ADDR_0_HI__SHIFT 0 -static inline uint32_t CP_EVENT_WRITE_2_ADDR_0_HI(uint32_t val) -{ - return ((val) << CP_EVENT_WRITE_2_ADDR_0_HI__SHIFT) & CP_EVENT_WRITE_2_ADDR_0_HI__MASK; -} - -#define REG_CP_EVENT_WRITE_3 0x00000003 - -#define REG_CP_BLIT_0 0x00000000 -#define CP_BLIT_0_OP__MASK 0x0000000f -#define CP_BLIT_0_OP__SHIFT 0 -static inline uint32_t CP_BLIT_0_OP(enum cp_blit_cmd val) -{ - return ((val) << CP_BLIT_0_OP__SHIFT) & CP_BLIT_0_OP__MASK; -} - -#define REG_CP_BLIT_1 0x00000001 -#define CP_BLIT_1_SRC_X1__MASK 0x00003fff -#define CP_BLIT_1_SRC_X1__SHIFT 0 -static inline uint32_t CP_BLIT_1_SRC_X1(uint32_t val) -{ - return ((val) << CP_BLIT_1_SRC_X1__SHIFT) & CP_BLIT_1_SRC_X1__MASK; -} -#define CP_BLIT_1_SRC_Y1__MASK 0x3fff0000 -#define CP_BLIT_1_SRC_Y1__SHIFT 16 -static inline uint32_t CP_BLIT_1_SRC_Y1(uint32_t val) -{ - return ((val) << CP_BLIT_1_SRC_Y1__SHIFT) & CP_BLIT_1_SRC_Y1__MASK; -} - -#define REG_CP_BLIT_2 0x00000002 -#define CP_BLIT_2_SRC_X2__MASK 0x00003fff -#define CP_BLIT_2_SRC_X2__SHIFT 0 -static inline uint32_t CP_BLIT_2_SRC_X2(uint32_t val) -{ - return ((val) << CP_BLIT_2_SRC_X2__SHIFT) & CP_BLIT_2_SRC_X2__MASK; -} -#define CP_BLIT_2_SRC_Y2__MASK 0x3fff0000 -#define CP_BLIT_2_SRC_Y2__SHIFT 16 -static inline uint32_t CP_BLIT_2_SRC_Y2(uint32_t val) -{ - return ((val) << CP_BLIT_2_SRC_Y2__SHIFT) & CP_BLIT_2_SRC_Y2__MASK; -} - -#define REG_CP_BLIT_3 0x00000003 -#define CP_BLIT_3_DST_X1__MASK 0x00003fff -#define CP_BLIT_3_DST_X1__SHIFT 0 -static inline uint32_t CP_BLIT_3_DST_X1(uint32_t val) -{ - return ((val) << CP_BLIT_3_DST_X1__SHIFT) & CP_BLIT_3_DST_X1__MASK; -} -#define CP_BLIT_3_DST_Y1__MASK 0x3fff0000 -#define CP_BLIT_3_DST_Y1__SHIFT 16 -static inline uint32_t CP_BLIT_3_DST_Y1(uint32_t val) -{ - return ((val) << CP_BLIT_3_DST_Y1__SHIFT) & CP_BLIT_3_DST_Y1__MASK; -} - -#define REG_CP_BLIT_4 0x00000004 -#define CP_BLIT_4_DST_X2__MASK 0x00003fff -#define CP_BLIT_4_DST_X2__SHIFT 0 -static inline uint32_t CP_BLIT_4_DST_X2(uint32_t val) -{ - return ((val) << CP_BLIT_4_DST_X2__SHIFT) & CP_BLIT_4_DST_X2__MASK; -} -#define CP_BLIT_4_DST_Y2__MASK 0x3fff0000 -#define CP_BLIT_4_DST_Y2__SHIFT 16 -static inline uint32_t CP_BLIT_4_DST_Y2(uint32_t val) -{ - return ((val) << CP_BLIT_4_DST_Y2__SHIFT) & CP_BLIT_4_DST_Y2__MASK; -} - -#define REG_CP_EXEC_CS_0 0x00000000 - -#define REG_CP_EXEC_CS_1 0x00000001 -#define CP_EXEC_CS_1_NGROUPS_X__MASK 0xffffffff -#define CP_EXEC_CS_1_NGROUPS_X__SHIFT 0 -static inline uint32_t CP_EXEC_CS_1_NGROUPS_X(uint32_t val) -{ - return ((val) << CP_EXEC_CS_1_NGROUPS_X__SHIFT) & CP_EXEC_CS_1_NGROUPS_X__MASK; -} - -#define REG_CP_EXEC_CS_2 0x00000002 -#define CP_EXEC_CS_2_NGROUPS_Y__MASK 0xffffffff -#define CP_EXEC_CS_2_NGROUPS_Y__SHIFT 0 -static inline uint32_t CP_EXEC_CS_2_NGROUPS_Y(uint32_t val) -{ - return ((val) << CP_EXEC_CS_2_NGROUPS_Y__SHIFT) & CP_EXEC_CS_2_NGROUPS_Y__MASK; -} - -#define REG_CP_EXEC_CS_3 0x00000003 -#define CP_EXEC_CS_3_NGROUPS_Z__MASK 0xffffffff -#define CP_EXEC_CS_3_NGROUPS_Z__SHIFT 0 -static inline uint32_t CP_EXEC_CS_3_NGROUPS_Z(uint32_t val) -{ - return ((val) << CP_EXEC_CS_3_NGROUPS_Z__SHIFT) & CP_EXEC_CS_3_NGROUPS_Z__MASK; -} - -#define REG_A4XX_CP_EXEC_CS_INDIRECT_0 0x00000000 - - -#define REG_A4XX_CP_EXEC_CS_INDIRECT_1 0x00000001 -#define A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__MASK 0xffffffff -#define A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__SHIFT 0 -static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_1_ADDR(uint32_t val) -{ - return ((val) << A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_1_ADDR__MASK; -} - -#define REG_A4XX_CP_EXEC_CS_INDIRECT_2 0x00000002 -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__MASK 0x00000ffc -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__SHIFT 2 -static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX(uint32_t val) -{ - return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEX__MASK; -} -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__MASK 0x003ff000 -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__SHIFT 12 -static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY(uint32_t val) -{ - return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEY__MASK; -} -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__MASK 0xffc00000 -#define A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__SHIFT) & A4XX_CP_EXEC_CS_INDIRECT_2_LOCALSIZEZ__MASK; -} - - -#define REG_A5XX_CP_EXEC_CS_INDIRECT_1 0x00000001 -#define A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__MASK 0xffffffff -#define A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__SHIFT 0 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_1_ADDR_LO__MASK; -} - -#define REG_A5XX_CP_EXEC_CS_INDIRECT_2 0x00000002 -#define A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__MASK 0xffffffff -#define A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__SHIFT 0 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_2_ADDR_HI__MASK; -} - -#define REG_A5XX_CP_EXEC_CS_INDIRECT_3 0x00000003 -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__MASK 0x00000ffc -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__SHIFT 2 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX__MASK; -} -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__MASK 0x003ff000 -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__SHIFT 12 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY__MASK; -} -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__MASK 0xffc00000 -#define A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__SHIFT 22 -static inline uint32_t A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(uint32_t val) -{ - return ((val) << A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__SHIFT) & A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ__MASK; -} - - -#endif /* ADRENO_PM4_XML */ diff --git a/selfdrive/modeld/thneed/debug/include/adreno_pm4types.h b/selfdrive/modeld/thneed/debug/include/adreno_pm4types.h deleted file mode 100644 index aefffdd577..0000000000 --- a/selfdrive/modeld/thneed/debug/include/adreno_pm4types.h +++ /dev/null @@ -1,473 +0,0 @@ -/* Copyright (c) 2002,2007-2015, The Linux Foundation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ -#ifndef __ADRENO_PM4TYPES_H -#define __ADRENO_PM4TYPES_H - -//#include "adreno.h" - -#define CP_PKT_MASK 0xc0000000 - -#define CP_TYPE0_PKT ((unsigned int)0 << 30) -#define CP_TYPE3_PKT ((unsigned int)3 << 30) -#define CP_TYPE4_PKT ((unsigned int)4 << 28) -#define CP_TYPE7_PKT ((unsigned int)7 << 28) - -#define PM4_TYPE4_PKT_SIZE_MAX 128 - -/* type3 packets */ - -/* Enable preemption flag */ -#define CP_PREEMPT_ENABLE 0x1C -/* Preemption token command on which preemption occurs */ -#define CP_PREEMPT_TOKEN 0x1E -/* Bit to set in CP_PREEMPT_TOKEN ordinal for interrupt on preemption */ -#define CP_PREEMPT_ORDINAL_INTERRUPT 24 -/* copy from ME scratch RAM to a register */ -#define CP_SCRATCH_TO_REG 0x4d - -/* Copy from REG to ME scratch RAM */ -#define CP_REG_TO_SCRATCH 0x4a - -/* Wait for memory writes to complete */ -#define CP_WAIT_MEM_WRITES 0x12 - -/* Conditional execution based on register comparison */ -#define CP_COND_REG_EXEC 0x47 - -/* Memory to REG copy */ -#define CP_MEM_TO_REG 0x42 - -/* initialize CP's micro-engine */ -#define CP_ME_INIT 0x48 - -/* skip N 32-bit words to get to the next packet */ -#define CP_NOP 0x10 - -/* indirect buffer dispatch. same as IB, but init is pipelined */ -#define CP_INDIRECT_BUFFER_PFD 0x37 - -/* wait for the IDLE state of the engine */ -#define CP_WAIT_FOR_IDLE 0x26 - -/* wait until a register or memory location is a specific value */ -#define CP_WAIT_REG_MEM 0x3c - -/* wait until a register location is equal to a specific value */ -#define CP_WAIT_REG_EQ 0x52 - -/* switches SMMU pagetable, used on a5xx only */ -#define CP_SMMU_TABLE_UPDATE 0x53 - -/* wait until a read completes */ -#define CP_WAIT_UNTIL_READ 0x5c - -/* wait until all base/size writes from an IB_PFD packet have completed */ -#define CP_WAIT_IB_PFD_COMPLETE 0x5d - -/* register read/modify/write */ -#define CP_REG_RMW 0x21 - -/* Set binning configuration registers */ -#define CP_SET_BIN_DATA 0x2f - -/* reads register in chip and writes to memory */ -#define CP_REG_TO_MEM 0x3e - -/* write N 32-bit words to memory */ -#define CP_MEM_WRITE 0x3d - -/* write CP_PROG_COUNTER value to memory */ -#define CP_MEM_WRITE_CNTR 0x4f - -/* conditional execution of a sequence of packets */ -#define CP_COND_EXEC 0x44 - -/* conditional write to memory or register */ -#define CP_COND_WRITE 0x45 - -/* generate an event that creates a write to memory when completed */ -#define CP_EVENT_WRITE 0x46 - -/* generate a VS|PS_done event */ -#define CP_EVENT_WRITE_SHD 0x58 - -/* generate a cache flush done event */ -#define CP_EVENT_WRITE_CFL 0x59 - -/* generate a z_pass done event */ -#define CP_EVENT_WRITE_ZPD 0x5b - - -/* initiate fetch of index buffer and draw */ -#define CP_DRAW_INDX 0x22 - -/* draw using supplied indices in packet */ -#define CP_DRAW_INDX_2 0x36 - -/* initiate fetch of index buffer and binIDs and draw */ -#define CP_DRAW_INDX_BIN 0x34 - -/* initiate fetch of bin IDs and draw using supplied indices */ -#define CP_DRAW_INDX_2_BIN 0x35 - -/* New draw packets defined for A4XX */ -#define CP_DRAW_INDX_OFFSET 0x38 -#define CP_DRAW_INDIRECT 0x28 -#define CP_DRAW_INDX_INDIRECT 0x29 -#define CP_DRAW_AUTO 0x24 - -/* begin/end initiator for viz query extent processing */ -#define CP_VIZ_QUERY 0x23 - -/* fetch state sub-blocks and initiate shader code DMAs */ -#define CP_SET_STATE 0x25 - -/* load constant into chip and to memory */ -#define CP_SET_CONSTANT 0x2d - -/* load sequencer instruction memory (pointer-based) */ -#define CP_IM_LOAD 0x27 - -/* load sequencer instruction memory (code embedded in packet) */ -#define CP_IM_LOAD_IMMEDIATE 0x2b - -/* load constants from a location in memory */ -#define CP_LOAD_CONSTANT_CONTEXT 0x2e - -/* selective invalidation of state pointers */ -#define CP_INVALIDATE_STATE 0x3b - - -/* dynamically changes shader instruction memory partition */ -#define CP_SET_SHADER_BASES 0x4A - -/* sets the 64-bit BIN_MASK register in the PFP */ -#define CP_SET_BIN_MASK 0x50 - -/* sets the 64-bit BIN_SELECT register in the PFP */ -#define CP_SET_BIN_SELECT 0x51 - - -/* updates the current context, if needed */ -#define CP_CONTEXT_UPDATE 0x5e - -/* generate interrupt from the command stream */ -#define CP_INTERRUPT 0x40 - -/* A5XX Enable yield in RB only */ -#define CP_YIELD_ENABLE 0x1C - -/* Enable/Disable/Defer A5x global preemption model */ -#define CP_PREEMPT_ENABLE_GLOBAL 0x69 - -/* Enable/Disable A5x local preemption model */ -#define CP_PREEMPT_ENABLE_LOCAL 0x6A - -/* Yeild token on a5xx similar to CP_PREEMPT on a4xx */ -#define CP_CONTEXT_SWITCH_YIELD 0x6B - -/* Inform CP about current render mode (needed for a5xx preemption) */ -#define CP_SET_RENDER_MODE 0x6C - -/* copy sequencer instruction memory to system memory */ -#define CP_IM_STORE 0x2c - -/* test 2 memory locations to dword values specified */ -#define CP_TEST_TWO_MEMS 0x71 - -/* Write register, ignoring context state for context sensitive registers */ -#define CP_REG_WR_NO_CTXT 0x78 - -/* - * for A4xx - * Write to register with address that does not fit into type-0 pkt - */ -#define CP_WIDE_REG_WRITE 0x74 - - -/* PFP waits until the FIFO between the PFP and the ME is empty */ -#define CP_WAIT_FOR_ME 0x13 - -/* Record the real-time when this packet is processed by PFP */ -#define CP_RECORD_PFP_TIMESTAMP 0x11 - -#define CP_SET_PROTECTED_MODE 0x5f /* sets the register protection mode */ - -/* Used to switch GPU between secure and non-secure modes */ -#define CP_SET_SECURE_MODE 0x66 - -#define CP_BOOTSTRAP_UCODE 0x6f /* bootstraps microcode */ - -/* - * for a3xx - */ - -#define CP_LOAD_STATE 0x30 /* load high level sequencer command */ - -/* Conditionally load a IB based on a flag */ -#define CP_COND_INDIRECT_BUFFER_PFE 0x3A /* prefetch enabled */ -#define CP_COND_INDIRECT_BUFFER_PFD 0x32 /* prefetch disabled */ - -/* Load a buffer with pre-fetch enabled */ -#define CP_INDIRECT_BUFFER_PFE 0x3F - -#define CP_EXEC_CL 0x31 - -/* (A4x) save PM4 stream pointers to execute upon a visible draw */ -#define CP_SET_DRAW_STATE 0x43 - -#define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000 -#define CP_LOADSTATE_STATESRC_SHIFT 0x00000010 -#define CP_LOADSTATE_STATEBLOCKID_SHIFT 0x00000013 -#define CP_LOADSTATE_NUMOFUNITS_SHIFT 0x00000016 -#define CP_LOADSTATE_STATETYPE_SHIFT 0x00000000 -#define CP_LOADSTATE_EXTSRCADDR_SHIFT 0x00000002 - -static inline uint pm4_calc_odd_parity_bit(uint val) -{ - return (0x9669 >> (0xf & ((val) ^ - ((val) >> 4) ^ ((val) >> 8) ^ ((val) >> 12) ^ - ((val) >> 16) ^ ((val) >> 20) ^ ((val) >> 24) ^ - ((val) >> 28)))) & 1; -} - -/* - * PM4 packet header functions - * For all the packet functions the passed in count should be the size of the - * payload excluding the header - */ -static inline uint cp_type0_packet(uint regindx, uint cnt) -{ - return CP_TYPE0_PKT | ((cnt-1) << 16) | ((regindx) & 0x7FFF); -} - -static inline uint cp_type3_packet(uint opcode, uint cnt) -{ - return CP_TYPE3_PKT | ((cnt-1) << 16) | (((opcode) & 0xFF) << 8); -} - -static inline uint cp_type4_packet(uint opcode, uint cnt) -{ - return CP_TYPE4_PKT | ((cnt) << 0) | - (pm4_calc_odd_parity_bit(cnt) << 7) | - (((opcode) & 0x3FFFF) << 8) | - ((pm4_calc_odd_parity_bit(opcode) << 27)); -} - -static inline uint cp_type7_packet(uint opcode, uint cnt) -{ - return CP_TYPE7_PKT | ((cnt) << 0) | - (pm4_calc_odd_parity_bit(cnt) << 15) | - (((opcode) & 0x7F) << 16) | - ((pm4_calc_odd_parity_bit(opcode) << 23)); - -} - -#define pkt_is_type0(pkt) (((pkt) & 0XC0000000) == CP_TYPE0_PKT) - -#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) -#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF) - -/* - * Check both for the type3 opcode and make sure that the reserved bits [1:7] - * and 15 are 0 - */ - -#define pkt_is_type3(pkt) \ - ((((pkt) & 0xC0000000) == CP_TYPE3_PKT) && \ - (((pkt) & 0x80FE) == 0)) - -#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF) -#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1) - -#define pkt_is_type4(pkt) \ - ((((pkt) & 0xF0000000) == CP_TYPE4_PKT) && \ - ((((pkt) >> 27) & 0x1) == \ - pm4_calc_odd_parity_bit(cp_type4_base_index_one_reg_wr(pkt))) \ - && ((((pkt) >> 7) & 0x1) == \ - pm4_calc_odd_parity_bit(type4_pkt_size(pkt)))) - -#define cp_type4_base_index_one_reg_wr(pkt) (((pkt) >> 8) & 0x7FFFF) -#define type4_pkt_size(pkt) ((pkt) & 0x7F) - -#define pkt_is_type7(pkt) \ - ((((pkt) & 0xF0000000) == CP_TYPE7_PKT) && \ - (((pkt) & 0x0F000000) == 0) && \ - ((((pkt) >> 23) & 0x1) == \ - pm4_calc_odd_parity_bit(cp_type7_opcode(pkt))) \ - && ((((pkt) >> 15) & 0x1) == \ - pm4_calc_odd_parity_bit(type7_pkt_size(pkt)))) - -#define cp_type7_opcode(pkt) (((pkt) >> 16) & 0x7F) -#define type7_pkt_size(pkt) ((pkt) & 0x3FFF) - -/* dword base address of the GFX decode space */ -#define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000))) - -/* gmem command buffer length */ -#define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg))) - -// add these -#define ADRENO_GPUREV(x) 530 -#define lower_32_bits(n) ((uint32_t)(n)) -#define upper_32_bits(n) ((uint32_t)(((n) >> 16) >> 16)) - -/* Return true if the hardware uses the legacy (A4XX and older) PM4 format */ -#define ADRENO_LEGACY_PM4(_d) (ADRENO_GPUREV(_d) < 500) - -/** - * cp_packet - Generic CP packet to support different opcodes on - * different GPU cores. - * @adreno_dev: The adreno device - * @opcode: Operation for cp packet - * @size: size for cp packet - */ -static inline uint cp_packet(struct adreno_device *adreno_dev, - int opcode, uint size) -{ - if (ADRENO_LEGACY_PM4(adreno_dev)) - return cp_type3_packet(opcode, size); - - return cp_type7_packet(opcode, size); -} - -/** - * cp_mem_packet - Generic CP memory packet to support different - * opcodes on different GPU cores. - * @adreno_dev: The adreno device - * @opcode: mem operation for cp packet - * @size: size for cp packet - * @num_mem: num of mem access - */ -static inline uint cp_mem_packet(struct adreno_device *adreno_dev, - int opcode, uint size, uint num_mem) -{ - if (ADRENO_LEGACY_PM4(adreno_dev)) - return cp_type3_packet(opcode, size); - - return cp_type7_packet(opcode, size + num_mem); -} - -/* Return 1 if the command is an indirect buffer of any kind */ -static inline int adreno_cmd_is_ib(struct adreno_device *adreno_dev, - unsigned int cmd) -{ - return cmd == cp_mem_packet(adreno_dev, - CP_INDIRECT_BUFFER_PFE, 2, 1) || - cmd == cp_mem_packet(adreno_dev, - CP_INDIRECT_BUFFER_PFD, 2, 1) || - cmd == cp_mem_packet(adreno_dev, - CP_COND_INDIRECT_BUFFER_PFE, 2, 1) || - cmd == cp_mem_packet(adreno_dev, - CP_COND_INDIRECT_BUFFER_PFD, 2, 1); -} - -/** - * cp_gpuaddr - Generic function to add 64bit and 32bit gpuaddr - * to pm4 commands - * @adreno_dev: The adreno device - * @cmds: command pointer to add gpuaddr - * @gpuaddr: gpuaddr to add - */ -static inline uint cp_gpuaddr(struct adreno_device *adreno_dev, - uint *cmds, uint64_t gpuaddr) -{ - uint *start = cmds; - - if (ADRENO_LEGACY_PM4(adreno_dev)) - *cmds++ = (uint)gpuaddr; - else { - *cmds++ = lower_32_bits(gpuaddr); - *cmds++ = upper_32_bits(gpuaddr); - } - return cmds - start; -} - -/** - * cp_register - Generic function for gpu register operation - * @adreno_dev: The adreno device - * @reg: GPU register - * @size: count for PM4 operation - */ -static inline uint cp_register(struct adreno_device *adreno_dev, - unsigned int reg, unsigned int size) -{ - if (ADRENO_LEGACY_PM4(adreno_dev)) - return cp_type0_packet(reg, size); - - return cp_type4_packet(reg, size); -} - -/** - * cp_wait_for_me - common function for WAIT_FOR_ME - * @adreno_dev: The adreno device - * @cmds: command pointer to add gpuaddr - */ -static inline uint cp_wait_for_me(struct adreno_device *adreno_dev, - uint *cmds) -{ - uint *start = cmds; - - if (ADRENO_LEGACY_PM4(adreno_dev)) { - *cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1); - *cmds++ = 0; - } else - *cmds++ = cp_type7_packet(CP_WAIT_FOR_ME, 0); - - return cmds - start; -} - -/** - * cp_wait_for_idle - common function for WAIT_FOR_IDLE - * @adreno_dev: The adreno device - * @cmds: command pointer to add gpuaddr - */ -static inline uint cp_wait_for_idle(struct adreno_device *adreno_dev, - uint *cmds) -{ - uint *start = cmds; - - if (ADRENO_LEGACY_PM4(adreno_dev)) { - *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1); - *cmds++ = 0; - } else - *cmds++ = cp_type7_packet(CP_WAIT_FOR_IDLE, 0); - - return cmds - start; -} - -/** - * cp_invalidate_state - common function for invalidating cp - * state - * @adreno_dev: The adreno device - * @cmds: command pointer to add gpuaddr - */ -static inline uint cp_invalidate_state(struct adreno_device *adreno_dev, - uint *cmds) -{ - uint *start = cmds; - - if (ADRENO_GPUREV(adreno_dev) < 500) { - *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1); - *cmds++ = 0x7fff; - } else { - *cmds++ = cp_type7_packet(CP_SET_DRAW_STATE, 3); - *cmds++ = 0x40000; - *cmds++ = 0; - *cmds++ = 0; - } - - return cmds - start; -} - -#endif /* __ADRENO_PM4TYPES_H */ diff --git a/selfdrive/modeld/thneed/debug/main.cc b/selfdrive/modeld/thneed/debug/main.cc deleted file mode 100644 index cf96c61e39..0000000000 --- a/selfdrive/modeld/thneed/debug/main.cc +++ /dev/null @@ -1,724 +0,0 @@ -#include -#include "include/msm_kgsl.h" -#include "common/clutil.h" -#include -#include -#include -#include -#include - -int run_num = 0; -int ioctl_num = 0; - -void hexdump(uint32_t *d, int len) { - assert((len%4) == 0); - printf(" dumping %p len 0x%x\n", d, len); - for (int i = 0; i < len/4; i++) { - if (i != 0 && (i%0x10) == 0) printf("\n"); - printf("%8x ", d[i]); - } - printf("\n"); -} - -void hexdump8(uint8_t *d, int len) { - printf(" dumping %p len 0x%x\n", d, len); - for (int i = 0; i < len; i++) { - if (i != 0 && (i%0x10) == 0) printf("\n"); - printf("%02x ", d[i]); - } - printf("\n"); -} - - -#include -#include -#include -using namespace std; - -#include "disasm/include/adreno_pm4types.h" - -#define REG_A5XX_TPL1_CS_TEX_CONST_LO 0x0000e760 -#define REG_A5XX_TPL1_CS_TEX_SAMP_LO 0x0000e75c - -class CachedCommand { - public: - CachedCommand(struct kgsl_gpu_command *cmd, int lfd); - void exec(bool wait); - private: - string cmd_0, cmd_1; - int obj_len; - int fd; - - struct kgsl_gpu_command cache; - struct kgsl_command_object cmds[2]; - struct kgsl_command_object objs[1]; -}; - -vector queue_cmds; - -void disassemble(uint32_t *src, int len) { - int i = 0; - while (i < len) { - int pktsize; - int pkttype = -1; - - if (pkt_is_type0(src[i])) { - pkttype = 0; - pktsize = type0_pkt_size(src[i]); - } else if (pkt_is_type3(src[i])) { - pkttype = 3; - pktsize = type3_pkt_size(src[i]); - } else if (pkt_is_type4(src[i])) { - pkttype = 4; - pktsize = type4_pkt_size(src[i]); - } else if (pkt_is_type7(src[i])) { - pkttype = 7; - pktsize = type7_pkt_size(src[i]); - } - printf("%3d: type:%d size:%d ", i, pkttype, pktsize); - - if (pkttype == 7) { - printf("op: %4x ", cp_type7_opcode(src[i])); - } - - if (pkttype == 4) { - printf("reg: %4x ", cp_type4_base_index_one_reg_wr(src[i])); - } - - for (int j = 0; j < pktsize+1; j++) { - printf("%8.8X ", src[i+j]); - } - printf("\n"); - - if (pkttype == 7 && cp_type7_opcode(src[i]) == CP_LOAD_STATE) { - // CP_LOAD_STATE4 - int sz = (src[i+1] & 0xffc00000) >> 22; - uint64_t addr = (uint64_t)(src[i+2] & 0xfffffffc) | ((uint64_t)(src[i+3]) << 32); - hexdump((uint32_t *)addr, sz*4); - } - - if (pkttype == 4 && cp_type4_base_index_one_reg_wr(src[i]) == REG_A5XX_TPL1_CS_TEX_CONST_LO) { - uint64_t addr = (uint64_t)(src[i+1] & 0xffffffff) | ((uint64_t)(src[i+2]) << 32); - hexdump((uint32_t *)addr, 0x40); - } - - if (pkttype == 4 && cp_type4_base_index_one_reg_wr(src[i]) == REG_A5XX_TPL1_CS_TEX_SAMP_LO) { - uint64_t addr = (uint64_t)(src[i+1] & 0xffffffff) | ((uint64_t)(src[i+2]) << 32); - hexdump((uint32_t *)addr, 0x40); - } - - if (pkttype == -1) break; - i += (1+pktsize); - } - assert(i == len); - -} - -int intercept = 1; -int prop_num = 0; - -extern "C" { - -/*void *gsl_memory_alloc_pure(long param_1, long param_2, long *param_3) { - void *(*my_gsl_memory_alloc_pure)(long param_1, long param_2, long *param_3); - my_gsl_memory_alloc_pure = reinterpret_cast(dlsym(RTLD_NEXT, "gsl_memory_alloc_pure")); - - void *ret = my_gsl_memory_alloc_pure(param_1, param_2, param_3); - printf("gsl_memory_alloc_pure: 0x%lx 0x%lx %p = %p\n", param_1, param_2, param_3, ret); - return ret; -}*/ - -void *mmap64(void *addr, size_t len, int prot, int flags, int fildes, off64_t off) { - void *(*my_mmap64)(void *addr, size_t len, int prot, int flags, int fildes, off64_t off); - my_mmap64 = reinterpret_cast(dlsym(RTLD_NEXT, "mmap64")); - - void *ret = my_mmap64(addr, len, prot, flags, fildes, off); - - if (fildes == 3) { - printf("mmap64(addr=%p, len=0x%zx, prot=0x%x, flags=0x%x, fildes=%d, off=0x%lx) = %p\n", addr, len, prot, flags, fildes, off, ret); - } - - return ret; -} - - -pid_t gettid(void); - -#undef ioctl -int ioctl(int filedes, unsigned long request, void *argp) { - int (*my_ioctl)(int filedes, unsigned long request, void *argp); - my_ioctl = reinterpret_cast(dlsym(RTLD_NEXT, "ioctl")); - int skip = 0; - -if (intercept) { - - int tid = gettid(); - - if (request == IOCTL_KGSL_GPU_COMMAND) { - struct kgsl_gpu_command *cmd = (struct kgsl_gpu_command *)argp; - printf("IOCTL_KGSL_GPU_COMMAND(%d): flags: 0x%lx numcmds: %u numobjs: %u numsyncs: %u context_id: %u timestamp: %u\n", - tid, - cmd->flags, - cmd->numcmds, cmd->numobjs, cmd->numsyncs, - cmd->context_id, cmd->timestamp); - - assert(cmd->numcmds == 2); - assert(cmd->numobjs == 1); - assert(cmd->numsyncs == 0); - - //struct kgsl_command_object *obj = (struct kgsl_command_object *)cmd->cmdlist; - //assert(obj[0].size == sizeof(queue_init)); - //memcpy(queue_init, (void*)obj[0].gpuaddr, sizeof(queue_init)); - //string qcmd((char*)obj[1].gpuaddr, obj[1].size); - if (run_num == 3) { - CachedCommand *ccmd = new CachedCommand(cmd, filedes); - queue_cmds.push_back(ccmd); - - //ccmd->exec(); - - //skip = 0; - //printf("command 0x%lx\n", obj[1].gpuaddr); - //disassemble((uint32_t *)qcmd.data(), qcmd.size()/4); - //queue_cmds.push_back(qcmd); - } - - #ifdef DUMP - char tmp[0x100]; - snprintf(tmp, sizeof(tmp), "/tmp/thneed/run_%d_%d", run_num, ioctl_num++); - FILE *f = fopen(tmp, "wb"); - #endif - - // kgsl_cmdbatch_add_cmdlist - for (int i = 0; i < cmd->numcmds; i++) { - struct kgsl_command_object *obj = (struct kgsl_command_object *)cmd->cmdlist; - printf(" cmd: %lx %5lx %5lx flags:%3x %d\n", - obj[i].offset, obj[i].gpuaddr, obj[i].size, obj[i].flags, obj[i].id); - //hexdump((uint32_t *)obj[i].gpuaddr, obj[i].size); - #ifdef DUMP - fwrite(&obj[i].size, sizeof(obj[i].size), 1, f); - fwrite((void*)obj[i].gpuaddr, obj[i].size, 1, f); - #endif - } - - // kgsl_cmdbatch_add_memlist - for (int i = 0; i < cmd->numobjs; i++) { - struct kgsl_command_object *obj = (struct kgsl_command_object *)cmd->objlist; - printf(" obj: %lx %5lx %5lx flags:%3x %d\n", - obj[i].offset, obj[i].gpuaddr, obj[i].size, obj[i].flags, obj[i].id); - //hexdump((uint32_t *)obj[i].gpuaddr, obj[i].size); - - #ifdef DUMP - fwrite(&obj[i].size, sizeof(obj[i].size), 1, f); - fwrite((void*)obj[i].gpuaddr, obj[i].size, 1, f); - #endif - } - - #ifdef DUMP - fclose(f); - #endif - - } else if (request == IOCTL_KGSL_SETPROPERTY) { - struct kgsl_device_getproperty *prop = (struct kgsl_device_getproperty *)argp; - printf("IOCTL_KGSL_SETPROPERTY(%d): 0x%x\n", tid, prop->type); - hexdump8((uint8_t*)prop->value, prop->sizebytes); - if (prop_num == 1) { printf("SKIPPING\n"); skip = 1; } - if (run_num == 3) prop_num++; - //hexdump((unsigned char*)prop->value, prop->sizebytes); - } else if (request == IOCTL_KGSL_GPUOBJ_SYNC) { - struct kgsl_gpuobj_sync *cmd = (struct kgsl_gpuobj_sync *)argp; - struct kgsl_gpuobj_sync_obj *objs = (struct kgsl_gpuobj_sync_obj *)(cmd->objs); - - printf("IOCTL_KGSL_GPUOBJ_SYNC(%d) count:%d ", tid, cmd->count); - for (int i = 0; i < cmd->count; i++) { - printf(" -- offset:0x%lx len:0x%lx id:%d op:%d ", objs[i].offset, objs[i].length, objs[i].id, objs[i].op); - } - printf("\n"); - } else if (request == IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID) { - struct kgsl_device_waittimestamp_ctxtid *cmd = (struct kgsl_device_waittimestamp_ctxtid *)argp; - printf("IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID(%d): context_id: %d timestamp: %d timeout: %d\n", - tid, cmd->context_id, cmd->timestamp, cmd->timeout); - } else if (request == IOCTL_KGSL_GPUOBJ_ALLOC) { - struct kgsl_gpuobj_alloc *cmd = (struct kgsl_gpuobj_alloc *)argp; - printf("IOCTL_KGSL_GPUOBJ_ALLOC: size:0x%lx flags:0x%lx va_len:0x%lx ", cmd->size, cmd->flags, cmd->va_len); - } else if (request == IOCTL_KGSL_GPUOBJ_FREE) { - //printf("IOCTL_KGSL_GPUOBJ_FREE\n"); - } else if (filedes == 3) { - printf("ioctl(%d) %lx\n", tid, request); - } - -} - - int ret; - if (skip) { - ret = 0; - } else { - ret = my_ioctl(filedes, request, argp); - } - - if (request == IOCTL_KGSL_GPUOBJ_ALLOC) { - struct kgsl_gpuobj_alloc *cmd = (struct kgsl_gpuobj_alloc *)argp; - printf("mmapsize:0x%lx id:%d metadata_len:%x metadata:0x%lx = %d\n", cmd->mmapsize, cmd->id, cmd->metadata_len, cmd->metadata, ret); - } - - return ret; -} - -} - -#include -#include "../runners/snpemodel.h" -#include -#include - -static inline uint64_t nanos_since_boot() { - struct timespec t; - clock_gettime(CLOCK_BOOTTIME, &t); - return t.tv_sec * 1000000000ULL + t.tv_nsec; -} - -int global_timestamp = -1; -CachedCommand::CachedCommand(struct kgsl_gpu_command *cmd, int lfd) { - fd = lfd; - assert(cmd->numcmds == 2); - assert(cmd->numobjs == 1); - assert(cmd->numsyncs == 0); - - global_timestamp = cmd->timestamp; - - printf("%p %p %p\n", cmd, (void*)cmd->cmdlist, (void*)cmd->objlist); - - memcpy(cmds, (void *)cmd->cmdlist, sizeof(struct kgsl_command_object)*2); - memcpy(objs, (void *)cmd->objlist, sizeof(struct kgsl_command_object)*1); - cmd_0.assign((char*)cmds[0].gpuaddr, cmds[0].size); - cmd_1.assign((char*)cmds[1].gpuaddr, cmds[1].size); - - - memcpy(&cache, cmd, sizeof(cache)); -} - -// i think you get these with cl_a5x_ringbuffer_alloc -uint64_t base = 0; - -void CachedCommand::exec(bool wait) { - printf("old addr 0x%lx ", cmds[1].gpuaddr); - cmds[1].gpuaddr = base; - printf("using addr 0x%lx with size 0x%4lx ", cmds[1].gpuaddr, cmd_1.size()); - base += (cmd_1.size()+0xff) & (~0xFF); - memcpy((void*)cmds[1].gpuaddr, cmd_1.data(), cmd_1.size()); - - // set up other buffers - memcpy((void*)cmds[0].gpuaddr, cmd_0.data(), cmd_0.size()); - memset((void*)objs[0].gpuaddr, 0, objs[0].size); - - cache.timestamp = ++global_timestamp; - cache.cmdlist = (uint64_t)cmds; - cache.objlist = (uint64_t)objs; - - // run - int ret = ioctl(fd, IOCTL_KGSL_GPU_COMMAND, &cache); - - if (wait) { - struct kgsl_device_waittimestamp_ctxtid wait; - wait.context_id = cache.context_id; - wait.timestamp = cache.timestamp; - wait.timeout = -1; - - uint64_t tb = nanos_since_boot(); - int wret = ioctl(fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait); - uint64_t te = nanos_since_boot(); - - printf("exec %d wait %d after %lu us\n", ret, wret, (te-tb)/1000); - } else { - printf("CachedCommand::exec got %d\n", ret); - } -} - - -int do_print = 0; - -#define TEMPORAL_SIZE 512 -#define DESIRE_LEN 8 -#define TRAFFIC_CONVENTION_LEN 2 - -FILE *f = NULL; - -cl_program clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) { - cl_program (*my_clCreateProgramWithSource)(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) = NULL; - my_clCreateProgramWithSource = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clCreateProgramWithSource")); - //printf("clCreateProgramWithSource: %d\n", count); - - if (f == NULL) { - f = fopen("/tmp/kernels.cl", "w"); - } - - fprintf(f, "/* ************************ PROGRAM BREAK ****************************/\n"); - for (int i = 0; i < count; i++) { - fprintf(f, "%s\n", strings[i]); - if (i != 0) fprintf(f, "/* ************************ SECTION BREAK ****************************/\n"); - } - fflush(f); - - return my_clCreateProgramWithSource(context, count, strings, lengths, errcode_ret); -} - -map kernels; -map kernel_inputs; -map kernel_outputs; - -cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) { - cl_kernel (*my_clCreateKernel)(cl_program program, const char *kernel_name, cl_int *errcode_ret) = NULL; - my_clCreateKernel = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clCreateKernel")); - cl_kernel ret = my_clCreateKernel(program, kernel_name, errcode_ret); - - printf("clCreateKernel: %s -> %p\n", kernel_name, ret); - kernels.insert(make_pair(ret, kernel_name)); - return ret; -} - -typedef struct image { - size_t image_width; - size_t image_height; - size_t image_row_pitch; - cl_mem buffer; -} image; - -map buffers; -map images; - -cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) { - cl_int (*my_clSetKernelArg)(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) = NULL; - my_clSetKernelArg = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clSetKernelArg")); - - char arg_type[0x100]; - char arg_name[0x100]; - clGetKernelArgInfo(kernel, arg_index, CL_KERNEL_ARG_TYPE_NAME, sizeof(arg_type), arg_type, NULL); - clGetKernelArgInfo(kernel, arg_index, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL); - printf(" %s %s", arg_type, arg_name); - - if (arg_size == 1) { - printf(" = %d", *((char*)arg_value)); - } else if (arg_size == 2) { - printf(" = %d", *((short*)arg_value)); - } else if (arg_size == 4) { - if (strcmp(arg_type, "float") == 0) { - printf(" = %f", *((float*)arg_value)); - } else { - printf(" = %d", *((int*)arg_value)); - } - } else if (arg_size == 8) { - cl_mem val = (cl_mem)(*((uintptr_t*)arg_value)); - printf(" = %p", val); - if (strcmp(arg_name, "input") == 0) kernel_inputs[kernel] = val; - if (strcmp(arg_name, "output") == 0) kernel_outputs[kernel] = val; - if (strcmp(arg_name, "accumulator") == 0) assert(kernel_inputs[kernel] = val); - - if (buffers.find(val) != buffers.end()) { - printf(" buffer %zu", buffers[val]); - } - - if (images.find(val) != images.end()) { - printf(" image %zu x %zu rp %zu @ %p", images[val].image_width, images[val].image_height, images[val].image_row_pitch, images[val].buffer); - } - - } else { - printf(" %zu", arg_size); - } - printf("\n"); - cl_int ret = my_clSetKernelArg(kernel, arg_index, arg_size, arg_value); - return ret; -} - -uint64_t start_time = 0; -uint64_t tns = 0; - -int cnt = 0; - -cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, - cl_kernel kernel, - cl_uint work_dim, - const size_t *global_work_offset, - const size_t *global_work_size, - const size_t *local_work_size, - cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) { - - // SNPE doesn't use these - assert(num_events_in_wait_list == 0); - assert(global_work_offset == NULL); - - cl_int (*my_clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) = NULL; - my_clEnqueueNDRangeKernel = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clEnqueueNDRangeKernel")); - - - uint64_t tb = nanos_since_boot(); - cl_int ret = my_clEnqueueNDRangeKernel(command_queue, kernel, work_dim, - global_work_offset, global_work_size, local_work_size, - num_events_in_wait_list, event_wait_list, event); - uint64_t te = nanos_since_boot(); - - /*ret = clWaitForEvents(1, event); - assert(ret == CL_SUCCESS); - uint64_t tq = nanos_since_boot();*/ - - if (do_print) { - tns += te-tb; - } - - printf("%10lu %10lu running(%3d) -- %p -- %56s -- %p -> %p %s ", (tb-start_time)/1000, (tns/1000), cnt++, kernel, kernels[kernel].c_str(), kernel_inputs[kernel], kernel_outputs[kernel], - (buffers[kernel_outputs[kernel]] != 0) ? "B" : "I"); - - printf("global -- "); - for (int i = 0; i < work_dim; i++) { - printf("%4zu ", global_work_size[i]); - } - printf("local -- "); - for (int i = 0; i < work_dim; i++) { - printf("%4zu ", local_work_size[i]); - } - printf("\n"); - - return ret; -} - - -cl_mem clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_int *errcode_ret) { - cl_mem (*my_clCreateBuffer)(cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_int *errcode_ret) = NULL; - my_clCreateBuffer = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clCreateBuffer")); - - cl_mem ret = my_clCreateBuffer(context, flags, size, host_ptr, errcode_ret); - buffers[ret] = size; - printf("%p = clCreateBuffer %zu\n", ret, size); - return ret; -} - -cl_mem clCreateImage(cl_context context, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret) { - cl_mem (*my_clCreateImage)(cl_context context, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret) = NULL; - my_clCreateImage = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clCreateImage")); - - // SNPE only uses this - assert(CL_MEM_OBJECT_IMAGE2D == image_desc->image_type); - - // RGBA, HALF FLOAT - assert(CL_RGBA == image_format->image_channel_order); - assert(CL_HALF_FLOAT == image_format->image_channel_data_type); - - map lc = { - {CL_MEM_OBJECT_BUFFER, "CL_MEM_OBJECT_BUFFER"}, - {CL_MEM_OBJECT_IMAGE2D, "CL_MEM_OBJECT_IMAGE2D"}, // all this one - {CL_MEM_OBJECT_IMAGE3D, "CL_MEM_OBJECT_IMAGE3D"}, - {CL_MEM_OBJECT_IMAGE2D_ARRAY, "CL_MEM_OBJECT_IMAGE2D_ARRAY"}, - {CL_MEM_OBJECT_IMAGE1D, "CL_MEM_OBJECT_IMAGE1D"}, - {CL_MEM_OBJECT_IMAGE1D_ARRAY, "CL_MEM_OBJECT_IMAGE1D_ARRAY"}, - {CL_MEM_OBJECT_IMAGE1D_BUFFER, "CL_MEM_OBJECT_IMAGE1D_BUFFER"}}; - - assert(image_desc->image_depth == 0); - assert(image_desc->image_array_size == 0); - assert(image_desc->image_slice_pitch == 0); - //assert(image_desc->image_width * image_desc->image_height * 2 == image_desc->image_row_pitch); - - image img; - img.image_width = image_desc->image_width; - img.image_height = image_desc->image_height; - img.image_row_pitch = image_desc->image_row_pitch; - img.buffer = image_desc->buffer; - - cl_mem ret = my_clCreateImage(context, flags, image_format, image_desc, host_ptr, errcode_ret); - printf("%p = clCreateImage %s -- %p -- %d %d -- %4zu x %4zu x %4zu -- %4zu %4zu %4zu\n", ret, lc[image_desc->image_type].c_str(), - image_desc->buffer, - image_format->image_channel_order, image_format->image_channel_data_type, - image_desc->image_width, image_desc->image_height, image_desc->image_depth, - image_desc->image_array_size, image_desc->image_row_pitch, image_desc->image_slice_pitch - ); - images[ret] = img; - return ret; -} - -cl_int clWaitForEvents(cl_uint num_events, const cl_event *event_list) { - cl_int (*my_clWaitForEvents)(cl_uint num_events, const cl_event *event_list); - my_clWaitForEvents = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clWaitForEvents")); - printf("clWaitForEvents\n"); - return my_clWaitForEvents(num_events, event_list); -} - -cl_int clReleaseEvent(cl_event event) { - cl_int (*my_clReleaseEvent)(cl_event event); - my_clReleaseEvent = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clReleaseEvent")); - printf("clReleaseEvent: %p\n", event); - return my_clReleaseEvent(event); -} - -/*size_t total = 0; - -void *calloc(size_t num, size_t size) { - void *(*my_calloc)(size_t num, size_t size); - my_calloc = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_calloc")); - - void *ret = my_calloc(num, size); - - if (do_print) { - total += num*size; - printf("calloc %p -- total:0x%zx -- num:0x%zx size:0x%zx\n", ret, total, num, size); - } - return ret; -} - -void free(void *ptr) { - void (*my_free)(void *ptr); - my_free = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_free")); - - if (do_print) { - //printf("free: %p\n", ptr); - } else { - my_free(ptr); - } -}*/ - -void *dlsym(void *handle, const char *symbol) { - void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen-0x2d4); - if (memcmp("REAL_", symbol, 5) == 0) { - return my_dlsym(handle, symbol+5); - } else if (strcmp("clCreateProgramWithSource", symbol) == 0) { - return (void*)clCreateProgramWithSource; - } else if (strcmp("clCreateKernel", symbol) == 0) { - return (void*)clCreateKernel; - } else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) { - return (void*)clEnqueueNDRangeKernel; - } else if (strcmp("clSetKernelArg", symbol) == 0) { - return (void*)clSetKernelArg; - } else if (strcmp("clCreateBuffer", symbol) == 0) { - return (void*)clCreateBuffer; - } else if (strcmp("clCreateImage", symbol) == 0) { - return (void*)clCreateImage; - /*} else if (strcmp("clReleaseEvent", symbol) == 0) { - return (void*)clReleaseEvent; - } else if (strcmp("clWaitForEvents", symbol) == 0) { - return (void*)clWaitForEvents;*/ - } else { - //printf("dlsym %s\n", symbol); - return my_dlsym(handle, symbol); - } -} - -int main(int argc, char* argv[]) { - int err; - start_time = nanos_since_boot(); - cl_device_id device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT); - cl_uint tmp; - - // sweet this is 64! - err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(tmp), &tmp, NULL); - assert(err == 0); - printf("CL_DEVICE_MAX_WRITE_IMAGE_ARGS: %u\n", tmp); - - err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(tmp), &tmp, NULL); - assert(err == 0); - printf("CL_DEVICE_MAX_READ_IMAGE_ARGS: %u\n", tmp); - - float *output = (float*)calloc(0x10000, sizeof(float)); - SNPEModel mdl(argv[1], output, 0, USE_GPU_RUNTIME); - - float state[TEMPORAL_SIZE]; - mdl.addRecurrent(state, TEMPORAL_SIZE); - - float desire[DESIRE_LEN]; - mdl.addDesire(desire, DESIRE_LEN); - - float traffic_convention[TRAFFIC_CONVENTION_LEN]; - mdl.addTrafficConvention(traffic_convention, TRAFFIC_CONVENTION_LEN); - - float *input = (float*)calloc(0x1000000, sizeof(float));; - printf("************** execute 1 **************\n"); - printf("%p %p %p %p -> %p\n", input, state, desire, traffic_convention, output); - run_num = 1; ioctl_num = 0; - do_print = 0; - start_time = nanos_since_boot(); - mdl.execute(input, 0); - printf("************** execute 2 **************\n"); - run_num = 2; ioctl_num = 0; - do_print = 0; - mdl.execute(input, 0); - printf("************** execute 3 **************\n"); - run_num = 3; ioctl_num = 0; - - do_print = 1; - start_time = nanos_since_boot(); - mdl.execute(input, 0); - do_print = 0; - - struct kgsl_gpuobj_alloc alloc; - memset(&alloc, 0, sizeof(alloc)); - alloc.size = 0x40000; - alloc.flags = 0x10000a00; - int fd = 3; - int ret = ioctl(fd, IOCTL_KGSL_GPUOBJ_ALLOC, &alloc); - void *addr = mmap64(NULL, alloc.mmapsize, 0x3, 0x1, fd, alloc.id*0x1000); - assert(addr != MAP_FAILED); - - intercept = 0; - while (1) { - printf("************** execute 4 **************\n"); - run_num = 4; - base = (uint64_t)addr; - - uint64_t tb = nanos_since_boot(); - int i = 0; - for (auto it = queue_cmds.begin(); it != queue_cmds.end(); ++it) { - printf("run %2d: ", i++); - //(*it)->exec(i == queue_cmds.size()); - (*it)->exec(true); - } - uint64_t te = nanos_since_boot(); - printf("model exec in %lu us\n", (te-tb)/1000); - - break; - } - - /*FILE *f = fopen("/proc/self/maps", "rb"); - char maps[0x100000]; - int len = fread(maps, 1, sizeof(maps), f); - maps[len] = '\0'; - fclose(f); - printf("%s\n", maps);*/ - - printf("buffers: %lu images: %lu\n", buffers.size(), images.size()); - printf("queues: %lu\n", queue_cmds.size()); - - // IOCTL_KGSL_GPU_COMMAND: flags: 0x11 numcmds: 2 numobjs: 1 numsyncs: 0 context_id: 7 timestamp: 77 - /*int ts = 100; - for (auto it = queue_cmds.begin(); it != queue_cmds.end(); ++it) { - auto qcmd = *it; - //disassemble((uint32_t *)qcmd.data(), qcmd.size()/4); - - struct kgsl_command_object cmdlists[2]; - struct kgsl_command_object objlists; - struct kgsl_gpu_command cmd; - uint8_t objs[0xc0]; - memset(objs, 0, 0xc0); - - memset(&cmd, 0, sizeof(cmd)); - memset(&cmdlists, 0, sizeof(struct kgsl_command_object)*2); - memset(&objlists, 0, sizeof(objlists)); - - cmd.flags = 0x11; - cmd.cmdlist = (uint64_t)cmdlists; - cmd.numcmds = 2; - cmd.objlist = (uint64_t)objlists; - cmd.numobjs = 1; - cmd.numsyncs = 0; - cmd.context_id = 7; - cmd.timestamp = ts++; - - cmdlists[0].gpuaddr = (uint64_t)queue_init; - cmdlists[0].size = 0xbc; - cmdlists[0].flags = 1; - cmdlists[1].gpuaddr = (uint64_t)qcmd.data(); - cmdlists[1].size = qcmd.size(); - cmdlists[1].flags = 1; - - objlists.gpuaddr = (uint64_t)objs; - objlists.size = 0xc0; - objlists.flags = 0x18; - }*/ -} - diff --git a/selfdrive/modeld/thneed/debug/microbenchmark/gemm.cl b/selfdrive/modeld/thneed/debug/microbenchmark/gemm.cl deleted file mode 100644 index 6a55406aee..0000000000 --- a/selfdrive/modeld/thneed/debug/microbenchmark/gemm.cl +++ /dev/null @@ -1,51 +0,0 @@ -// https://github.com/moskewcz/boda/issues/13 - -#define USE_FP16 - -#ifdef USE_FP16 - #define up(x) x - #define down(x) x - #define xtype half8 - #define skip 128 -#else - #define up(x) convert_float8(x) - #define down(x) convert_half8(x) - #define xtype float8 - #define skip 128 -#endif - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -__kernel void gemm(const int M, const int N, const int K, - global const half8* a, global const half8* b, global half8* c ) -{ - xtype c_r[8] = {0,0,0,0,0,0,0,0}; - - int const a_off_thr = get_global_id(0); - int const b_off_thr = get_global_id(1); - - int a_off = a_off_thr; - int b_off = b_off_thr; - for( int k = 0; k < 1024; k += 1 ) { - xtype a_r = up(a[a_off]); - xtype b_r = up(b[b_off]); - - c_r[0] += a_r.s0*b_r; - c_r[1] += a_r.s1*b_r; - c_r[2] += a_r.s2*b_r; - c_r[3] += a_r.s3*b_r; - c_r[4] += a_r.s4*b_r; - c_r[5] += a_r.s5*b_r; - c_r[6] += a_r.s6*b_r; - c_r[7] += a_r.s7*b_r; - - a_off += skip; - b_off += skip; - } - - int c_off = a_off_thr*1024 + b_off_thr; - for (int i = 0; i < 8; i++) { - c[c_off] = down(c_r[i]); - c_off += skip; - } -} - diff --git a/selfdrive/modeld/thneed/debug/microbenchmark/gemm_image.cl b/selfdrive/modeld/thneed/debug/microbenchmark/gemm_image.cl deleted file mode 100644 index f466a8ca49..0000000000 --- a/selfdrive/modeld/thneed/debug/microbenchmark/gemm_image.cl +++ /dev/null @@ -1,75 +0,0 @@ -// https://github.com/moskewcz/boda/issues/13 - -//#define USE_FP16 - -#ifdef USE_FP16 - #define xtype half4 - #define read_imagep read_imageh - #define write_imagep write_imageh -#else - #define xtype float4 - #define read_imagep read_imagef - #define write_imagep write_imagef -#endif - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -__kernel void gemm(const int M, const int N, const int K, - read_only image2d_t A, - read_only image2d_t B, - write_only image2d_t C) -{ - const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | - CLK_ADDRESS_CLAMP | - CLK_FILTER_NEAREST; - - xtype c_r[4] = {0,0,0,0}; - xtype a_r[4], b_r[4]; - - int const a_off_thr = get_global_id(0); - int const b_off_thr = get_global_id(1); - - int2 a_samp = {0, a_off_thr}; - int2 b_samp = {0, b_off_thr}; - - for (short k = 0; k < K/4; k++) { - for (short i = 0; i < 4; ++i) { - a_r[i] = read_imagep(A, smp, a_samp); - b_r[i] = read_imagep(B, smp, b_samp); - ++a_samp.x; - ++b_samp.x; - } - - for (short i = 0; i < 4; ++i) { - float4 ov = c_r[i]; - - ov.x += a_r[i].x * b_r[0].x; - ov.x += a_r[i].y * b_r[0].y; - ov.x += a_r[i].z * b_r[0].z; - ov.x += a_r[i].w * b_r[0].w; - - ov.y += a_r[i].x * b_r[1].x; - ov.y += a_r[i].y * b_r[1].y; - ov.y += a_r[i].z * b_r[1].z; - ov.y += a_r[i].w * b_r[1].w; - - ov.z += a_r[i].x * b_r[2].x; - ov.z += a_r[i].y * b_r[2].y; - ov.z += a_r[i].z * b_r[2].z; - ov.z += a_r[i].w * b_r[2].w; - - ov.w += a_r[i].x * b_r[3].x; - ov.w += a_r[i].y * b_r[3].y; - ov.w += a_r[i].z * b_r[3].z; - ov.w += a_r[i].w * b_r[3].w; - - c_r[i] = ov; - } - } - - int2 c_samp = {a_off_thr, b_off_thr*4}; - for (short i = 0; i < 4; i++) { - write_imagep(C, c_samp, c_r[i]); - ++c_samp.y; - } -} - diff --git a/selfdrive/modeld/thneed/debug/microbenchmark/go.c b/selfdrive/modeld/thneed/debug/microbenchmark/go.c deleted file mode 100644 index 18b91a6cb2..0000000000 --- a/selfdrive/modeld/thneed/debug/microbenchmark/go.c +++ /dev/null @@ -1,314 +0,0 @@ -#include -#include -#include -#include -#include - -/* -block7b_project_conv (Conv2D) (None, 8, 16, 352) 743424 block7b_activation[0][0] -8448*8*4 = 8*16*2112 = 270336 = input = 128*2112 -2112*88*4 = 743424 = weights = 2112*352 -1408*8*4 = 8*16*352 = 45056 = output = 128*352 - -FLOPS = 128*2112*352 = 95158272 = 95 MFLOPS -RAM = 128*2112 + 2112*352 + 128*352 = 1058816 = 1 M accesses - -# 22 groups -128*2112 + 2112*16 + 128*16 = 306176 -306176*22 = 6735872 real accesses - -This is a 128x2112 by 2112x352 matrix multiply - -work_size = {88, 4, 8} -Each kernel run computes 16 outputs - -0x7f7e8a6380 convolution_horizontal_reduced_reads_1x1 -- 88 4 8 -- 4 4 8 - image2d_t input = 0x7f7f490b00 image 8448 x 8 rp 67840 - short startPackedInputChannel = 0 - short numPackedInputChannelsForGroup = 528 - short totalNumPackedInputChannels = 528 - short packedOuputChannelOffset = 0 - short totalNumPackedOutputChannels = 88 - image2d_t weights = 0x7f7f52fb80 image 2112 x 88 rp 16896 - float* biases = 0x7f7f564d80 buffer 1408 - short filterSizeX = 1 - short filterSizeY = 1 - image2d_t output = 0x7f7f490e80 image 1408 x 8 rp 11264 - short paddingX = 0 - short paddingY = 0 - short strideX = 1 - short strideY = 1 - short neuron = 0 - float a = 1.000000 - float b = 1.000000 - float min_clamp = 0.000000 - float max_clamp = 0.000000 - float* parameters = 0x0 - float* batchNormBiases = 0x0 - short numOutputColumns = 16 -*/ - -#define GEMM -#define IMAGE - -void dump_maps() { - FILE *f = fopen("/proc/self/maps", "rb"); - char maps[0x100000]; - int len = fread(maps, 1, sizeof(maps), f); - maps[len] = '\0'; - maps[0x800] = '\0'; - fclose(f); - printf("%s\n", maps); -} - -static inline uint64_t nanos_since_boot() { - struct timespec t; - clock_gettime(CLOCK_BOOTTIME, &t); - return t.tv_sec * 1000000000ULL + t.tv_nsec; -} - -int main(int argc, char *argv[]) { - cl_int err; - - // cl init - cl_device_id device_id; - cl_context context; - cl_command_queue q; - { - cl_platform_id platform_id[2]; - cl_uint num_devices; - cl_uint num_platforms; - - err = clGetPlatformIDs(sizeof(platform_id)/sizeof(cl_platform_id), platform_id, &num_platforms); - assert(err == 0); - - err = clGetDeviceIDs(platform_id[0], CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &num_devices); - assert(err == 0); - - context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err); - assert(err == 0); - - q = clCreateCommandQueue(context, device_id, 0, &err); - assert(err == 0); - } - printf("cl ready\n"); - - char tmp[0x10000]; - memset(tmp, 0, sizeof(tmp)); - FILE *f = fopen(argv[1], "rb"); - fread(tmp, 1, sizeof(tmp), f); - fclose(f); - - const char *strings[1]; - size_t lengths[1]; - strings[0] = tmp; - lengths[0] = strlen(tmp); - - cl_program prog = clCreateProgramWithSource(context, 1, strings, lengths, &err); - assert(err == 0); - printf("creating program\n"); - - err = clBuildProgram(prog, 1, &device_id, "-D AVANTE_IS_GPU_A530_64", NULL, NULL); - - if (err != 0) { - printf("got err %d\n", err); - size_t length; - char buffer[2048]; - clGetProgramBuildInfo(prog, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &length); - buffer[length] = '\0'; - printf("%s\n", buffer); - } - assert(err == 0); - printf("built program\n"); - - -#ifdef GEMM - // 128x2112 by 2112x352 - int M,N,K; - - M = N = K = 1024; - //M = 128; K = 2112; N = 352; - - cl_kernel kern = clCreateKernel(prog, "gemm", &err); - assert(err == 0); - printf("creating kernel %p\n", kern); - - cl_mem A,B,C; - A = clCreateBuffer(context, CL_MEM_READ_WRITE, M*K*2, NULL, &err); - assert(err == 0); - B = clCreateBuffer(context, CL_MEM_READ_WRITE, K*N*2, NULL, &err); - assert(err == 0); - C = clCreateBuffer(context, CL_MEM_READ_WRITE, M*N*2, NULL, &err); - assert(err == 0); - printf("created buffers\n"); - -#ifdef IMAGE - cl_image_format fmt; - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_HALF_FLOAT; - - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_depth = 0; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; - - desc.image_width = K; desc.image_height = M/4; - desc.buffer = A; - desc.image_row_pitch = desc.image_width*8; - A = clCreateImage(context, CL_MEM_READ_WRITE, &fmt, &desc, NULL, &err); - assert(err == 0); - - desc.image_width = K; desc.image_height = N/4; - desc.buffer = B; desc.image_row_pitch = desc.image_width*8; - B = clCreateImage(context, CL_MEM_READ_WRITE, &fmt, &desc, NULL, &err); - assert(err == 0); - - desc.image_width = M/4; desc.image_height = N; - desc.buffer = C; desc.image_row_pitch = desc.image_width*8; - C = clCreateImage(context, CL_MEM_READ_WRITE, &fmt, &desc, NULL, &err); - assert(err == 0); - printf("created images\n"); -#endif - - clSetKernelArg(kern, 0, sizeof(int), &M); - clSetKernelArg(kern, 1, sizeof(int), &N); - clSetKernelArg(kern, 2, sizeof(int), &K); - - clSetKernelArg(kern, 3, sizeof(cl_mem), &A); - clSetKernelArg(kern, 4, sizeof(cl_mem), &B); - clSetKernelArg(kern, 5, sizeof(cl_mem), &C); - printf("set args\n"); - -#ifdef IMAGE - size_t global_work_size[3] = {M/4, N/4, 1}; - size_t local_work_size[3] = {4, 64, 1}; -#else - size_t global_work_size[3] = {128, 128, 1}; - size_t local_work_size[3] = {2, 128, 1}; -#endif - -#else - cl_kernel kern = clCreateKernel(prog, "convolution_horizontal_reduced_reads_1x1", &err); - assert(err == 0); - printf("creating kernel\n"); - - cl_mem input; - cl_mem weights; - cl_mem weights_buffer; - cl_mem biases; - cl_mem outputs; - - cl_image_format fmt; - fmt.image_channel_order = CL_RGBA; - fmt.image_channel_data_type = CL_HALF_FLOAT; - - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_depth = 0; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; - desc.buffer = NULL; - - biases = clCreateBuffer(context, CL_MEM_READ_WRITE, 1408, NULL, &err); - assert(err == 0); - - desc.image_width = 8448; desc.image_height = 8; desc.image_row_pitch = 67840; - desc.buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, desc.image_height * desc.image_row_pitch, NULL, &err); - assert(err == 0); - input = clCreateImage(context, CL_MEM_READ_WRITE, &fmt, &desc, NULL, &err); - assert(err == 0); - - desc.image_width = 2112; desc.image_height = 88; desc.image_row_pitch = 16896; - weights_buffer = desc.buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, desc.image_height * desc.image_row_pitch, NULL, &err); - assert(err == 0); - weights = clCreateImage(context, CL_MEM_READ_WRITE, &fmt, &desc, NULL, &err); - assert(err == 0); - - desc.image_width = 1408; desc.image_height = 8; desc.image_row_pitch = 11264; - desc.buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, desc.image_height * desc.image_row_pitch, NULL, &err); - assert(err == 0); - outputs = clCreateImage(context, CL_MEM_READ_WRITE, &fmt, &desc, NULL, &err); - assert(err == 0); - - void *n = NULL; - uint16_t v; - float fl; - - clSetKernelArg(kern, 0, sizeof(cl_mem), &input); - v = 0; clSetKernelArg(kern, 1, sizeof(v), &v); - v = 528; clSetKernelArg(kern, 2, sizeof(v), &v); - v = 528; clSetKernelArg(kern, 3, sizeof(v), &v); - v = 0; clSetKernelArg(kern, 4, sizeof(v), &v); - v = 88; clSetKernelArg(kern, 5, sizeof(v), &v); - clSetKernelArg(kern, 6, sizeof(cl_mem), &weights); - //clSetKernelArg(kern, 6, sizeof(cl_mem), &weights_buffer); - clSetKernelArg(kern, 7, sizeof(cl_mem), &biases); - v = 1; clSetKernelArg(kern, 8, sizeof(v), &v); - v = 1; clSetKernelArg(kern, 9, sizeof(v), &v); - clSetKernelArg(kern, 10, sizeof(cl_mem), &outputs); - v = 0; clSetKernelArg(kern, 11, sizeof(v), &v); - v = 0; clSetKernelArg(kern, 12, sizeof(v), &v); - v = 1; clSetKernelArg(kern, 13, sizeof(v), &v); - v = 1; clSetKernelArg(kern, 14, sizeof(v), &v); - v = 0; clSetKernelArg(kern, 15, sizeof(v), &v); - fl = 1.0; clSetKernelArg(kern, 16, sizeof(fl), &fl); - fl = 0.0; clSetKernelArg(kern, 17, sizeof(fl), &fl); - fl = 0.0; clSetKernelArg(kern, 18, sizeof(fl), &fl); - fl = 0.0; clSetKernelArg(kern, 19, sizeof(fl), &fl); - clSetKernelArg(kern, 20, sizeof(n), &n); - clSetKernelArg(kern, 21, sizeof(n), &n); - v = 16; clSetKernelArg(kern, 22, sizeof(v), &v); - - size_t global_work_size[3] = {88, 4, 8}; - size_t local_work_size[3] = {4, 4, 8}; -#endif - - printf("ready to enqueue\n"); - for (int i = 0; i < 20; i++) { - cl_event event; - err = clEnqueueNDRangeKernel(q, kern, 3, NULL, global_work_size, local_work_size, 0, NULL, &event); - assert(err == 0); - - uint64_t tb = nanos_since_boot(); - err = clWaitForEvents(1, &event); - assert(err == 0); - uint64_t te = nanos_since_boot(); - uint64_t us = (te-tb)/1000; - - float s = 1000000.0/us; - -#ifdef GEMM - float flops = M*N*K*s; - float rams = (M*N + N*K + M*K)*s; -#else - float flops = 95158272.0*s; - float rams = 1058816.0*s; - //float rams = 6735872.0*s; -#endif - - printf("%2d: wait %lu us -- %.2f GFLOPS -- %.2f GB/s\n", i, us, flops/1e9, rams*2/1e9); - } - - size_t binary_size = 0; - err = clGetProgramInfo(prog, CL_PROGRAM_BINARY_SIZES, sizeof(binary_size), &binary_size, NULL); - assert(err == 0); - assert(binary_size > 0); - - uint8_t *binary_buf = (uint8_t *)malloc(binary_size); - assert(binary_buf); - - uint8_t* bufs[1] = { binary_buf, }; - err = clGetProgramInfo(prog, CL_PROGRAM_BINARIES, sizeof(bufs), &bufs, NULL); - assert(err == 0); - - FILE *g = fopen("/tmp/bin.bin", "wb"); - fwrite(binary_buf, 1, binary_size, g); - fclose(g); - - /*dump_maps(); - for (uint64_t i = 0x7ffbd2000; i < 0x800000000; i += 0x1000) { - uint64_t cmd = *((uint64_t*)i); - printf("%llx: %llx\n", i, cmd); - }*/ - - - return 0; -} - diff --git a/selfdrive/modeld/thneed/debug/microbenchmark/run.sh b/selfdrive/modeld/thneed/debug/microbenchmark/run.sh deleted file mode 100755 index f77d27d817..0000000000 --- a/selfdrive/modeld/thneed/debug/microbenchmark/run.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -gcc -I/data/openpilot/phonelibs/opencl/include -L/system/vendor/lib64 -lOpenCL -lCB -lgsl go.c diff --git a/selfdrive/modeld/thneed/debug/test.cc b/selfdrive/modeld/thneed/debug/test.cc deleted file mode 100644 index 6f185b9f00..0000000000 --- a/selfdrive/modeld/thneed/debug/test.cc +++ /dev/null @@ -1,99 +0,0 @@ -#include "../thneed.h" -#include "../../runners/snpemodel.h" - -#define TEMPORAL_SIZE 512 -#define DESIRE_LEN 8 -#define TRAFFIC_CONVENTION_LEN 2 - -void hexdump(uint32_t *d, int len); - -int main(int argc, char* argv[]) { - #define OUTPUT_SIZE 0x10000 - float *output = (float*)calloc(OUTPUT_SIZE, sizeof(float)); - float *golden = (float*)calloc(OUTPUT_SIZE, sizeof(float)); - SNPEModel mdl(argv[1], output, 0, USE_GPU_RUNTIME); - - // cmd line test - if (argc > 2) { - for (int i = 2; i < argc; i++) { - float *buf[5]; - FILE *f = fopen(argv[i], "rb"); - - size_t sz; - for (int j = 0; j < 5; j++) { - fread(&sz, 1, sizeof(sz), f); - printf("reading %zu\n", sz); - buf[j] = (float*)malloc(sz); - fread(buf[j], 1, sz, f); - } - - if (sz != 9532) continue; - - mdl.addRecurrent(buf[0], TEMPORAL_SIZE); - mdl.addTrafficConvention(buf[1], TRAFFIC_CONVENTION_LEN); - mdl.addDesire(buf[2], DESIRE_LEN); - mdl.execute(buf[3], 0); - - hexdump((uint32_t*)buf[4], 0x100); - hexdump((uint32_t*)output, 0x100); - - for (int j = 0; j < sz/4; j++) { - if (buf[4][j] != output[j]) { - printf("MISMATCH %d real:%f comp:%f\n", j, buf[4][j], output[j]); - } - } - } - - return 0; - } - - float state[TEMPORAL_SIZE]; - mdl.addRecurrent(state, TEMPORAL_SIZE); - - float desire[DESIRE_LEN]; - mdl.addDesire(desire, DESIRE_LEN); - - float traffic_convention[TRAFFIC_CONVENTION_LEN]; - mdl.addTrafficConvention(traffic_convention, TRAFFIC_CONVENTION_LEN); - - float *input = (float*)calloc(0x1000000, sizeof(float));; - - // first run - printf("************** execute 1 **************\n"); - memset(output, 0, OUTPUT_SIZE * sizeof(float)); - mdl.execute(input, 0); - hexdump((uint32_t *)output, 0x100); - memcpy(golden, output, OUTPUT_SIZE * sizeof(float)); - - // second run - printf("************** execute 2 **************\n"); - memset(output, 0, OUTPUT_SIZE * sizeof(float)); - Thneed *t = new Thneed(); - t->record = 7; // debug print with record - mdl.execute(input, 0); - t->stop(); - hexdump((uint32_t *)output, 0x100); - if (memcmp(golden, output, OUTPUT_SIZE * sizeof(float)) != 0) { printf("FAILURE\n"); return -1; } - - // third run - printf("************** execute 3 **************\n"); - memset(output, 0, OUTPUT_SIZE * sizeof(float)); - t->record = 2; // debug print w/o record - float *inputs[4] = {state, traffic_convention, desire, input}; - t->execute(inputs, output, true); - hexdump((uint32_t *)output, 0x100); - if (memcmp(golden, output, OUTPUT_SIZE * sizeof(float)) != 0) { printf("FAILURE\n"); return -1; } - - printf("************** execute 4 **************\n"); - while (1) { - memset(output, 0, OUTPUT_SIZE * sizeof(float)); - //t->record = 2; // debug print w/o record - t->execute(inputs, output); - hexdump((uint32_t *)output, 0x100); - if (memcmp(golden, output, OUTPUT_SIZE * sizeof(float)) != 0) { printf("FAILURE\n"); return -1; } - break; - } - - printf("************** execute done **************\n"); -} - diff --git a/selfdrive/modeld/thneed/debug/thneed b/selfdrive/modeld/thneed/debug/thneed deleted file mode 100755 index ab2d721bb2..0000000000 --- a/selfdrive/modeld/thneed/debug/thneed +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -export LD_LIBRARY_PATH="/data/openpilot/phonelibs/snpe/aarch64/:$HOME/openpilot/phonelibs/snpe/larch64:$HOME/openpilot/phonelibs/snpe/x86_64-linux-clang:$LD_LIBRARY_PATH" -exec ./_thneed $@ - diff --git a/selfdrive/modeld/thneed/thneed.cc b/selfdrive/modeld/thneed/thneed.cc index e08370a70d..4432856532 100644 --- a/selfdrive/modeld/thneed/thneed.cc +++ b/selfdrive/modeld/thneed/thneed.cc @@ -7,9 +7,16 @@ #include #include "thneed.h" +//#define SAVE_KERNELS + +//#define RUN_DISASSEMBLER +//#define RUN_OPTIMIZER + Thneed *g_thneed = NULL; int g_fd = -1; map, string> g_args; +map, int> g_args_size; +map g_program_source; static inline uint64_t nanos_since_boot() { struct timespec t; @@ -27,6 +34,8 @@ void hexdump(uint32_t *d, int len) { printf("\n"); } +// *********** ioctl interceptor *********** + extern "C" { int (*my_ioctl)(int filedes, unsigned long request, void *argp) = NULL; @@ -39,32 +48,33 @@ int ioctl(int filedes, unsigned long request, void *argp) { // save the fd if (request == IOCTL_KGSL_GPUOBJ_ALLOC) g_fd = filedes; + // note that this runs always, even without a thneed object if (request == IOCTL_KGSL_DRAWCTXT_CREATE) { struct kgsl_drawctxt_create *create = (struct kgsl_drawctxt_create *)argp; create->flags &= ~KGSL_CONTEXT_PRIORITY_MASK; create->flags |= 1 << KGSL_CONTEXT_PRIORITY_SHIFT; // priority from 1-15, 1 is max priority - printf("creating context with flags 0x%x\n", create->flags); + printf("IOCTL_KGSL_DRAWCTXT_CREATE: creating context with flags 0x%x\n", create->flags); } if (thneed != NULL) { if (request == IOCTL_KGSL_GPU_COMMAND) { struct kgsl_gpu_command *cmd = (struct kgsl_gpu_command *)argp; - if (thneed->record & 1) { + if (thneed->record & THNEED_RECORD) { thneed->timestamp = cmd->timestamp; thneed->context_id = cmd->context_id; thneed->cmds.push_back(unique_ptr(new CachedCommand(thneed, cmd))); } - if (thneed->record & 2) { - printf("IOCTL_KGSL_GPU_COMMAND(%2zu): flags: 0x%lx context_id: %u timestamp: %u\n", + if (thneed->record & THNEED_DEBUG) { + printf("IOCTL_KGSL_GPU_COMMAND(%2zu): flags: 0x%lx context_id: %u timestamp: %u numcmds: %d numobjs: %d\n", thneed->cmds.size(), cmd->flags, - cmd->context_id, cmd->timestamp); + cmd->context_id, cmd->timestamp, cmd->numcmds, cmd->numobjs); } } else if (request == IOCTL_KGSL_GPUOBJ_SYNC) { struct kgsl_gpuobj_sync *cmd = (struct kgsl_gpuobj_sync *)argp; struct kgsl_gpuobj_sync_obj *objs = (struct kgsl_gpuobj_sync_obj *)(cmd->objs); - if (thneed->record & 2) { + if (thneed->record & THNEED_DEBUG) { printf("IOCTL_KGSL_GPUOBJ_SYNC count:%d ", cmd->count); for (int i = 0; i < cmd->count; i++) { printf(" -- offset:0x%lx len:0x%lx id:%d op:%d ", objs[i].offset, objs[i].length, objs[i].id, objs[i].op); @@ -72,20 +82,20 @@ int ioctl(int filedes, unsigned long request, void *argp) { printf("\n"); } - if (thneed->record & 1) { + if (thneed->record & THNEED_RECORD) { thneed->syncobjs.push_back(string((char *)objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count)); } } else if (request == IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID) { struct kgsl_device_waittimestamp_ctxtid *cmd = (struct kgsl_device_waittimestamp_ctxtid *)argp; - if (thneed->record & 2) { + if (thneed->record & THNEED_DEBUG) { printf("IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID: context_id: %d timestamp: %d timeout: %d\n", cmd->context_id, cmd->timestamp, cmd->timeout); } } else if (request == IOCTL_KGSL_SETPROPERTY) { - if (thneed->record & 2) { + if (thneed->record & THNEED_DEBUG) { struct kgsl_device_getproperty *prop = (struct kgsl_device_getproperty *)argp; printf("IOCTL_KGSL_SETPROPERTY: 0x%x sizebytes:%zu\n", prop->type, prop->sizebytes); - if (thneed->record & 4) { + if (thneed->record & THNEED_VERBOSE_DEBUG) { hexdump((uint32_t *)prop->value, prop->sizebytes); if (prop->type == KGSL_PROP_PWR_CONSTRAINT) { struct kgsl_device_constraint *constraint = (struct kgsl_device_constraint *)prop->value; @@ -103,6 +113,8 @@ int ioctl(int filedes, unsigned long request, void *argp) { } +// *********** GPUMalloc *********** + GPUMalloc::GPUMalloc(int size, int fd) { struct kgsl_gpuobj_alloc alloc; memset(&alloc, 0, sizeof(alloc)); @@ -128,30 +140,38 @@ void *GPUMalloc::alloc(int size) { return ret; } +// *********** CachedCommand, at the ioctl layer *********** + CachedCommand::CachedCommand(Thneed *lthneed, struct kgsl_gpu_command *cmd) { thneed = lthneed; - assert(cmd->numcmds == 2); - assert(cmd->numobjs == 1); assert(cmd->numsyncs == 0); - memcpy(cmds, (void *)cmd->cmdlist, sizeof(struct kgsl_command_object)*2); - memcpy(objs, (void *)cmd->objlist, sizeof(struct kgsl_command_object)*1); - memcpy(&cache, cmd, sizeof(cache)); - cache.cmdlist = (uint64_t)cmds; - cache.objlist = (uint64_t)objs; - for (int i = 0; i < cmd->numcmds; i++) { - void *nn = thneed->ram->alloc(cmds[i].size); - memcpy(nn, (void*)cmds[i].gpuaddr, cmds[i].size); - cmds[i].gpuaddr = (uint64_t)nn; + if (cmd->numcmds > 0) { + cmds = make_unique(cmd->numcmds); + memcpy(cmds.get(), (void *)cmd->cmdlist, sizeof(struct kgsl_command_object)*cmd->numcmds); + cache.cmdlist = (uint64_t)cmds.get(); + for (int i = 0; i < cmd->numcmds; i++) { + void *nn = thneed->ram->alloc(cmds[i].size); + memcpy(nn, (void*)cmds[i].gpuaddr, cmds[i].size); + cmds[i].gpuaddr = (uint64_t)nn; + } } - for (int i = 0; i < cmd->numobjs; i++) { - void *nn = thneed->ram->alloc(objs[i].size); - memset(nn, 0, objs[i].size); - objs[i].gpuaddr = (uint64_t)nn; + if (cmd->numobjs > 0) { + objs = make_unique(cmd->numobjs); + memcpy(objs.get(), (void *)cmd->objlist, sizeof(struct kgsl_command_object)*cmd->numobjs); + cache.objlist = (uint64_t)objs.get(); + for (int i = 0; i < cmd->numobjs; i++) { + void *nn = thneed->ram->alloc(objs[i].size); + memset(nn, 0, objs[i].size); + objs[i].gpuaddr = (uint64_t)nn; + } } + + kq = thneed->ckq; + thneed->ckq.clear(); } void CachedCommand::exec(bool wait) { @@ -168,19 +188,33 @@ void CachedCommand::exec(bool wait) { int wret = ioctl(thneed->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait); uint64_t te = nanos_since_boot(); - if (thneed->record & 2) printf("exec %d wait %d after %lu us\n", ret, wret, (te-tb)/1000); + if (thneed->record & THNEED_DEBUG) printf("exec %d wait %d after %lu us\n", ret, wret, (te-tb)/1000); } else { - if (thneed->record & 2) printf("CachedCommand::exec got %d\n", ret); + if (thneed->record & THNEED_DEBUG) printf("CachedCommand::exec got %d\n", ret); + } + + if (thneed->record & THNEED_VERBOSE_DEBUG) { + for (auto &it : kq) { + it->debug_print(false); + } + #ifdef RUN_DISASSEMBLER + // assuming 2 commands + disassemble(0); + disassemble(1); + #endif } assert(ret == 0); } -Thneed::Thneed() { +// *********** Thneed *********** + +Thneed::Thneed(bool do_clinit) { + if (do_clinit) clinit(); assert(g_fd != -1); fd = g_fd; ram = make_unique(0x40000, fd); - record = 1; + record = THNEED_RECORD; timestamp = -1; g_thneed = this; } @@ -189,29 +223,18 @@ void Thneed::stop() { record = 0; } -//#define SAVE_LOG - void Thneed::execute(float **finputs, float *foutput, bool slow) { + int ret; uint64_t tb, te; - if (record & 2) tb = nanos_since_boot(); - - #ifdef SAVE_LOG - char fn[0x100]; - snprintf(fn, sizeof(fn), "/tmp/thneed_log_%d", timestamp); - FILE *f = fopen(fn, "wb"); - #endif + if (record & THNEED_DEBUG) tb = nanos_since_boot(); // ****** copy inputs for (int idx = 0; idx < inputs.size(); ++idx) { size_t sz; clGetMemObjectInfo(inputs[idx], CL_MEM_SIZE, sizeof(sz), &sz, NULL); - #ifdef SAVE_LOG - fwrite(&sz, 1, sizeof(sz), f); - fwrite(finputs[idx], 1, sz, f); - #endif - - if (record & 2) printf("copying %lu -- %p -> %p\n", sz, finputs[idx], inputs[idx]); + if (record & THNEED_DEBUG) printf("copying %lu -- %p -> %p\n", sz, finputs[idx], inputs[idx]); + // TODO: This shouldn't have to block clEnqueueWriteBuffer(command_queue, inputs[idx], CL_TRUE, 0, sz, finputs[idx], 0, NULL, NULL); } @@ -229,40 +252,38 @@ void Thneed::execute(float **finputs, float *foutput, bool slow) { prop.type = KGSL_PROP_PWR_CONSTRAINT; prop.value = (void*)&constraint; prop.sizebytes = sizeof(constraint); - int ret = ioctl(fd, IOCTL_KGSL_SETPROPERTY, &prop); + ret = ioctl(fd, IOCTL_KGSL_SETPROPERTY, &prop); assert(ret == 0); // ****** run commands int i = 0; - for (auto it = cmds.begin(); it != cmds.end(); ++it) { + for (auto &it : cmds) { ++i; - if (record & 2) printf("run %2d: ", i); - (*it)->exec((i == cmds.size()) || slow); + if (record & THNEED_DEBUG) printf("run %2d @ %7lu us: ", i, (nanos_since_boot()-tb)/1000); + it->exec((i == cmds.size()) || slow); } // ****** sync objects - for (auto it = syncobjs.begin(); it != syncobjs.end(); ++it) { + for (auto &it : syncobjs) { struct kgsl_gpuobj_sync cmd; - cmd.objs = (uint64_t)it->data(); - cmd.obj_len = it->length(); - cmd.count = it->length() / sizeof(struct kgsl_gpuobj_sync_obj); + cmd.objs = (uint64_t)it.data(); + cmd.obj_len = it.length(); + cmd.count = it.length() / sizeof(struct kgsl_gpuobj_sync_obj); ret = ioctl(fd, IOCTL_KGSL_GPUOBJ_SYNC, &cmd); assert(ret == 0); } // ****** copy outputs - size_t sz; - clGetMemObjectInfo(output, CL_MEM_SIZE, sizeof(sz), &sz, NULL); - if (record & 2) printf("copying %lu for output %p -> %p\n", sz, output, foutput); - clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sz, foutput, 0, NULL, NULL); - - #ifdef SAVE_LOG - fwrite(&sz, 1, sizeof(sz), f); - fwrite(foutput, 1, sz, f); - fclose(f); - #endif + if (output != NULL) { + size_t sz; + clGetMemObjectInfo(output, CL_MEM_SIZE, sizeof(sz), &sz, NULL); + if (record & THNEED_DEBUG) printf("copying %lu for output %p -> %p\n", sz, output, foutput); + clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sz, foutput, 0, NULL, NULL); + } else { + printf("CAUTION: model output is NULL, does it have no outputs?\n"); + } // ****** unset power constraint constraint.type = KGSL_CONSTRAINT_NONE; @@ -272,25 +293,59 @@ void Thneed::execute(float **finputs, float *foutput, bool slow) { ret = ioctl(fd, IOCTL_KGSL_SETPROPERTY, &prop); assert(ret == 0); - if (record & 2) { + if (record & THNEED_DEBUG) { te = nanos_since_boot(); printf("model exec in %lu us\n", (te-tb)/1000); } } -// TODO: with a different way of getting the input and output buffers, we don't have to intercept CL at all +void Thneed::clinit() { + cl_int err; + + cl_platform_id platform_id[2]; + cl_uint num_devices; + cl_uint num_platforms; + + err = clGetPlatformIDs(sizeof(platform_id)/sizeof(cl_platform_id), platform_id, &num_platforms); + assert(err == 0); + + err = clGetDeviceIDs(platform_id[0], CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &num_devices); + assert(err == 0); + + context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err); + assert(err == 0); + + //cl_command_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; + cl_command_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; + command_queue = clCreateCommandQueueWithProperties(context, device_id, props, &err); + assert(err == 0); + + printf("Thneed::clinit done\n"); +} + +cl_int Thneed::clexec() { + printf("Thneed::clexec: running %lu queued kernels\n", kq.size()); + for (auto &k : kq) { + if (record & THNEED_RECORD) ckq.push_back(k); + cl_int ret = k->exec(); + assert(ret == CL_SUCCESS); + } + return clFinish(command_queue); +} + +// *********** OpenCL interceptor *********** -cl_int (*my_clSetKernelArg)(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) = NULL; cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) { - if (my_clSetKernelArg == NULL) my_clSetKernelArg = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clSetKernelArg")); + g_args_size[make_pair(kernel, arg_index)] = arg_size; if (arg_value != NULL) { g_args[make_pair(kernel, arg_index)] = string((char*)arg_value, arg_size); + } else { + g_args[make_pair(kernel, arg_index)] = string(""); } - cl_int ret = my_clSetKernelArg(kernel, arg_index, arg_size, arg_value); + cl_int ret = clSetKernelArg(kernel, arg_index, arg_size, arg_value); return ret; } -cl_int (*my_clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *) = NULL; cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, @@ -301,62 +356,183 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue, const cl_event *event_wait_list, cl_event *event) { - if (my_clEnqueueNDRangeKernel == NULL) my_clEnqueueNDRangeKernel = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clEnqueueNDRangeKernel")); Thneed *thneed = g_thneed; // SNPE doesn't use these assert(num_events_in_wait_list == 0); assert(global_work_offset == NULL); + assert(event_wait_list == NULL); + + cl_int ret = 0; + if (thneed != NULL && thneed->record & THNEED_RECORD) { + if (thneed->context == NULL) { + thneed->command_queue = command_queue; + clGetKernelInfo(kernel, CL_KERNEL_CONTEXT, sizeof(thneed->context), &thneed->context, NULL); + clGetContextInfo(thneed->context, CL_CONTEXT_DEVICES, sizeof(thneed->device_id), &thneed->device_id, NULL); + } + + // if we are recording, we don't actually enqueue the kernel + thneed->kq.push_back(unique_ptr(new CLQueuedKernel(thneed, kernel, work_dim, global_work_size, local_work_size))); + *event = NULL; + } else { + ret = clEnqueueNDRangeKernel(command_queue, kernel, work_dim, + global_work_offset, global_work_size, local_work_size, + num_events_in_wait_list, event_wait_list, event); + } + + return ret; +} + +cl_int thneed_clFinish(cl_command_queue command_queue) { + Thneed *thneed = g_thneed; + + if (thneed != NULL && thneed->record & THNEED_RECORD) { + #ifdef RUN_OPTIMIZER + thneed->optimize(); + #endif + return thneed->clexec(); + } else { + return clFinish(command_queue); + } +} + +cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) { + assert(count == 1); + cl_program ret = clCreateProgramWithSource(context, count, strings, lengths, errcode_ret); + g_program_source[ret] = strings[0]; + return ret; +} + +void *dlsym(void *handle, const char *symbol) { + // TODO: Find dlsym in a better way. Currently this is hand looked up in libdl.so +#if defined QCOM + void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen-0x2d4); +#elif defined QCOM2 + void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen+0x138); +#else + #error "Unsupported platform for thneed" +#endif + if (memcmp("REAL_", symbol, 5) == 0) { + return my_dlsym(handle, symbol+5); + } else if (strcmp("clFinish", symbol) == 0) { + return (void*)thneed_clFinish; + } else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) { + return (void*)thneed_clEnqueueNDRangeKernel; + } else if (strcmp("clSetKernelArg", symbol) == 0) { + return (void*)thneed_clSetKernelArg; + } else if (strcmp("clCreateProgramWithSource", symbol) == 0) { + return (void*)thneed_clCreateProgramWithSource; + } else { + return my_dlsym(handle, symbol); + } +} + +// *********** CLQueuedKernel *********** - char name[0x100]; - clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, sizeof(name), name, NULL); +CLQueuedKernel::CLQueuedKernel(Thneed *lthneed, + cl_kernel _kernel, + cl_uint _work_dim, + const size_t *_global_work_size, + const size_t *_local_work_size) { + thneed = lthneed; + kernel = _kernel; + work_dim = _work_dim; + assert(work_dim <= 3); + for (int i = 0; i < work_dim; i++) { + global_work_size[i] = _global_work_size[i]; + local_work_size[i] = _local_work_size[i]; + } - cl_uint num_args; + char _name[0x100]; + clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, sizeof(_name), _name, NULL); + name = string(_name); clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(num_args), &num_args, NULL); - if (thneed != NULL && thneed->record & 1) { - thneed->command_queue = command_queue; - for (int i = 0; i < num_args; i++) { + // get args + for (int i = 0; i < num_args; i++) { + char arg_name[0x100]; + clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL); + arg_names.push_back(string(arg_name)); + args.push_back(g_args[make_pair(kernel, i)]); + args_size.push_back(g_args_size[make_pair(kernel, i)]); + } + + // get program + clGetKernelInfo(kernel, CL_KERNEL_PROGRAM, sizeof(program), &program, NULL); +} + +int CLQueuedKernel::get_arg_num(const char *search_arg_name) { + for (int i = 0; i < num_args; i++) { + if (arg_names[i] == search_arg_name) return i; + } + printf("failed to find %s in %s\n", search_arg_name, name.c_str()); + assert(false); +} + +cl_int CLQueuedKernel::exec() { + if (kernel == NULL) { + kernel = clCreateKernel(program, name.c_str(), NULL); + arg_names.clear(); + + for (int j = 0; j < num_args; j++) { char arg_name[0x100]; - clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL); - string arg = g_args[make_pair(kernel, i)]; + clGetKernelArgInfo(kernel, j, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL); + arg_names.push_back(string(arg_name)); + + cl_int ret; + if (args[j].size() != 0) { + assert(args[j].size() == args_size[j]); + ret = thneed_clSetKernelArg(kernel, j, args[j].size(), args[j].data()); + } else { + ret = thneed_clSetKernelArg(kernel, j, args_size[j], NULL); + } + assert(ret == CL_SUCCESS); + } + } - if (strcmp(arg_name, "input") == 0 && strcmp(name, "zero_pad_image_float") == 0) { - cl_mem mem; - memcpy(&mem, (void*)arg.data(), sizeof(mem)); - thneed->inputs.push_back(mem); + // save the global inputs/outputs + if (thneed->record & THNEED_RECORD) { + for (int i = 0; i < num_args; i++) { + if (name == "zero_pad_image_float" && arg_names[i] == "input") { + thneed->inputs.push_back(*(cl_mem*)(args[i].data())); } - if (strcmp(arg_name, "output") == 0 && strcmp(name, "image2d_to_buffer_float") == 0) { - cl_mem mem; - memcpy(&mem, (void*)arg.data(), sizeof(mem)); - thneed->output = mem; + if (name == "image2d_to_buffer_float" && arg_names[i] == "output") { + thneed->output = *(cl_mem*)(args[i].data()); } } } - if (thneed != NULL && thneed->record & 2) { - printf("%p %56s -- ", kernel, name); - for (int i = 0; i < work_dim; i++) { - printf("%4zu ", global_work_size[i]); - } - printf(" -- "); - for (int i = 0; i < work_dim; i++) { - printf("%4zu ", local_work_size[i]); - } - printf("\n"); + + if (thneed->record & THNEED_DEBUG) { + debug_print(thneed->record & THNEED_VERBOSE_DEBUG); + } + + return clEnqueueNDRangeKernel(thneed->command_queue, + kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, NULL); +} + +void CLQueuedKernel::debug_print(bool verbose) { + printf("%p %56s -- ", kernel, name.c_str()); + for (int i = 0; i < work_dim; i++) { + printf("%4zu ", global_work_size[i]); + } + printf(" -- "); + for (int i = 0; i < work_dim; i++) { + printf("%4zu ", local_work_size[i]); } - if (thneed != NULL && thneed->record & 4) { - // extreme debug + printf("\n"); + + if (verbose) { for (int i = 0; i < num_args; i++) { char arg_type[0x100]; - char arg_name[0x100]; clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_NAME, sizeof(arg_type), arg_type, NULL); - clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL); - string arg = g_args[make_pair(kernel, i)]; - printf(" %s %s", arg_type, arg_name); + string arg = args[i]; + printf(" %s %s", arg_type, arg_names[i].c_str()); void *arg_value = (void*)arg.data(); int arg_size = arg.size(); - if (arg_size == 1) { + if (arg_size == 0) { + printf(" (size) %d", args_size[i]); + } else if (arg_size == 1) { printf(" = %d", *((char*)arg_value)); } else if (arg_size == 2) { printf(" = %d", *((short*)arg_value)); @@ -373,19 +549,24 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue, if (strcmp("image2d_t", arg_type) == 0 || strcmp("image1d_t", arg_type) == 0) { cl_image_format format; size_t width, height, depth, array_size, row_pitch, slice_pitch; + cl_mem buf; clGetImageInfo(val, CL_IMAGE_FORMAT, sizeof(format), &format, NULL); + assert(format.image_channel_order == CL_RGBA); assert(format.image_channel_data_type == CL_HALF_FLOAT); clGetImageInfo(val, CL_IMAGE_WIDTH, sizeof(width), &width, NULL); clGetImageInfo(val, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL); + clGetImageInfo(val, CL_IMAGE_ROW_PITCH, sizeof(row_pitch), &row_pitch, NULL); clGetImageInfo(val, CL_IMAGE_DEPTH, sizeof(depth), &depth, NULL); clGetImageInfo(val, CL_IMAGE_ARRAY_SIZE, sizeof(array_size), &array_size, NULL); - clGetImageInfo(val, CL_IMAGE_ROW_PITCH, sizeof(row_pitch), &row_pitch, NULL); clGetImageInfo(val, CL_IMAGE_SLICE_PITCH, sizeof(slice_pitch), &slice_pitch, NULL); assert(depth == 0); assert(array_size == 0); assert(slice_pitch == 0); - printf(" image %zu x %zu rp %zu", width, height, row_pitch); + clGetImageInfo(val, CL_IMAGE_BUFFER, sizeof(buf), &buf, NULL); + size_t sz; + clGetMemObjectInfo(buf, CL_MEM_SIZE, sizeof(sz), &sz, NULL); + printf(" image %zu x %zu rp %zu @ %p buffer %zu", width, height, row_pitch, buf, sz); } else { size_t sz; clGetMemObjectInfo(val, CL_MEM_SIZE, sizeof(sz), &sz, NULL); @@ -396,79 +577,5 @@ cl_int thneed_clEnqueueNDRangeKernel(cl_command_queue command_queue, printf("\n"); } } - - cl_int ret = my_clEnqueueNDRangeKernel(command_queue, kernel, work_dim, - global_work_offset, global_work_size, local_work_size, - num_events_in_wait_list, event_wait_list, event); - - /*uint64_t tb = nanos_since_boot(); - clWaitForEvents(1, event); - uint64_t te = nanos_since_boot(); - if (thneed != NULL && thneed->record & 2) { - printf(" wait %lu us\n", (te-tb)/1000); - }*/ - - return ret; -} - -//#define SAVE_KERNELS - -#ifdef SAVE_KERNELS -map program_source; - -cl_program (*my_clCreateProgramWithSource)(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) = NULL; -cl_program thneed_clCreateProgramWithSource(cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) { - if (my_clCreateProgramWithSource == NULL) my_clCreateProgramWithSource = reinterpret_cast(dlsym(RTLD_NEXT, "REAL_clCreateProgramWithSource")); - assert(count == 1); - size_t my_lengths[1]; - my_lengths[0] = lengths[0]; - - char fn[0x100]; - snprintf(fn, sizeof(fn), "/tmp/program_%zu.cl", strlen(strings[0])); - FILE *f = fopen(fn, "wb"); - fprintf(f, "%s", strings[0]); - fclose(f); - - char tmp[0x10000]; - memset(tmp, 0, sizeof(tmp)); - snprintf(fn, sizeof(fn), "/tmp/patched_%zu.cl", strlen(strings[0])); - FILE *g = fopen(fn, "rb"); - if (g != NULL) { - printf("LOADING PATCHED PROGRAM %s\n", fn); - fread(tmp, 1, sizeof(tmp), g); - fclose(g); - strings[0] = tmp; - my_lengths[0] = strlen(tmp); - } - - program_source[ret] = strings[0]; - - cl_program ret = my_clCreateProgramWithSource(context, count, strings, my_lengths, errcode_ret); - return ret; -} -#endif - -void *dlsym(void *handle, const char *symbol) { - // TODO: Find dlsym in a better way. Currently this is hand looked up in libdl.so -#if defined QCOM - void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen-0x2d4); -#elif defined QCOM2 - void *(*my_dlsym)(void *handle, const char *symbol) = (void *(*)(void *handle, const char *symbol))((uintptr_t)dlopen+0x138); -#else - #error "Unsupported platform for thneed" -#endif - if (memcmp("REAL_", symbol, 5) == 0) { - return my_dlsym(handle, symbol+5); - } else if (strcmp("clEnqueueNDRangeKernel", symbol) == 0) { - return (void*)thneed_clEnqueueNDRangeKernel; - } else if (strcmp("clSetKernelArg", symbol) == 0) { - return (void*)thneed_clSetKernelArg; -#ifdef SAVE_KERNELS - } else if (strcmp("clCreateProgramWithSource", symbol) == 0) { - return (void*)thneed_clCreateProgramWithSource; -#endif - } else { - return my_dlsym(handle, symbol); - } } diff --git a/selfdrive/modeld/thneed/thneed.h b/selfdrive/modeld/thneed/thneed.h index a145a28476..e1039efdff 100644 --- a/selfdrive/modeld/thneed/thneed.h +++ b/selfdrive/modeld/thneed/thneed.h @@ -9,10 +9,18 @@ #include "include/msm_kgsl.h" #include #include +#include #include +#define THNEED_RECORD 1 +#define THNEED_DEBUG 2 +#define THNEED_VERBOSE_DEBUG 4 + using namespace std; +namespace json11 { + class Json; +} class Thneed; class GPUMalloc { @@ -25,28 +33,59 @@ class GPUMalloc { int remaining; }; +class CLQueuedKernel { + public: + CLQueuedKernel(Thneed *lthneed) { thneed = lthneed; } + CLQueuedKernel(Thneed *lthneed, + cl_kernel _kernel, + cl_uint _work_dim, + const size_t *_global_work_size, + const size_t *_local_work_size); + cl_int exec(); + void debug_print(bool verbose); + int get_arg_num(const char *search_arg_name); + cl_program program; + string name; + cl_uint num_args; + vector arg_names; + vector args; + vector args_size; + cl_kernel kernel = NULL; + json11::Json to_json() const; + + cl_uint work_dim; + size_t global_work_size[3] = {0}; + size_t local_work_size[3] = {0}; + private: + Thneed *thneed; +}; + class CachedCommand { public: CachedCommand(Thneed *lthneed, struct kgsl_gpu_command *cmd); void exec(bool wait); - void disassemble(); + void disassemble(int cmd_index); private: struct kgsl_gpu_command cache; - struct kgsl_command_object cmds[2]; - struct kgsl_command_object objs[1]; + unique_ptr cmds; + unique_ptr objs; Thneed *thneed; + vector > kq; }; class Thneed { public: - Thneed(); + Thneed(bool do_clinit=false); void stop(); void execute(float **finputs, float *foutput, bool slow=false); + int optimize(); vector inputs; - cl_mem output; + cl_mem output = NULL; + cl_context context = NULL; cl_command_queue command_queue; + cl_device_id device_id; int context_id; // protected? @@ -56,5 +95,19 @@ class Thneed { vector > cmds; vector syncobjs; int fd; + + // all CL kernels + cl_int clexec(); + vector > kq; + + // pending CL kernels + vector > ckq; + + // loading and saving + void load(const char *filename); + void save(const char *filename); + private: + void clinit(); + json11::Json to_json(); };