#ifndef HEXAGON_MMVEC_MACROS_H
#define HEXAGON_MMVEC_MACROS_H

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "arch.h"
#include "mmvec/system_ext_mmvec.h"

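/*
 * Outside of translation (!QEMU_GENERATE), helper functions receive their
 * vector operands as opaque pointers (e.g. VdV_void).  These aliases
 * dereference them so instruction semantics can use the operands directly
 * as MMVector, MMVectorPair, and MMQReg values.
 */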
#ifndef QEMU_GENERATE
#define VdV (*(MMVector *)(VdV_void))
#define VsV (*(MMVector *)(VsV_void))
#define VuV (*(MMVector *)(VuV_void))
#define VvV (*(MMVector *)(VvV_void))
#define VwV (*(MMVector *)(VwV_void))
#define VxV (*(MMVector *)(VxV_void))
#define VyV (*(MMVector *)(VyV_void))

#define VddV (*(MMVectorPair *)(VddV_void))
#define VuuV (*(MMVectorPair *)(VuuV_void))
#define VvvV (*(MMVectorPair *)(VvvV_void))
#define VxxV (*(MMVectorPair *)(VxxV_void))

#define QeV (*(MMQReg *)(QeV_void))
#define QdV (*(MMQReg *)(QdV_void))
#define QsV (*(MMQReg *)(QsV_void))
#define QtV (*(MMQReg *)(QtV_void))
#define QuV (*(MMQReg *)(QuV_void))
#define QvV (*(MMQReg *)(QvV_void))
#define QxV (*(MMQReg *)(QxV_void))
#endif

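/*
 * Record one byte of a VTCM scatter/gather access in env->vtcm_log:
 * the byte value, its per-byte enable bit, and its virtual address.
 * The log is committed to memory in a separate step (see
 * SCATTER_OP_WRITE_TO_MEM below).
 */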
#define LOG_VTCM_BYTE(VA, MASK, VAL, IDX) \
    do { \
        env->vtcm_log.data.ub[IDX] = (VAL); \
        if (MASK) { \
            set_bit((IDX), env->vtcm_log.mask); \
        } else { \
            clear_bit((IDX), env->vtcm_log.mask); \
        } \
        env->vtcm_log.va[IDX] = (VA); \
    } while (0)

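/*
 * Predicate (Q register) helpers.  A QReg holds one bit per vector byte
 * (fVECSIZE() bits total); fGETQBIT extracts one bit, and fGENMASKW /
 * fGENMASKH expand the per-byte bits into 0x00/0xFF lane masks for word
 * and halfword lanes.
 */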
#define fNOTQ(VAL) \
    ({ \
        MMQReg _ret; \
        int _i_; \
        for (_i_ = 0; _i_ < fVECSIZE() / 64; _i_++) { \
            _ret.ud[_i_] = ~VAL.ud[_i_]; \
        } \
        _ret; \
    })
#define fGETQBITS(REG, WIDTH, MASK, BITNO) \
    ((MASK) & (REG.w[(BITNO) >> 5] >> ((BITNO) & 0x1f)))
#define fGETQBIT(REG, BITNO) fGETQBITS(REG, 1, 1, BITNO)
#define fGENMASKW(QREG, IDX) \
    (((fGETQBIT(QREG, (IDX * 4 + 0)) ? 0xFF : 0x0) << 0) | \
     ((fGETQBIT(QREG, (IDX * 4 + 1)) ? 0xFF : 0x0) << 8) | \
     ((fGETQBIT(QREG, (IDX * 4 + 2)) ? 0xFF : 0x0) << 16) | \
     ((fGETQBIT(QREG, (IDX * 4 + 3)) ? 0xFF : 0x0) << 24))
#define fGETNIBBLE(IDX, SRC) (fSXTN(4, 8, (SRC >> (4 * IDX)) & 0xF))
#define fGETCRUMB(IDX, SRC) (fSXTN(2, 8, (SRC >> (2 * IDX)) & 0x3))
#define fGETCRUMB_SYMMETRIC(IDX, SRC) \
    ((fGETCRUMB(IDX, SRC) >= 0 ? (2 - fGETCRUMB(IDX, SRC)) \
                               : fGETCRUMB(IDX, SRC)))
#define fGENMASKH(QREG, IDX) \
    (((fGETQBIT(QREG, (IDX * 2 + 0)) ? 0xFF : 0x0) << 0) | \
     ((fGETQBIT(QREG, (IDX * 2 + 1)) ? 0xFF : 0x0) << 8))
#define fGETMASKW(VREG, QREG, IDX) (VREG.w[IDX] & fGENMASKW((QREG), IDX))
#define fGETMASKH(VREG, QREG, IDX) (VREG.h[IDX] & fGENMASKH((QREG), IDX))
#define fCONDMASK8(QREG, IDX, YESVAL, NOVAL) \
    (fGETQBIT(QREG, IDX) ? (YESVAL) : (NOVAL))
#define fCONDMASK16(QREG, IDX, YESVAL, NOVAL) \
    ((fGENMASKH(QREG, IDX) & (YESVAL)) | \
     (fGENMASKH(fNOTQ(QREG), IDX) & (NOVAL)))
#define fCONDMASK32(QREG, IDX, YESVAL, NOVAL) \
    ((fGENMASKW(QREG, IDX) & (YESVAL)) | \
     (fGENMASKW(fNOTQ(QREG), IDX) & (NOVAL)))
#define fSETQBITS(REG, WIDTH, MASK, BITNO, VAL) \
    do { \
        uint32_t __TMP = (VAL); \
        REG.w[(BITNO) >> 5] &= ~((MASK) << ((BITNO) & 0x1f)); \
        REG.w[(BITNO) >> 5] |= (((__TMP) & (MASK)) << ((BITNO) & 0x1f)); \
    } while (0)
#define fSETQBIT(REG, BITNO, VAL) fSETQBITS(REG, 1, 1, BITNO, VAL)
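/*
 * Vector geometry: fVECLOGSIZE() is 7, so vectors are 1 << 7 = 128 bytes
 * (1024 bits), and fVELEM(WIDTH) gives the lane count, e.g.
 * fVELEM(32) = 1024 / 32 = 32 word lanes.
 */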
#define fVBYTES() (fVECSIZE())
#define fVALIGN(ADDR, LOG2_ALIGNMENT) (ADDR = ADDR & ~(LOG2_ALIGNMENT - 1))
#define fVLASTBYTE(ADDR, LOG2_ALIGNMENT) (ADDR = ADDR | (LOG2_ALIGNMENT - 1))
#define fVELEM(WIDTH) ((fVECSIZE() * 8) / WIDTH)
#define fVECLOGSIZE() (7)
#define fVECSIZE() (1 << fVECLOGSIZE())
#define fSWAPB(A, B) do { uint8_t tmp = A; A = B; B = tmp; } while (0)
#define fV_AL_CHECK(EA, MASK) \
    if ((EA) & (MASK)) { \
        warn("aligning misaligned vector. EA=%08x", (EA)); \
    }
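/*
 * Scatter/gather accesses are logged byte by byte into env->vtcm_log and
 * committed to memory as a separate step; the *_INIT macros prepare that
 * log before a new operation starts.
 */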
#define fSCATTER_INIT(REGION_START, LENGTH, ELEMENT_SIZE) \
    mem_vector_scatter_init(env)
#define fGATHER_INIT(REGION_START, LENGTH, ELEMENT_SIZE) \
    mem_vector_gather_init(env)
#define fSCATTER_FINISH(OP)
#define fGATHER_FINISH()
#define fLOG_SCATTER_OP(SIZE) \
    do { \
        env->vtcm_log.op = true; \
        env->vtcm_log.op_size = SIZE; \
    } while (0)
#define fVLOG_VTCM_WORD_INCREMENT(EA, OFFSET, INC, IDX, ALIGNMENT, LEN) \
    do { \
        int log_byte = 0; \
        target_ulong va = EA; \
        target_ulong va_high = EA + LEN; \
        for (int i0 = 0; i0 < 4; i0++) { \
            log_byte = (va + i0) <= va_high; \
            LOG_VTCM_BYTE(va + i0, log_byte, INC.ub[4 * IDX + i0], \
                          4 * IDX + i0); \
        } \
    } while (0)
#define fVLOG_VTCM_HALFWORD_INCREMENT(EA, OFFSET, INC, IDX, ALIGNMENT, LEN) \
    do { \
        int log_byte = 0; \
        target_ulong va = EA; \
        target_ulong va_high = EA + LEN; \
        for (int i0 = 0; i0 < 2; i0++) { \
            log_byte = (va + i0) <= va_high; \
            LOG_VTCM_BYTE(va + i0, log_byte, INC.ub[2 * IDX + i0], \
                          2 * IDX + i0); \
        } \
    } while (0)

#define fVLOG_VTCM_HALFWORD_INCREMENT_DV(EA, OFFSET, INC, IDX, IDX2, IDX_H, \
                                         ALIGNMENT, LEN) \
    do { \
        int log_byte = 0; \
        target_ulong va = EA; \
        target_ulong va_high = EA + LEN; \
        for (int i0 = 0; i0 < 2; i0++) { \
            log_byte = (va + i0) <= va_high; \
            LOG_VTCM_BYTE(va + i0, log_byte, INC.ub[2 * IDX + i0], \
                          2 * IDX + i0); \
        } \
    } while (0)

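/*
 * Log one gathered element: each byte is loaded from memory, captured in
 * the gather temporary tmp_VRegs[0], and recorded in the VTCM log with an
 * enable bit that is set only when the byte is within the region and, for
 * the predicated (Q) variants, when QVAL is true.
 */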
#define GATHER_FUNCTION(EA, OFFSET, IDX, LEN, ELEMENT_SIZE, BANK_IDX, QVAL) \
    do { \
        int i0; \
        target_ulong va = EA; \
        target_ulong va_high = EA + LEN; \
        uintptr_t ra = GETPC(); \
        int log_bank = 0; \
        int log_byte = 0; \
        for (i0 = 0; i0 < ELEMENT_SIZE; i0++) { \
            log_byte = ((va + i0) <= va_high) && QVAL; \
            log_bank |= (log_byte << i0); \
            uint8_t B; \
            B = cpu_ldub_data_ra(env, EA + i0, ra); \
            env->tmp_VRegs[0].ub[ELEMENT_SIZE * IDX + i0] = B; \
            LOG_VTCM_BYTE(va + i0, log_byte, B, ELEMENT_SIZE * IDX + i0); \
        } \
    } while (0)
#define fVLOG_VTCM_GATHER_WORD(EA, OFFSET, IDX, LEN) \
    do { \
        GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 4, IDX, 1); \
    } while (0)
#define fVLOG_VTCM_GATHER_HALFWORD(EA, OFFSET, IDX, LEN) \
    do { \
        GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 2, IDX, 1); \
    } while (0)
#define fVLOG_VTCM_GATHER_HALFWORD_DV(EA, OFFSET, IDX, IDX2, IDX_H, LEN) \
    do { \
        GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 2, (2 * IDX2 + IDX_H), 1); \
    } while (0)
#define fVLOG_VTCM_GATHER_WORDQ(EA, OFFSET, IDX, Q, LEN) \
    do { \
        GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 4, IDX, \
                        fGETQBIT(QsV, 4 * IDX + i0)); \
    } while (0)
#define fVLOG_VTCM_GATHER_HALFWORDQ(EA, OFFSET, IDX, Q, LEN) \
    do { \
        GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 2, IDX, \
                        fGETQBIT(QsV, 2 * IDX + i0)); \
    } while (0)
#define fVLOG_VTCM_GATHER_HALFWORDQ_DV(EA, OFFSET, IDX, IDX2, IDX_H, Q, LEN) \
    do { \
        GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 2, (2 * IDX2 + IDX_H), \
                        fGETQBIT(QsV, 2 * IDX + i0)); \
    } while (0)
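/*
 * Commit an accumulating scatter (scatter-add): for each logged element,
 * read the current memory contents, add the logged increment, and write
 * the sum back, clearing each log entry as it is consumed.
 */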
#define SCATTER_OP_WRITE_TO_MEM(TYPE) \
    do { \
        uintptr_t ra = GETPC(); \
        for (int i = 0; i < sizeof(MMVector); i += sizeof(TYPE)) { \
            if (test_bit(i, env->vtcm_log.mask)) { \
                TYPE dst = 0; \
                TYPE inc = 0; \
                for (int j = 0; j < sizeof(TYPE); j++) { \
                    uint8_t val; \
                    val = cpu_ldub_data_ra(env, env->vtcm_log.va[i + j], ra); \
                    dst |= val << (8 * j); \
                    inc |= env->vtcm_log.data.ub[j + i] << (8 * j); \
                    clear_bit(j + i, env->vtcm_log.mask); \
                    env->vtcm_log.data.ub[j + i] = 0; \
                } \
                dst += inc; \
                for (int j = 0; j < sizeof(TYPE); j++) { \
                    cpu_stb_data_ra(env, env->vtcm_log.va[i + j], \
                                    (dst >> (8 * j)) & 0xFF, ra); \
                } \
            } \
        } \
    } while (0)
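/*
 * Probe every logged byte for both read and write access before the
 * commit loop above runs, so that any fault is raised before memory has
 * been partially modified.
 */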
#define SCATTER_OP_PROBE_MEM(TYPE, MMU_IDX, RETADDR) \
    do { \
        for (int i = 0; i < sizeof(MMVector); i += sizeof(TYPE)) { \
            if (test_bit(i, env->vtcm_log.mask)) { \
                for (int j = 0; j < sizeof(TYPE); j++) { \
                    probe_read(env, env->vtcm_log.va[i + j], 1, \
                               MMU_IDX, RETADDR); \
                    probe_write(env, env->vtcm_log.va[i + j], 1, \
                                MMU_IDX, RETADDR); \
                } \
            } \
        } \
    } while (0)
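/*
 * Log one scattered element from source vector IN, byte by byte; bytes
 * past the end of the region, or whose predicate QVAL is false, are
 * logged with their enable bit clear.
 */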
#define SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, ELEM_SIZE, BANK_IDX, QVAL, IN) \
    do { \
        int i0; \
        target_ulong va = EA; \
        target_ulong va_high = EA + LEN; \
        int log_bank = 0; \
        int log_byte = 0; \
        for (i0 = 0; i0 < ELEM_SIZE; i0++) { \
            log_byte = ((va + i0) <= va_high) && QVAL; \
            log_bank |= (log_byte << i0); \
            LOG_VTCM_BYTE(va + i0, log_byte, IN.ub[ELEM_SIZE * IDX + i0], \
                          ELEM_SIZE * IDX + i0); \
        } \
    } while (0)
#define fVLOG_VTCM_HALFWORD(EA, OFFSET, IN, IDX, LEN) \
    do { \
        SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 2, IDX, 1, IN); \
    } while (0)
#define fVLOG_VTCM_WORD(EA, OFFSET, IN, IDX, LEN) \
    do { \
        SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 4, IDX, 1, IN); \
    } while (0)
#define fVLOG_VTCM_HALFWORDQ(EA, OFFSET, IN, IDX, Q, LEN) \
    do { \
        SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 2, IDX, \
                         fGETQBIT(QsV, 2 * IDX + i0), IN); \
    } while (0)
#define fVLOG_VTCM_WORDQ(EA, OFFSET, IN, IDX, Q, LEN) \
    do { \
        SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 4, IDX, \
                         fGETQBIT(QsV, 4 * IDX + i0), IN); \
    } while (0)
#define fVLOG_VTCM_HALFWORD_DV(EA, OFFSET, IN, IDX, IDX2, IDX_H, LEN) \
    do { \
        SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 2, \
                         (2 * IDX2 + IDX_H), 1, IN); \
    } while (0)
#define fVLOG_VTCM_HALFWORDQ_DV(EA, OFFSET, IN, IDX, Q, IDX2, IDX_H, LEN) \
    do { \
        SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 2, (2 * IDX2 + IDX_H), \
                         fGETQBIT(QsV, 2 * IDX + i0), IN); \
    } while (0)
#define fSTORERELEASE(EA, TYPE) \
    do { \
        fV_AL_CHECK(EA, fVECSIZE() - 1); \
    } while (0)
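/*
 * During translation these expand to TCG codegen calls.  Judging from the
 * aligned/unaligned (U-suffix) pairs, the final boolean of gen_vreg_load
 * and gen_vreg_store selects an aligned access, while for
 * gen_vreg_masked_store it inverts the predicate (fSTOREMMVNQ stores the
 * bytes whose mask bit is clear).
 */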
#ifdef QEMU_GENERATE
#define fLOADMMV(EA, DST) gen_vreg_load(ctx, DST##_off, EA, true)
#define fLOADMMVU(EA, DST) gen_vreg_load(ctx, DST##_off, EA, false)
#define fSTOREMMV(EA, SRC) \
    gen_vreg_store(ctx, insn, pkt, EA, SRC##_off, insn->slot, true)
#define fSTOREMMVQ(EA, SRC, MASK) \
    gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, false)
#define fSTOREMMVNQ(EA, SRC, MASK) \
    gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, true)
#define fSTOREMMVU(EA, SRC) \
    gen_vreg_store(ctx, insn, pkt, EA, SRC##_off, insn->slot, false)
#endif
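/*
 * fVFOREACH iterates over all lanes of the given element WIDTH (in bits);
 * fVARRAY_ELEMENT_ACCESS indexes element INDEX across a multi-vector
 * array by selecting the vector in ARRAY.v[] it falls in and then its
 * offset within that vector.
 */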
#define fVFOREACH(WIDTH, VAR) for (VAR = 0; VAR < fVELEM(WIDTH); VAR++)
#define fVARRAY_ELEMENT_ACCESS(ARRAY, TYPE, INDEX) \
    ARRAY.v[(INDEX) / (fVECSIZE() / (sizeof(ARRAY.TYPE[0])))].TYPE[(INDEX) % \
                      (fVECSIZE() / (sizeof(ARRAY.TYPE[0])))]

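/*
 * The saturating and averaging helpers below share one pattern: widen
 * both operands to 2 * WIDTH bits (fZXTN/fSXTN), do the arithmetic
 * exactly, then saturate or shift back.  Assuming fVSATUN saturates to
 * the unsigned WIDTH-bit range, fVUADDSAT(8, 200, 100) widens to 16 bits,
 * adds to 300, and saturates to the 8-bit maximum 255.
 */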
#define fVSATDW(U, V) fVSATW(((((long long)U) << 32) | fZXTN(32, 64, V)))
#define fVASL_SATHI(U, V) fVSATW(((U) << 1) | ((V) >> 31))
#define fVUADDSAT(WIDTH, U, V) \
    fVSATUN(WIDTH, fZXTN(WIDTH, 2 * WIDTH, U) + fZXTN(WIDTH, 2 * WIDTH, V))
#define fVSADDSAT(WIDTH, U, V) \
    fVSATN(WIDTH, fSXTN(WIDTH, 2 * WIDTH, U) + fSXTN(WIDTH, 2 * WIDTH, V))
#define fVUSUBSAT(WIDTH, U, V) \
    fVSATUN(WIDTH, fZXTN(WIDTH, 2 * WIDTH, U) - fZXTN(WIDTH, 2 * WIDTH, V))
#define fVSSUBSAT(WIDTH, U, V) \
    fVSATN(WIDTH, fSXTN(WIDTH, 2 * WIDTH, U) - fSXTN(WIDTH, 2 * WIDTH, V))
#define fVAVGU(WIDTH, U, V) \
    ((fZXTN(WIDTH, 2 * WIDTH, U) + fZXTN(WIDTH, 2 * WIDTH, V)) >> 1)
#define fVAVGURND(WIDTH, U, V) \
    ((fZXTN(WIDTH, 2 * WIDTH, U) + fZXTN(WIDTH, 2 * WIDTH, V) + 1) >> 1)
#define fVNAVGU(WIDTH, U, V) \
    ((fZXTN(WIDTH, 2 * WIDTH, U) - fZXTN(WIDTH, 2 * WIDTH, V)) >> 1)
#define fVNAVGURNDSAT(WIDTH, U, V) \
    fVSATUN(WIDTH, ((fZXTN(WIDTH, 2 * WIDTH, U) - \
                     fZXTN(WIDTH, 2 * WIDTH, V) + 1) >> 1))
#define fVAVGS(WIDTH, U, V) \
    ((fSXTN(WIDTH, 2 * WIDTH, U) + fSXTN(WIDTH, 2 * WIDTH, V)) >> 1)
#define fVAVGSRND(WIDTH, U, V) \
    ((fSXTN(WIDTH, 2 * WIDTH, U) + fSXTN(WIDTH, 2 * WIDTH, V) + 1) >> 1)
#define fVNAVGS(WIDTH, U, V) \
    ((fSXTN(WIDTH, 2 * WIDTH, U) - fSXTN(WIDTH, 2 * WIDTH, V)) >> 1)
#define fVNAVGSRND(WIDTH, U, V) \
    ((fSXTN(WIDTH, 2 * WIDTH, U) - fSXTN(WIDTH, 2 * WIDTH, V) + 1) >> 1)
#define fVNAVGSRNDSAT(WIDTH, U, V) \
    fVSATN(WIDTH, ((fSXTN(WIDTH, 2 * WIDTH, U) - \
                    fSXTN(WIDTH, 2 * WIDTH, V) + 1) >> 1))
#define fVNOROUND(VAL, SHAMT) VAL
#define fVNOSAT(VAL) VAL
#define fVROUND(VAL, SHAMT) \
    ((VAL) + (((SHAMT) > 0) ? (1LL << ((SHAMT) - 1)) : 0))
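/*
 * Carry out of a 32-bit add with carry-in C: the addends are zero-extended
 * to 64 bits so that bit 32 of the exact sum is the carry, e.g.
 * fCARRY_FROM_ADD32(0xFFFFFFFF, 1, 0) == 1.
 */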
#define fCARRY_FROM_ADD32(A, B, C) \
    (((fZXTN(32, 64, A) + fZXTN(32, 64, B) + C) >> 32) & 1)
#define fUARCH_NOTE_PUMP_4X()
#define fUARCH_NOTE_PUMP_2X()

#define IV1DEAD()
#endif /* HEXAGON_MMVEC_MACROS_H */