1
2
3
4#include <linux/ascii85.h>
5#include "msm_gem.h"
6#include "a6xx_gpu.h"
7#include "a6xx_gmu.h"
8#include "a6xx_gpu_state.h"
9#include "a6xx_gmu.xml.h"
10
/*
 * One captured piece of GPU state.
 *
 * @handle: descriptor the data was captured for (a register list, cluster,
 *          shader block, debugbus block, ...); the show helpers treat a NULL
 *          handle as "nothing captured"
 * @data:   captured dwords, allocated through state_kcalloc()/state_kmemdup()
 */
struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
};
15
/*
 * Complete a6xx crash state.
 *
 * @base is the generic msm state this structure is embedded around
 * (container_of() is used to get back from it).  Each pointer/nr_* pair is an
 * array of captured objects and the number of valid entries in it;
 * @vbif_debugbus is a single object and has no count.  @objs is the list of
 * every allocation made through state_kcalloc(), walked and freed in
 * a6xx_gpu_state_destroy().
 */
struct a6xx_gpu_state {
	struct msm_gpu_state base;

	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	struct a6xx_gpu_state_obj *vbif_debugbus;

	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	struct list_head objs;
};
47
48static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
49{
50 in[0] = val;
51 in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
52
53 return 2;
54}
55
56static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
57{
58 in[0] = target;
59 in[1] = (((u64) reg) << 44 | dwords);
60
61 return 2;
62}
63
64static inline int CRASHDUMP_FINI(u64 *in)
65{
66 in[0] = 0;
67 in[1] = 0;
68
69 return 2;
70}
71
/*
 * Buffer shared with the crashdumper.
 *
 * @ptr:  kernel mapping of the buffer (may hold an ERR_PTR after a failed
 *        a6xx_crashdumper_init())
 * @bo:   GEM object backing the buffer
 * @iova: GPU address of the buffer; the script lives at the start and the
 *        captured data at A6XX_CD_DATA_OFFSET
 */
struct a6xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};
77
/*
 * Header placed in front of every state allocation so that all of them can
 * be chained on a6xx_gpu_state.objs and freed together.
 *
 * @node: link in a6xx_gpu_state.objs
 * @data: start of the payload handed back to the caller of state_kcalloc()
 */
struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];
};
82
83static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
84{
85 struct a6xx_state_memobj *obj =
86 kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
87
88 if (!obj)
89 return NULL;
90
91 list_add_tail(&obj->node, &a6xx_state->objs);
92 return &obj->data;
93}
94
95static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
96 size_t size)
97{
98 void *dst = state_kcalloc(a6xx_state, 1, size);
99
100 if (dst)
101 memcpy(dst, src, size);
102 return dst;
103}
104
105
106
107
108
/*
 * Layout of the 1M crashdumper buffer: the first 8k are left for the script,
 * the remainder receives the captured data.
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE (SZ_1M - A6XX_CD_DATA_OFFSET)
111
112static int a6xx_crashdumper_init(struct msm_gpu *gpu,
113 struct a6xx_crashdumper *dumper)
114{
115 dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
116 SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
117 &dumper->bo, &dumper->iova);
118
119 if (!IS_ERR(dumper->ptr))
120 msm_gem_object_set_name(dumper->bo, "crashdump");
121
122 return PTR_ERR_OR_ZERO(dumper->ptr);
123}
124
125static int a6xx_crashdumper_run(struct msm_gpu *gpu,
126 struct a6xx_crashdumper *dumper)
127{
128 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
129 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
130 u32 val;
131 int ret;
132
133 if (IS_ERR_OR_NULL(dumper->ptr))
134 return -EINVAL;
135
136 if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
137 return -EINVAL;
138
139
140 wmb();
141
142 gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
143 REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
144
145 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
146
147 ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
148 val & 0x02, 100, 10000);
149
150 gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
151
152 return ret;
153}
154
155
156static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
157 u32 *data)
158{
159 u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
160 A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
161
162 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
163 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
164 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
165 gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
166
167
168 udelay(1);
169
170 data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
171 data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
172
173 return 2;
174}
175
/* Write @val to the register at dword offset @offset from the mapping @ptr */
#define cxdbg_write(ptr, offset, val) \
	msm_writel((val), (ptr) + ((offset) << 2))

/* Read the register at dword offset @offset from the mapping @ptr */
#define cxdbg_read(ptr, offset) \
	msm_readl((ptr) + ((offset) << 2))
181
182
183static int cx_debugbus_read(void *__iomem cxdbg, u32 block, u32 offset,
184 u32 *data)
185{
186 u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
187 A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
188
189 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
190 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
191 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
192 cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
193
194
195 udelay(1);
196
197 data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
198 data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
199
200 return 2;
201}
202
203
204static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
205 u32 reg, int count, u32 *data)
206{
207 int i;
208
209 gpu_write(gpu, ctrl0, reg);
210
211 for (i = 0; i < count; i++) {
212 gpu_write(gpu, ctrl1, i);
213 data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
214 }
215
216 return count;
217}
218
/* Number of VBIF sub-blocks of each kind dumped over the test bus */
#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

/*
 * Total dwords in the VBIF debugbus dump: 16 per AXI arbiter block, 18 per
 * XIN AXI block and 12 per XIN core block.
 */
#define VBIF_DEBUGBUS_BLOCK_SIZE \
	((AXI_ARB_BLOCKS * 16) + \
	 (XIN_AXI_BLOCKS * 18) + \
	 (XIN_CORE_BLOCKS * 12))
227
/*
 * Capture the VBIF debug bus into @obj.
 *
 * Allocates VBIF_DEBUGBUS_BLOCK_SIZE dwords and fills them with the AXI
 * arbiter, XIN AXI and XIN core test bus entries, in that order.  On
 * allocation failure @obj->data is left NULL and nothing is read.
 */
static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_gpu_state_obj *obj)
{
	u32 clk, *ptr;
	int i;

	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
		sizeof(u32));
	if (!obj->data)
		return;

	/* The VBIF block has no descriptor; it is shown by name instead */
	obj->handle = NULL;

	/* Remember the current clock setting so it can be restored below */
	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

	/* Set the FORCE_ON_TESTBUS bit for the duration of the dump */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

	/* Clear the test bus 1 selection while test bus 2 is being read */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

	/* Write 1 to the test bus output control register */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

	ptr = obj->data;

	/* AXI arbiter blocks: select bits 16 and up, 16 entries each */
	for (i = 0; i < AXI_ARB_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << (i + 16), 16, ptr);

	/* XIN AXI blocks: select bits 0 and up, 18 entries each */
	for (i = 0; i < XIN_AXI_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << i, 18, ptr);

	/* Clear the test bus 2 selection before switching to test bus 1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

	/* XIN core blocks: test bus 1, 12 entries each */
	for (i = 0; i < XIN_CORE_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
			1 << i, 12, ptr);

	/* Restore the clock setting saved at the start */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}
281
282static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
283 struct a6xx_gpu_state *a6xx_state,
284 const struct a6xx_debugbus_block *block,
285 struct a6xx_gpu_state_obj *obj)
286{
287 int i;
288 u32 *ptr;
289
290 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
291 if (!obj->data)
292 return;
293
294 obj->handle = block;
295
296 for (ptr = obj->data, i = 0; i < block->count; i++)
297 ptr += debugbus_read(gpu, block->id, i, ptr);
298}
299
300static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
301 struct a6xx_gpu_state *a6xx_state,
302 const struct a6xx_debugbus_block *block,
303 struct a6xx_gpu_state_obj *obj)
304{
305 int i;
306 u32 *ptr;
307
308 obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
309 if (!obj->data)
310 return;
311
312 obj->handle = block;
313
314 for (ptr = obj->data, i = 0; i < block->count; i++)
315 ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
316}
317
/*
 * Capture the GX, VBIF and CX debug buses into @a6xx_state.
 *
 * The GX debug bus controller is configured first; the CX one is configured
 * only if a "cx_dbgc" memory resource exists and can be mapped.  The mapping
 * is released again before returning.
 */
static void a6xx_get_debugbus(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct resource *res;
	void __iomem *cxdbg = NULL;
	int nr_debugbus_blocks;

	/* Configure the GX debug bus controller before reading any block */
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

	/*
	 * The CX debug bus has its own register region; map it if the
	 * platform device describes one.
	 */
	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
			"cx_dbgc");

	if (res)
		cxdbg = ioremap(res->start, resource_size(res));

	/* Apply the same configuration to the CX debug bus controller */
	if (cxdbg) {
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
			0x76543210);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
			0xFEDCBA98);

		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
	}

	/* One extra slot is reserved for the GBIF block when the GPU has GBIF */
	nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);

	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
			sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		int i;

		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
			a6xx_get_debugbus_block(gpu,
				a6xx_state,
				&a6xx_debugbus_blocks[i],
				&a6xx_state->debugbus[i]);

		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);

		/*
		 * On GBIF targets the GBIF block is dumped through the regular
		 * debug bus path; after the loop above i indexes the extra
		 * slot reserved for it.
		 */
		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
			a6xx_get_debugbus_block(gpu, a6xx_state,
				&a6xx_gbif_debugbus_block,
				&a6xx_state->debugbus[i]);

			a6xx_state->nr_debugbus += 1;
		}
	}

	/* Targets without GBIF dump the VBIF test bus instead */
	if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
		a6xx_state->vbif_debugbus =
			state_kcalloc(a6xx_state, 1,
					sizeof(*a6xx_state->vbif_debugbus));

		if (a6xx_state->vbif_debugbus)
			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
					a6xx_state->vbif_debugbus);
	}

	/* Dump the CX blocks, then drop the temporary mapping */
	if (cxdbg) {
		a6xx_state->cx_debugbus =
			state_kcalloc(a6xx_state,
			ARRAY_SIZE(a6xx_cx_debugbus_blocks),
			sizeof(*a6xx_state->cx_debugbus));

		if (a6xx_state->cx_debugbus) {
			int i;

			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
				a6xx_get_cx_debugbus_block(cxdbg,
					a6xx_state,
					&a6xx_cx_debugbus_blocks[i],
					&a6xx_state->cx_debugbus[i]);

			a6xx_state->nr_cx_debugbus =
				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
		}

		iounmap(cxdbg);
	}
}
442
/*
 * Number of registers in the inclusive range described by the pair
 * reg[a] (first) and reg[a + 1] (last).
 */
#define RANGE(reg, a) (1 + (reg)[(a) + 1] - (reg)[(a)])
444
445
446static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
447 struct a6xx_gpu_state *a6xx_state,
448 const struct a6xx_dbgahb_cluster *dbgahb,
449 struct a6xx_gpu_state_obj *obj,
450 struct a6xx_crashdumper *dumper)
451{
452 u64 *in = dumper->ptr;
453 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
454 size_t datasize;
455 int i, regcount = 0;
456
457 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
458 int j;
459
460 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
461 (dbgahb->statetype + i * 2) << 8);
462
463 for (j = 0; j < dbgahb->count; j += 2) {
464 int count = RANGE(dbgahb->registers, j);
465 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
466 dbgahb->registers[j] - (dbgahb->base >> 2);
467
468 in += CRASHDUMP_READ(in, offset, count, out);
469
470 out += count * sizeof(u32);
471
472 if (i == 0)
473 regcount += count;
474 }
475 }
476
477 CRASHDUMP_FINI(in);
478
479 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
480
481 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
482 return;
483
484 if (a6xx_crashdumper_run(gpu, dumper))
485 return;
486
487 obj->handle = dbgahb;
488 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
489 datasize);
490}
491
492static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
493 struct a6xx_gpu_state *a6xx_state,
494 struct a6xx_crashdumper *dumper)
495{
496 int i;
497
498 a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
499 ARRAY_SIZE(a6xx_dbgahb_clusters),
500 sizeof(*a6xx_state->dbgahb_clusters));
501
502 if (!a6xx_state->dbgahb_clusters)
503 return;
504
505 a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
506
507 for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
508 a6xx_get_dbgahb_cluster(gpu, a6xx_state,
509 &a6xx_dbgahb_clusters[i],
510 &a6xx_state->dbgahb_clusters[i], dumper);
511}
512
513
514static void a6xx_get_cluster(struct msm_gpu *gpu,
515 struct a6xx_gpu_state *a6xx_state,
516 const struct a6xx_cluster *cluster,
517 struct a6xx_gpu_state_obj *obj,
518 struct a6xx_crashdumper *dumper)
519{
520 u64 *in = dumper->ptr;
521 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
522 size_t datasize;
523 int i, regcount = 0;
524
525
526 if (cluster->sel_reg)
527 in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
528
529 for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
530 int j;
531
532 in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
533 (cluster->id << 8) | (i << 4) | i);
534
535 for (j = 0; j < cluster->count; j += 2) {
536 int count = RANGE(cluster->registers, j);
537
538 in += CRASHDUMP_READ(in, cluster->registers[j],
539 count, out);
540
541 out += count * sizeof(u32);
542
543 if (i == 0)
544 regcount += count;
545 }
546 }
547
548 CRASHDUMP_FINI(in);
549
550 datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
551
552 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
553 return;
554
555 if (a6xx_crashdumper_run(gpu, dumper))
556 return;
557
558 obj->handle = cluster;
559 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
560 datasize);
561}
562
563static void a6xx_get_clusters(struct msm_gpu *gpu,
564 struct a6xx_gpu_state *a6xx_state,
565 struct a6xx_crashdumper *dumper)
566{
567 int i;
568
569 a6xx_state->clusters = state_kcalloc(a6xx_state,
570 ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
571
572 if (!a6xx_state->clusters)
573 return;
574
575 a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
576
577 for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
578 a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
579 &a6xx_state->clusters[i], dumper);
580}
581
582
583static void a6xx_get_shader_block(struct msm_gpu *gpu,
584 struct a6xx_gpu_state *a6xx_state,
585 const struct a6xx_shader_block *block,
586 struct a6xx_gpu_state_obj *obj,
587 struct a6xx_crashdumper *dumper)
588{
589 u64 *in = dumper->ptr;
590 size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
591 int i;
592
593 if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
594 return;
595
596 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
597 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
598 (block->type << 8) | i);
599
600 in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
601 block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
602 }
603
604 CRASHDUMP_FINI(in);
605
606 if (a6xx_crashdumper_run(gpu, dumper))
607 return;
608
609 obj->handle = block;
610 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
611 datasize);
612}
613
614static void a6xx_get_shaders(struct msm_gpu *gpu,
615 struct a6xx_gpu_state *a6xx_state,
616 struct a6xx_crashdumper *dumper)
617{
618 int i;
619
620 a6xx_state->shaders = state_kcalloc(a6xx_state,
621 ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
622
623 if (!a6xx_state->shaders)
624 return;
625
626 a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
627
628 for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
629 a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
630 &a6xx_state->shaders[i], dumper);
631}
632
633
634static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
635 struct a6xx_gpu_state *a6xx_state,
636 const struct a6xx_registers *regs,
637 struct a6xx_gpu_state_obj *obj,
638 struct a6xx_crashdumper *dumper)
639
640{
641 u64 *in = dumper->ptr;
642 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
643 int i, regcount = 0;
644
645 in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
646
647 for (i = 0; i < regs->count; i += 2) {
648 u32 count = RANGE(regs->registers, i);
649 u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
650 regs->registers[i] - (regs->val0 >> 2);
651
652 in += CRASHDUMP_READ(in, offset, count, out);
653
654 out += count * sizeof(u32);
655 regcount += count;
656 }
657
658 CRASHDUMP_FINI(in);
659
660 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
661 return;
662
663 if (a6xx_crashdumper_run(gpu, dumper))
664 return;
665
666 obj->handle = regs;
667 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
668 regcount * sizeof(u32));
669}
670
671
672static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
673 struct a6xx_gpu_state *a6xx_state,
674 const struct a6xx_registers *regs,
675 struct a6xx_gpu_state_obj *obj,
676 struct a6xx_crashdumper *dumper)
677
678{
679 u64 *in = dumper->ptr;
680 u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
681 int i, regcount = 0;
682
683
684 if (regs->val0)
685 in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
686
687 for (i = 0; i < regs->count; i += 2) {
688 u32 count = RANGE(regs->registers, i);
689
690 in += CRASHDUMP_READ(in, regs->registers[i], count, out);
691
692 out += count * sizeof(u32);
693 regcount += count;
694 }
695
696 CRASHDUMP_FINI(in);
697
698 if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
699 return;
700
701 if (a6xx_crashdumper_run(gpu, dumper))
702 return;
703
704 obj->handle = regs;
705 obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
706 regcount * sizeof(u32));
707}
708
709
710static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
711 struct a6xx_gpu_state *a6xx_state,
712 const struct a6xx_registers *regs,
713 struct a6xx_gpu_state_obj *obj)
714{
715 int i, regcount = 0, index = 0;
716
717 for (i = 0; i < regs->count; i += 2)
718 regcount += RANGE(regs->registers, i);
719
720 obj->handle = (const void *) regs;
721 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
722 if (!obj->data)
723 return;
724
725 for (i = 0; i < regs->count; i += 2) {
726 u32 count = RANGE(regs->registers, i);
727 int j;
728
729 for (j = 0; j < count; j++)
730 obj->data[index++] = gpu_read(gpu,
731 regs->registers[i] + j);
732 }
733}
734
735
736static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
737 struct a6xx_gpu_state *a6xx_state,
738 const struct a6xx_registers *regs,
739 struct a6xx_gpu_state_obj *obj,
740 bool rscc)
741{
742 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
743 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
744 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
745 int i, regcount = 0, index = 0;
746
747 for (i = 0; i < regs->count; i += 2)
748 regcount += RANGE(regs->registers, i);
749
750 obj->handle = (const void *) regs;
751 obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
752 if (!obj->data)
753 return;
754
755 for (i = 0; i < regs->count; i += 2) {
756 u32 count = RANGE(regs->registers, i);
757 int j;
758
759 for (j = 0; j < count; j++) {
760 u32 offset = regs->registers[i] + j;
761 u32 val;
762
763 if (rscc)
764 val = gmu_read_rscc(gmu, offset);
765 else
766 val = gmu_read(gmu, offset);
767
768 obj->data[index++] = val;
769 }
770 }
771}
772
773static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
774 struct a6xx_gpu_state *a6xx_state)
775{
776 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
777 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
778
779 a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
780 2, sizeof(*a6xx_state->gmu_registers));
781
782 if (!a6xx_state->gmu_registers)
783 return;
784
785 a6xx_state->nr_gmu_registers = 2;
786
787
788 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
789 &a6xx_state->gmu_registers[0], false);
790 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
791 &a6xx_state->gmu_registers[1], true);
792
793 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
794 return;
795
796
797 gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
798
799 _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
800 &a6xx_state->gmu_registers[2], false);
801}
802
803#define A6XX_GBIF_REGLIST_SIZE 1
804static void a6xx_get_registers(struct msm_gpu *gpu,
805 struct a6xx_gpu_state *a6xx_state,
806 struct a6xx_crashdumper *dumper)
807{
808 int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
809 ARRAY_SIZE(a6xx_reglist) +
810 ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
811 int index = 0;
812 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
813
814 a6xx_state->registers = state_kcalloc(a6xx_state,
815 count, sizeof(*a6xx_state->registers));
816
817 if (!a6xx_state->registers)
818 return;
819
820 a6xx_state->nr_registers = count;
821
822 for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
823 a6xx_get_ahb_gpu_registers(gpu,
824 a6xx_state, &a6xx_ahb_reglist[i],
825 &a6xx_state->registers[index++]);
826
827 if (a6xx_has_gbif(adreno_gpu))
828 a6xx_get_ahb_gpu_registers(gpu,
829 a6xx_state, &a6xx_gbif_reglist,
830 &a6xx_state->registers[index++]);
831 else
832 a6xx_get_ahb_gpu_registers(gpu,
833 a6xx_state, &a6xx_vbif_reglist,
834 &a6xx_state->registers[index++]);
835
836 for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
837 a6xx_get_crashdumper_registers(gpu,
838 a6xx_state, &a6xx_reglist[i],
839 &a6xx_state->registers[index++],
840 dumper);
841
842 for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
843 a6xx_get_crashdumper_hlsq_registers(gpu,
844 a6xx_state, &a6xx_hlsq_reglist[i],
845 &a6xx_state->registers[index++],
846 dumper);
847}
848
849
850static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
851 struct a6xx_gpu_state *a6xx_state,
852 const struct a6xx_indexed_registers *indexed,
853 struct a6xx_gpu_state_obj *obj)
854{
855 int i;
856
857 obj->handle = (const void *) indexed;
858 obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
859 if (!obj->data)
860 return;
861
862
863 gpu_write(gpu, indexed->addr, 0);
864
865
866 for (i = 0; i < indexed->count; i++)
867 obj->data[i] = gpu_read(gpu, indexed->data);
868}
869
870static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
871 struct a6xx_gpu_state *a6xx_state)
872{
873 u32 mempool_size;
874 int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
875 int i;
876
877 a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
878 sizeof(*a6xx_state->indexed_regs));
879 if (!a6xx_state->indexed_regs)
880 return;
881
882 for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
883 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
884 &a6xx_state->indexed_regs[i]);
885
886
887 mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
888 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
889
890
891 a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
892 &a6xx_state->indexed_regs[i]);
893
894
895
896
897
898 a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
899
900
901 gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
902
903 a6xx_state->nr_indexed_regs = count;
904}
905
906struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
907{
908 struct a6xx_crashdumper dumper = { 0 };
909 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
910 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
911 struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
912 GFP_KERNEL);
913
914 if (!a6xx_state)
915 return ERR_PTR(-ENOMEM);
916
917 INIT_LIST_HEAD(&a6xx_state->objs);
918
919
920 adreno_gpu_state_get(gpu, &a6xx_state->base);
921
922 a6xx_get_gmu_registers(gpu, a6xx_state);
923
924
925 if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
926 return &a6xx_state->base;
927
928
929 a6xx_get_indexed_registers(gpu, a6xx_state);
930
931
932 if (!a6xx_crashdumper_init(gpu, &dumper)) {
933 a6xx_get_registers(gpu, a6xx_state, &dumper);
934 a6xx_get_shaders(gpu, a6xx_state, &dumper);
935 a6xx_get_clusters(gpu, a6xx_state, &dumper);
936 a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper);
937
938 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
939 }
940
941 if (snapshot_debugbus)
942 a6xx_get_debugbus(gpu, a6xx_state);
943
944 return &a6xx_state->base;
945}
946
947static void a6xx_gpu_state_destroy(struct kref *kref)
948{
949 struct a6xx_state_memobj *obj, *tmp;
950 struct msm_gpu_state *state = container_of(kref,
951 struct msm_gpu_state, ref);
952 struct a6xx_gpu_state *a6xx_state = container_of(state,
953 struct a6xx_gpu_state, base);
954
955 list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
956 kfree(obj);
957
958 adreno_gpu_state_destroy(state);
959 kfree(a6xx_state);
960}
961
962int a6xx_gpu_state_put(struct msm_gpu_state *state)
963{
964 if (IS_ERR_OR_NULL(state))
965 return 1;
966
967 return kref_put(&state->ref, a6xx_gpu_state_destroy);
968}
969
970static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
971 struct drm_printer *p)
972{
973 int i, index = 0;
974
975 if (!data)
976 return;
977
978 for (i = 0; i < count; i += 2) {
979 u32 count = RANGE(registers, i);
980 u32 offset = registers[i];
981 int j;
982
983 for (j = 0; j < count; index++, offset++, j++) {
984 if (data[index] == 0xdeafbead)
985 continue;
986
987 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
988 offset << 2, data[index]);
989 }
990 }
991}
992
993static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
994{
995 char out[ASCII85_BUFSZ];
996 long i, l, datalen = 0;
997
998 for (i = 0; i < len >> 2; i++) {
999 if (data[i])
1000 datalen = (i + 1) << 2;
1001 }
1002
1003 if (datalen == 0)
1004 return;
1005
1006 drm_puts(p, " data: !!ascii85 |\n");
1007 drm_puts(p, " ");
1008
1009
1010 l = ascii85_encode_len(datalen);
1011
1012 for (i = 0; i < l; i++)
1013 drm_puts(p, ascii85_encode(data[i], out));
1014
1015 drm_puts(p, "\n");
1016}
1017
/*
 * Print @fmt followed by @name and a newline.  Both strings are emitted
 * verbatim with drm_puts(), so @fmt is a plain prefix, not a format string.
 */
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	drm_puts(p, fmt);
	drm_puts(p, name);
	drm_puts(p, "\n");
}
1024
1025static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1026 struct drm_printer *p)
1027{
1028 const struct a6xx_shader_block *block = obj->handle;
1029 int i;
1030
1031 if (!obj->handle)
1032 return;
1033
1034 print_name(p, " - type: ", block->name);
1035
1036 for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1037 drm_printf(p, " - bank: %d\n", i);
1038 drm_printf(p, " size: %d\n", block->size);
1039
1040 if (!obj->data)
1041 continue;
1042
1043 print_ascii85(p, block->size << 2,
1044 obj->data + (block->size * i));
1045 }
1046}
1047
1048static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1049 struct drm_printer *p)
1050{
1051 int ctx, index = 0;
1052
1053 for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1054 int j;
1055
1056 drm_printf(p, " - context: %d\n", ctx);
1057
1058 for (j = 0; j < size; j += 2) {
1059 u32 count = RANGE(registers, j);
1060 u32 offset = registers[j];
1061 int k;
1062
1063 for (k = 0; k < count; index++, offset++, k++) {
1064 if (data[index] == 0xdeafbead)
1065 continue;
1066
1067 drm_printf(p, " - { offset: 0x%06x, value: 0x%08x }\n",
1068 offset << 2, data[index]);
1069 }
1070 }
1071 }
1072}
1073
1074static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1075 struct drm_printer *p)
1076{
1077 const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1078
1079 if (dbgahb) {
1080 print_name(p, " - cluster-name: ", dbgahb->name);
1081 a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1082 obj->data, p);
1083 }
1084}
1085
1086static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1087 struct drm_printer *p)
1088{
1089 const struct a6xx_cluster *cluster = obj->handle;
1090
1091 if (cluster) {
1092 print_name(p, " - cluster-name: ", cluster->name);
1093 a6xx_show_cluster_data(cluster->registers, cluster->count,
1094 obj->data, p);
1095 }
1096}
1097
1098static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1099 struct drm_printer *p)
1100{
1101 const struct a6xx_indexed_registers *indexed = obj->handle;
1102
1103 if (!indexed)
1104 return;
1105
1106 print_name(p, " - regs-name: ", indexed->name);
1107 drm_printf(p, " dwords: %d\n", indexed->count);
1108
1109 print_ascii85(p, indexed->count << 2, obj->data);
1110}
1111
1112static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1113 u32 *data, struct drm_printer *p)
1114{
1115 if (block) {
1116 print_name(p, " - debugbus-block: ", block->name);
1117
1118
1119
1120
1121
1122 drm_printf(p, " count: %d\n", block->count << 1);
1123
1124 print_ascii85(p, block->count << 3, data);
1125 }
1126}
1127
1128static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1129 struct drm_printer *p)
1130{
1131 int i;
1132
1133 for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1134 struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1135
1136 a6xx_show_debugbus_block(obj->handle, obj->data, p);
1137 }
1138
1139 if (a6xx_state->vbif_debugbus) {
1140 struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1141
1142 drm_puts(p, " - debugbus-block: A6XX_DBGBUS_VBIF\n");
1143 drm_printf(p, " count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1144
1145
1146 print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1147 }
1148
1149 for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1150 struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1151
1152 a6xx_show_debugbus_block(obj->handle, obj->data, p);
1153 }
1154}
1155
1156void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1157 struct drm_printer *p)
1158{
1159 struct a6xx_gpu_state *a6xx_state = container_of(state,
1160 struct a6xx_gpu_state, base);
1161 int i;
1162
1163 if (IS_ERR_OR_NULL(state))
1164 return;
1165
1166 adreno_show(gpu, state, p);
1167
1168 drm_puts(p, "registers:\n");
1169 for (i = 0; i < a6xx_state->nr_registers; i++) {
1170 struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1171 const struct a6xx_registers *regs = obj->handle;
1172
1173 if (!obj->handle)
1174 continue;
1175
1176 a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1177 }
1178
1179 drm_puts(p, "registers-gmu:\n");
1180 for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1181 struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1182 const struct a6xx_registers *regs = obj->handle;
1183
1184 if (!obj->handle)
1185 continue;
1186
1187 a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1188 }
1189
1190 drm_puts(p, "indexed-registers:\n");
1191 for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
1192 a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1193
1194 drm_puts(p, "shader-blocks:\n");
1195 for (i = 0; i < a6xx_state->nr_shaders; i++)
1196 a6xx_show_shader(&a6xx_state->shaders[i], p);
1197
1198 drm_puts(p, "clusters:\n");
1199 for (i = 0; i < a6xx_state->nr_clusters; i++)
1200 a6xx_show_cluster(&a6xx_state->clusters[i], p);
1201
1202 for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
1203 a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
1204
1205 drm_puts(p, "debugbus:\n");
1206 a6xx_show_debugbus(a6xx_state, p);
1207}
1208