1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44#include "priv.h"
45
46#include <core/memory.h>
47#include <core/tegra.h>
48#include <subdev/ltc.h>
49#include <subdev/mmu.h>
50
51struct gk20a_instobj {
52 struct nvkm_memory memory;
53 struct nvkm_mm_node *mn;
54 struct gk20a_instmem *imem;
55
56
57 u32 *vaddr;
58};
59#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory)
60
61
62
63
64struct gk20a_instobj_dma {
65 struct gk20a_instobj base;
66
67 dma_addr_t handle;
68 struct nvkm_mm_node r;
69};
70#define gk20a_instobj_dma(p) \
71 container_of(gk20a_instobj(p), struct gk20a_instobj_dma, base)
72
73
74
75
76struct gk20a_instobj_iommu {
77 struct gk20a_instobj base;
78
79
80 struct list_head vaddr_node;
81
82 u32 use_cpt;
83
84
85 dma_addr_t *dma_addrs;
86
87 struct page *pages[];
88};
89#define gk20a_instobj_iommu(p) \
90 container_of(gk20a_instobj(p), struct gk20a_instobj_iommu, base)
91
92struct gk20a_instmem {
93 struct nvkm_instmem base;
94
95
96 struct mutex lock;
97
98
99 unsigned int vaddr_use;
100 unsigned int vaddr_max;
101 struct list_head vaddr_lru;
102
103
104 struct mutex *mm_mutex;
105 struct nvkm_mm *mm;
106 struct iommu_domain *domain;
107 unsigned long iommu_pgshift;
108 u16 iommu_bit;
109
110
111 unsigned long attrs;
112};
113#define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base)
114
115static enum nvkm_memory_target
116gk20a_instobj_target(struct nvkm_memory *memory)
117{
118 return NVKM_MEM_TARGET_NCOH;
119}
120
121static u8
122gk20a_instobj_page(struct nvkm_memory *memory)
123{
124 return 12;
125}
126
127static u64
128gk20a_instobj_addr(struct nvkm_memory *memory)
129{
130 return (u64)gk20a_instobj(memory)->mn->offset << 12;
131}
132
133static u64
134gk20a_instobj_size(struct nvkm_memory *memory)
135{
136 return (u64)gk20a_instobj(memory)->mn->length << 12;
137}
138
139
140
141
142static void
143gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj)
144{
145 struct gk20a_instmem *imem = obj->base.imem;
146
147 WARN_ON(obj->use_cpt);
148 list_del(&obj->vaddr_node);
149 vunmap(obj->base.vaddr);
150 obj->base.vaddr = NULL;
151 imem->vaddr_use -= nvkm_memory_size(&obj->base.memory);
152 nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use,
153 imem->vaddr_max);
154}
155
156
157
158
159static void
160gk20a_instmem_vaddr_gc(struct gk20a_instmem *imem, const u64 size)
161{
162 while (imem->vaddr_use + size > imem->vaddr_max) {
163
164 if (list_empty(&imem->vaddr_lru))
165 break;
166
167 gk20a_instobj_iommu_recycle_vaddr(
168 list_first_entry(&imem->vaddr_lru,
169 struct gk20a_instobj_iommu, vaddr_node));
170 }
171}
172
173static void __iomem *
174gk20a_instobj_acquire_dma(struct nvkm_memory *memory)
175{
176 struct gk20a_instobj *node = gk20a_instobj(memory);
177 struct gk20a_instmem *imem = node->imem;
178 struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
179
180 nvkm_ltc_flush(ltc);
181
182 return node->vaddr;
183}
184
185static void __iomem *
186gk20a_instobj_acquire_iommu(struct nvkm_memory *memory)
187{
188 struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
189 struct gk20a_instmem *imem = node->base.imem;
190 struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
191 const u64 size = nvkm_memory_size(memory);
192
193 nvkm_ltc_flush(ltc);
194
195 mutex_lock(&imem->lock);
196
197 if (node->base.vaddr) {
198 if (!node->use_cpt) {
199
200 list_del(&node->vaddr_node);
201 }
202 goto out;
203 }
204
205
206 gk20a_instmem_vaddr_gc(imem, size);
207
208
209 node->base.vaddr = vmap(node->pages, size >> PAGE_SHIFT, VM_MAP,
210 pgprot_writecombine(PAGE_KERNEL));
211 if (!node->base.vaddr) {
212 nvkm_error(&imem->base.subdev, "cannot map instobj - "
213 "this is not going to end well...\n");
214 goto out;
215 }
216
217 imem->vaddr_use += size;
218 nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n",
219 imem->vaddr_use, imem->vaddr_max);
220
221out:
222 node->use_cpt++;
223 mutex_unlock(&imem->lock);
224
225 return node->base.vaddr;
226}
227
228static void
229gk20a_instobj_release_dma(struct nvkm_memory *memory)
230{
231 struct gk20a_instobj *node = gk20a_instobj(memory);
232 struct gk20a_instmem *imem = node->imem;
233 struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
234
235
236 wmb();
237 nvkm_ltc_invalidate(ltc);
238}
239
240static void
241gk20a_instobj_release_iommu(struct nvkm_memory *memory)
242{
243 struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
244 struct gk20a_instmem *imem = node->base.imem;
245 struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
246
247 mutex_lock(&imem->lock);
248
249
250 if (WARN_ON(node->use_cpt == 0))
251 goto out;
252
253
254 if (--node->use_cpt == 0)
255 list_add_tail(&node->vaddr_node, &imem->vaddr_lru);
256
257out:
258 mutex_unlock(&imem->lock);
259
260 wmb();
261 nvkm_ltc_invalidate(ltc);
262}
263
264static u32
265gk20a_instobj_rd32(struct nvkm_memory *memory, u64 offset)
266{
267 struct gk20a_instobj *node = gk20a_instobj(memory);
268
269 return node->vaddr[offset / 4];
270}
271
272static void
273gk20a_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data)
274{
275 struct gk20a_instobj *node = gk20a_instobj(memory);
276
277 node->vaddr[offset / 4] = data;
278}
279
280static int
281gk20a_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm,
282 struct nvkm_vma *vma, void *argv, u32 argc)
283{
284 struct gk20a_instobj *node = gk20a_instobj(memory);
285 struct nvkm_vmm_map map = {
286 .memory = &node->memory,
287 .offset = offset,
288 .mem = node->mn,
289 };
290
291 return nvkm_vmm_map(vmm, vma, argv, argc, &map);
292}
293
294static void *
295gk20a_instobj_dtor_dma(struct nvkm_memory *memory)
296{
297 struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory);
298 struct gk20a_instmem *imem = node->base.imem;
299 struct device *dev = imem->base.subdev.device->dev;
300
301 if (unlikely(!node->base.vaddr))
302 goto out;
303
304 dma_free_attrs(dev, (u64)node->base.mn->length << PAGE_SHIFT,
305 node->base.vaddr, node->handle, imem->attrs);
306
307out:
308 return node;
309}
310
311static void *
312gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
313{
314 struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
315 struct gk20a_instmem *imem = node->base.imem;
316 struct device *dev = imem->base.subdev.device->dev;
317 struct nvkm_mm_node *r = node->base.mn;
318 int i;
319
320 if (unlikely(!r))
321 goto out;
322
323 mutex_lock(&imem->lock);
324
325
326 if (node->base.vaddr)
327 gk20a_instobj_iommu_recycle_vaddr(node);
328
329 mutex_unlock(&imem->lock);
330
331
332 r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift);
333
334
335 for (i = 0; i < node->base.mn->length; i++) {
336 iommu_unmap(imem->domain,
337 (r->offset + i) << imem->iommu_pgshift, PAGE_SIZE);
338 dma_unmap_page(dev, node->dma_addrs[i], PAGE_SIZE,
339 DMA_BIDIRECTIONAL);
340 __free_page(node->pages[i]);
341 }
342
343
344 mutex_lock(imem->mm_mutex);
345 nvkm_mm_free(imem->mm, &r);
346 mutex_unlock(imem->mm_mutex);
347
348out:
349 return node;
350}
351
352static const struct nvkm_memory_func
353gk20a_instobj_func_dma = {
354 .dtor = gk20a_instobj_dtor_dma,
355 .target = gk20a_instobj_target,
356 .page = gk20a_instobj_page,
357 .addr = gk20a_instobj_addr,
358 .size = gk20a_instobj_size,
359 .acquire = gk20a_instobj_acquire_dma,
360 .release = gk20a_instobj_release_dma,
361 .map = gk20a_instobj_map,
362};
363
364static const struct nvkm_memory_func
365gk20a_instobj_func_iommu = {
366 .dtor = gk20a_instobj_dtor_iommu,
367 .target = gk20a_instobj_target,
368 .page = gk20a_instobj_page,
369 .addr = gk20a_instobj_addr,
370 .size = gk20a_instobj_size,
371 .acquire = gk20a_instobj_acquire_iommu,
372 .release = gk20a_instobj_release_iommu,
373 .map = gk20a_instobj_map,
374};
375
376static const struct nvkm_memory_ptrs
377gk20a_instobj_ptrs = {
378 .rd32 = gk20a_instobj_rd32,
379 .wr32 = gk20a_instobj_wr32,
380};
381
382static int
383gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
384 struct gk20a_instobj **_node)
385{
386 struct gk20a_instobj_dma *node;
387 struct nvkm_subdev *subdev = &imem->base.subdev;
388 struct device *dev = subdev->device->dev;
389
390 if (!(node = kzalloc(sizeof(*node), GFP_KERNEL)))
391 return -ENOMEM;
392 *_node = &node->base;
393
394 nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory);
395 node->base.memory.ptrs = &gk20a_instobj_ptrs;
396
397 node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
398 &node->handle, GFP_KERNEL,
399 imem->attrs);
400 if (!node->base.vaddr) {
401 nvkm_error(subdev, "cannot allocate DMA memory\n");
402 return -ENOMEM;
403 }
404
405
406 if (unlikely(node->handle & (align - 1)))
407 nvkm_warn(subdev,
408 "memory not aligned as requested: %pad (0x%x)\n",
409 &node->handle, align);
410
411
412 node->r.type = 12;
413 node->r.offset = node->handle >> 12;
414 node->r.length = (npages << PAGE_SHIFT) >> 12;
415
416 node->base.mn = &node->r;
417 return 0;
418}
419
420static int
421gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
422 struct gk20a_instobj **_node)
423{
424 struct gk20a_instobj_iommu *node;
425 struct nvkm_subdev *subdev = &imem->base.subdev;
426 struct device *dev = subdev->device->dev;
427 struct nvkm_mm_node *r;
428 int ret;
429 int i;
430
431
432
433
434
435 if (!(node = kzalloc(sizeof(*node) + ((sizeof(node->pages[0]) +
436 sizeof(*node->dma_addrs)) * npages), GFP_KERNEL)))
437 return -ENOMEM;
438 *_node = &node->base;
439 node->dma_addrs = (void *)(node->pages + npages);
440
441 nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory);
442 node->base.memory.ptrs = &gk20a_instobj_ptrs;
443
444
445 for (i = 0; i < npages; i++) {
446 struct page *p = alloc_page(GFP_KERNEL);
447 dma_addr_t dma_adr;
448
449 if (p == NULL) {
450 ret = -ENOMEM;
451 goto free_pages;
452 }
453 node->pages[i] = p;
454 dma_adr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
455 if (dma_mapping_error(dev, dma_adr)) {
456 nvkm_error(subdev, "DMA mapping error!\n");
457 ret = -ENOMEM;
458 goto free_pages;
459 }
460 node->dma_addrs[i] = dma_adr;
461 }
462
463 mutex_lock(imem->mm_mutex);
464
465 ret = nvkm_mm_head(imem->mm, 0, 1, npages, npages,
466 align >> imem->iommu_pgshift, &r);
467 mutex_unlock(imem->mm_mutex);
468 if (ret) {
469 nvkm_error(subdev, "IOMMU space is full!\n");
470 goto free_pages;
471 }
472
473
474 for (i = 0; i < npages; i++) {
475 u32 offset = (r->offset + i) << imem->iommu_pgshift;
476
477 ret = iommu_map(imem->domain, offset, node->dma_addrs[i],
478 PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
479 if (ret < 0) {
480 nvkm_error(subdev, "IOMMU mapping failure: %d\n", ret);
481
482 while (i-- > 0) {
483 offset -= PAGE_SIZE;
484 iommu_unmap(imem->domain, offset, PAGE_SIZE);
485 }
486 goto release_area;
487 }
488 }
489
490
491 r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift);
492
493 node->base.mn = r;
494 return 0;
495
496release_area:
497 mutex_lock(imem->mm_mutex);
498 nvkm_mm_free(imem->mm, &r);
499 mutex_unlock(imem->mm_mutex);
500
501free_pages:
502 for (i = 0; i < npages && node->pages[i] != NULL; i++) {
503 dma_addr_t dma_addr = node->dma_addrs[i];
504 if (dma_addr)
505 dma_unmap_page(dev, dma_addr, PAGE_SIZE,
506 DMA_BIDIRECTIONAL);
507 __free_page(node->pages[i]);
508 }
509
510 return ret;
511}
512
513static int
514gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero,
515 struct nvkm_memory **pmemory)
516{
517 struct gk20a_instmem *imem = gk20a_instmem(base);
518 struct nvkm_subdev *subdev = &imem->base.subdev;
519 struct gk20a_instobj *node = NULL;
520 int ret;
521
522 nvkm_debug(subdev, "%s (%s): size: %x align: %x\n", __func__,
523 imem->domain ? "IOMMU" : "DMA", size, align);
524
525
526 size = max(roundup(size, PAGE_SIZE), PAGE_SIZE);
527 align = max(roundup(align, PAGE_SIZE), PAGE_SIZE);
528
529 if (imem->domain)
530 ret = gk20a_instobj_ctor_iommu(imem, size >> PAGE_SHIFT,
531 align, &node);
532 else
533 ret = gk20a_instobj_ctor_dma(imem, size >> PAGE_SHIFT,
534 align, &node);
535 *pmemory = node ? &node->memory : NULL;
536 if (ret)
537 return ret;
538
539 node->imem = imem;
540
541 nvkm_debug(subdev, "alloc size: 0x%x, align: 0x%x, gaddr: 0x%llx\n",
542 size, align, (u64)node->mn->offset << 12);
543
544 return 0;
545}
546
547static void *
548gk20a_instmem_dtor(struct nvkm_instmem *base)
549{
550 struct gk20a_instmem *imem = gk20a_instmem(base);
551
552
553 if (!list_empty(&imem->vaddr_lru))
554 nvkm_warn(&base->subdev, "instobj LRU not empty!\n");
555
556 if (imem->vaddr_use != 0)
557 nvkm_warn(&base->subdev, "instobj vmap area not empty! "
558 "0x%x bytes still mapped\n", imem->vaddr_use);
559
560 return imem;
561}
562
563static const struct nvkm_instmem_func
564gk20a_instmem = {
565 .dtor = gk20a_instmem_dtor,
566 .memory_new = gk20a_instobj_new,
567 .zero = false,
568};
569
570int
571gk20a_instmem_new(struct nvkm_device *device, int index,
572 struct nvkm_instmem **pimem)
573{
574 struct nvkm_device_tegra *tdev = device->func->tegra(device);
575 struct gk20a_instmem *imem;
576
577 if (!(imem = kzalloc(sizeof(*imem), GFP_KERNEL)))
578 return -ENOMEM;
579 nvkm_instmem_ctor(&gk20a_instmem, device, index, &imem->base);
580 mutex_init(&imem->lock);
581 *pimem = &imem->base;
582
583
584 imem->vaddr_use = 0;
585 imem->vaddr_max = 0x100000;
586 INIT_LIST_HEAD(&imem->vaddr_lru);
587
588 if (tdev->iommu.domain) {
589 imem->mm_mutex = &tdev->iommu.mutex;
590 imem->mm = &tdev->iommu.mm;
591 imem->domain = tdev->iommu.domain;
592 imem->iommu_pgshift = tdev->iommu.pgshift;
593 imem->iommu_bit = tdev->func->iommu_bit;
594
595 nvkm_info(&imem->base.subdev, "using IOMMU\n");
596 } else {
597 imem->attrs = DMA_ATTR_NON_CONSISTENT |
598 DMA_ATTR_WEAK_ORDERING |
599 DMA_ATTR_WRITE_COMBINE;
600
601 nvkm_info(&imem->base.subdev, "using DMA API\n");
602 }
603
604 return 0;
605}
606