/*
 * SN Platform GRU Driver
 *
 *		MMUOPS callbacks + TLB flushing
 *
 * This file handles mmu notifier callbacks from the core kernel. The
 * callbacks are used to update the TLB in the GRU as a result of changes
 * in the state of a process address space. This file also handles TLB
 * invalidates from the GRU driver.
 *
 * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 */
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/hugetlb.h>
#include <linux/delay.h>
#include <linux/timex.h>
#include <linux/srcu.h>
#include <asm/processor.h>
#include "gru.h"
#include "grutables.h"
#include <asm/uv/uv_hub.h>

#define gru_random()	get_cycles()

/*
 * get_lock_tgh_handle
 *
 * Find a TGH (TLB global handle) to use for issuing a TLB invalidate. For
 * GRUs on the local blade, use a fixed TGH that is a function of the
 * blade-local cpu number; normally that TGH is private to the cpu and no
 * contention occurs for it. For off-blade GRUs, pick a random TGH in the
 * range above the blade-private handles. The selected TGH is returned
 * locked and must be released with get_unlock_tgh_handle().
 */
static inline int get_off_blade_tgh(struct gru_state *gru)
{
	int n;

	n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
	n = gru_random() % n;
	n += gru->gs_tgh_first_remote;
	return n;
}

static inline int get_on_blade_tgh(struct gru_state *gru)
{
	return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
}

static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
							  *gru)
{
	struct gru_tlb_global_handle *tgh;
	int n;

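	/*
	 * Disable preemption so the cpu, and therefore the blade id and
	 * the per-cpu TGH choice, cannot change while the handle is held.
	 */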
	preempt_disable();
	if (uv_numa_blade_id() == gru->gs_blade_id)
		n = get_on_blade_tgh(gru);
	else
		n = get_off_blade_tgh(gru);
	tgh = get_tgh_by_index(gru, n);
	lock_tgh_handle(tgh);

	return tgh;
}

static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
{
	unlock_tgh_handle(tgh);
	preempt_enable();
}

/*
 * gru_flush_tlb_range
 *
 * General purpose TLB invalidation function. This function scans every GRU
 * in the system looking for GRUs where the specified MM has been accessed
 * by the GRU. For each GRU found, the TLB entries for the range are either
 * invalidated through a TGH, or the ASID is invalidated. Invalidating an
 * ASID causes a new ASID to be assigned on the next fault, which flushes
 * the entire TLB for the MM at that point. The ASID cannot be dropped while
 * a TLB dropin is in progress for the MM; this is detected by a non-zero
 * "asids->mt_ctxbitmap".
 */
void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
			 unsigned long len)
{
	struct gru_state *gru;
	struct gru_mm_tracker *asids;
	struct gru_tlb_global_handle *tgh;
	unsigned long num;
	int grupagesize, pagesize, pageshift, gid, asid;

	pageshift = PAGE_SHIFT;
	pagesize = (1UL << pageshift);
	grupagesize = GRU_PAGESIZE(pageshift);
	num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);

	STAT(flush_tlb);
	gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
		start, len, gms->ms_asidmap[0]);

	spin_lock(&gms->ms_asid_lock);
	for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
		STAT(flush_tlb_gru);
		gru = GID_TO_GRU(gid);
		asids = gms->ms_asids + gid;
		asid = asids->mt_asid;
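		/*
		 * If a context is loaded (non-zero ctxbitmap) and an ASID is
		 * assigned, do a targeted TGH invalidate of the range.
		 * Otherwise drop the ASID; assigning a new ASID on the next
		 * fault implicitly flushes the whole TLB for this MM.
		 */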
		if (asids->mt_ctxbitmap && asid) {
			STAT(flush_tlb_gru_tgh);
			asid = GRUASID(asid, start);
			gru_dbg(grudev,
	" FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n",
				gid, asid, start, grupagesize, num,
				asids->mt_ctxbitmap);
			tgh = get_lock_tgh_handle(gru);
			tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0,
				       num - 1, asids->mt_ctxbitmap);
			get_unlock_tgh_handle(tgh);
		} else {
			STAT(flush_tlb_gru_zero_asid);
			asids->mt_asid = 0;
			__clear_bit(gru->gs_gid, gms->ms_asidmap);
			gru_dbg(grudev,
	" CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
				gid, asid, asids->mt_ctxbitmap,
				gms->ms_asidmap[0]);
		}
	}
	spin_unlock(&gms->ms_asid_lock);
}

/*
 * Flush the entire TLB on a chiplet.
 */
void gru_flush_all_tlb(struct gru_state *gru)
{
	struct gru_tlb_global_handle *tgh;

	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
	tgh = get_lock_tgh_handle(gru);
	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff);
	get_unlock_tgh_handle(tgh);
}

/*
 * MMUOPS notifier callout functions
 */
static void gru_invalidate_range_start(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long start, unsigned long end)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	STAT(mmu_invalidate_range);
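	/*
	 * Bump ms_range_active before flushing so the rest of the driver
	 * can see that a range invalidate is in progress; the matching
	 * ..._range_end() drops the count and wakes ms_wait_queue.
	 */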
	atomic_inc(&gms->ms_range_active);
	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
		start, end, atomic_read(&gms->ms_range_active));
	gru_flush_tlb_range(gms, start, end - start);
}

static void gru_invalidate_range_end(struct mmu_notifier *mn,
				     struct mm_struct *mm, unsigned long start,
				     unsigned long end)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	/* ..._range_start() bumped the count; drop it and wake any waiters */
	(void)atomic_dec_and_test(&gms->ms_range_active);

	wake_up_all(&gms->ms_wait_queue);
	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
}
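/*
 * Flush GRU TLB entries for a single page of the mm.
 */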
static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
				unsigned long address)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	STAT(mmu_invalidate_page);
	gru_flush_tlb_range(gms, address, PAGE_SIZE);
	gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
}
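/*
 * Called when the address space is being torn down. Mark the gms so that
 * gru_drop_mmu_notifier() does not try to unregister the notifier again.
 */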
static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	gms->ms_released = 1;
	gru_dbg(grudev, "gms %p\n", gms);
}

static const struct mmu_notifier_ops gru_mmuops = {
	.invalidate_page	= gru_invalidate_page,
	.invalidate_range_start	= gru_invalidate_range_start,
	.invalidate_range_end	= gru_invalidate_range_end,
	.release		= gru_release,
};
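/*
 * Scan the mm's notifier list (under RCU) for a notifier registered with
 * the given ops, i.e. an existing GRU registration for this mm.
 */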
static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
					 const struct mmu_notifier_ops *ops)
{
	struct mmu_notifier *mn, *gru_mn = NULL;

	if (mm->mmu_notifier_mm) {
		rcu_read_lock();
		hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list,
					 hlist)
			if (mn->ops == ops) {
				gru_mn = mn;
				break;
			}
		rcu_read_unlock();
	}
	return gru_mn;
}

struct gru_mm_struct *gru_register_mmu_notifier(void)
{
	struct gru_mm_struct *gms;
	struct mmu_notifier *mn;
	int err;

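	/*
	 * Reuse an existing notifier for this mm if one is already
	 * registered; otherwise allocate and register a new gms.
	 */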
	mn = mmu_find_ops(current->mm, &gru_mmuops);
	if (mn) {
		gms = container_of(mn, struct gru_mm_struct, ms_notifier);
		atomic_inc(&gms->ms_refcnt);
	} else {
		gms = kzalloc(sizeof(*gms), GFP_KERNEL);
		if (!gms)
			return ERR_PTR(-ENOMEM);
		STAT(gms_alloc);
		spin_lock_init(&gms->ms_asid_lock);
		gms->ms_notifier.ops = &gru_mmuops;
		atomic_set(&gms->ms_refcnt, 1);
		init_waitqueue_head(&gms->ms_wait_queue);
		err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
		if (err)
			goto error;
	}
	gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
		atomic_read(&gms->ms_refcnt));
	return gms;
error:
	kfree(gms);
	return ERR_PTR(err);
}

void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
{
	gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
		atomic_read(&gms->ms_refcnt), gms->ms_released);
	if (atomic_dec_return(&gms->ms_refcnt) == 0) {
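		/*
		 * If the mm was already released (gru_release() ran), the
		 * notifier went away with it; don't unregister it twice.
		 */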
		if (!gms->ms_released)
			mmu_notifier_unregister(&gms->ms_notifier, current->mm);
		kfree(gms);
		STAT(gms_free);
	}
}

/*
 * Setup of the TGH flush parameters.
 *
 * A GRU chiplet has GRU_NUM_TGH TLB global handles. The low handles (at
 * most MAX_LOCAL_TGH of them) are reserved for blade-local cpus and are
 * selected by shifting the blade-local cpu number (gs_tgh_local_shift).
 * The remaining handles, starting at gs_tgh_first_remote, are shared by
 * off-blade cpus and are selected at random (see get_off_blade_tgh()).
 */
#define MAX_LOCAL_TGH	16

void gru_tgh_flush_init(struct gru_state *gru)
{
	int cpus, shift = 0, n;

	cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);

	/* n = cpus rounded up to the next power of 2 */
	if (cpus) {
		n = 1 << fls(cpus - 1);

		/*
		 * Shift count for converting a blade-local cpu number to a
		 * TGH index:
		 *	0 if cpus <= MAX_LOCAL_TGH,
		 *	1/2/3/... for cpus up to 2X/4X/8X/... MAX_LOCAL_TGH
		 */
		shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
	}
	gru->gs_tgh_local_shift = shift;

	/* first TGH index to use for remote (off-blade) purges */
	gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
}