/*
 * SN Platform GRU Driver
 *
 *		MMUOPS callbacks + TLB flushing
 *
 * This file handles mmu notifier callbacks from the core kernel. The
 * callbacks are used to update the TLB in the GRU as a result of changes
 * in the state of a process address space. This file also handles TLB
 * invalidates issued by the GRU driver itself.
 *
 *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/hugetlb.h>
#include <linux/delay.h>
#include <linux/timex.h>
#include <linux/srcu.h>
#include <asm/processor.h>
#include "gru.h"
#include "grutables.h"
#include <asm/uv/uv_hub.h>

#define gru_random()	get_cycles()

/*
 * get_lock_tgh_handle
 *
 * Find a TGH to use for issuing a TLB invalidate. For GRUs on the local
 * blade, use a fixed TGH that is a function of the blade-local cpu number;
 * normally that TGH is private to the cpu, so no contention occurs. For
 * off-blade GRUs, select a random TGH in the range above the private TGHs.
 * The TGH spinlock serializes access to the handle, and preemption is
 * disabled while it is held so the cpu cannot migrate to another blade.
 */
static inline int get_off_blade_tgh(struct gru_state *gru)
{
	int n;

	n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
	n = gru_random() % n;
	n += gru->gs_tgh_first_remote;
	return n;
}

static inline int get_on_blade_tgh(struct gru_state *gru)
{
	return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
}

static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
							 *gru)
{
	struct gru_tlb_global_handle *tgh;
	int n;

	preempt_disable();
	if (uv_numa_blade_id() == gru->gs_blade_id)
		n = get_on_blade_tgh(gru);
	else
		n = get_off_blade_tgh(gru);
	tgh = get_tgh_by_index(gru, n);
	lock_tgh_handle(tgh);

	return tgh;
}

static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
{
	unlock_tgh_handle(tgh);
	preempt_enable();
}
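
/*
 * Usage sketch (illustrative; this is the pattern the flush functions
 * below follow):
 *
 *	tgh = get_lock_tgh_handle(gru);	- select + lock a TGH, preempt off
 *	tgh_invalidate(tgh, ...);
 *	get_unlock_tgh_handle(tgh);	- unlock the TGH, preempt back on
 */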

/*
 * gru_flush_tlb_range
 *
 * General purpose TLB invalidation function. This function scans every GRU
 * on which the specified MM has been accessed. For each such GRU, either
 * the TLB entries for the range are invalidated through a TGH, or (if no
 * context is currently loaded) the ASID is invalidated by zeroing it.
 * Invalidating an ASID causes a new ASID to be assigned on the next fault,
 * which effectively flushes the ENTIRE TLB in that GRU for the task.
 */
void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
			 unsigned long len)
{
	struct gru_state *gru;
	struct gru_mm_tracker *asids;
	struct gru_tlb_global_handle *tgh;
	unsigned long num;
	int grupagesize, pagesize, pageshift, gid, asid;

	/* TODO - handle huge pages; only base pages are flushed for now */
	pageshift = PAGE_SHIFT;
	pagesize = (1UL << pageshift);
	grupagesize = GRU_PAGESIZE(pageshift);
	num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);

	STAT(flush_tlb);
	gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
		start, len, gms->ms_asidmap[0]);

	spin_lock(&gms->ms_asid_lock);
	for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
		STAT(flush_tlb_gru);
		gru = GID_TO_GRU(gid);
		asids = gms->ms_asids + gid;
		asid = asids->mt_asid;
		if (asids->mt_ctxbitmap && asid) {
			STAT(flush_tlb_gru_tgh);
			asid = GRUASID(asid, start);
			gru_dbg(grudev,
				"  FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n",
				gid, asid, start, grupagesize, num, asids->mt_ctxbitmap);
			tgh = get_lock_tgh_handle(gru);
			tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0,
				       num - 1, asids->mt_ctxbitmap);
			get_unlock_tgh_handle(tgh);
		} else {
			STAT(flush_tlb_gru_zero_asid);
			asids->mt_asid = 0;
			__clear_bit(gru->gs_gid, gms->ms_asidmap);
			gru_dbg(grudev,
				"  CLEARASID gruid %d, asid 0x%x, cbmap 0x%x, asidmap 0x%lx\n",
				gid, asid, asids->mt_ctxbitmap,
				gms->ms_asidmap[0]);
		}
	}
	spin_unlock(&gms->ms_asid_lock);
}
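
/*
 * Example (illustrative): with 4K base pages, flushing len 0x3000 gives
 * num = 3; tgh_invalidate() is passed num - 1, i.e. the page count is
 * encoded as N - 1.
 */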

/*
 * Flush the entire TLB on a chip.
 */
void gru_flush_all_tlb(struct gru_state *gru)
{
	struct gru_tlb_global_handle *tgh;

	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
	tgh = get_lock_tgh_handle(gru);
	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff);
	get_unlock_tgh_handle(tgh);
}

/*
 * MMUOPS notifier callout functions
 */
static int gru_invalidate_range_start(struct mmu_notifier *mn,
				      const struct mmu_notifier_range *range)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	STAT(mmu_invalidate_range);
	atomic_inc(&gms->ms_range_active);
	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
		range->start, range->end, atomic_read(&gms->ms_range_active));
	gru_flush_tlb_range(gms, range->start, range->end - range->start);

	return 0;
}

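/*
 * Note: gru_invalidate_range_end() pairs with the atomic_inc() above.
 * The fault-handling path elsewhere in the driver is assumed to wait on
 * ms_wait_queue until ms_range_active reaches zero before dropping new
 * TLB entries into a range that is being invalidated.
 */
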
static void gru_invalidate_range_end(struct mmu_notifier *mn,
				     const struct mmu_notifier_range *range)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	/* ..._and_test() provides needed barrier */
	(void)atomic_dec_and_test(&gms->ms_range_active);

	wake_up_all(&gms->ms_wait_queue);
	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n",
		gms, range->start, range->end);
}

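/*
 * Called when the mm the notifier is attached to is being torn down.
 * Record that fact so gru_drop_mmu_notifier() does not try to unregister
 * from an mm that has already exited.
 */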
static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	gms->ms_released = 1;
	gru_dbg(grudev, "gms %p\n", gms);
}

static const struct mmu_notifier_ops gru_mmuops = {
	.invalidate_range_start	= gru_invalidate_range_start,
	.invalidate_range_end	= gru_invalidate_range_end,
	.release		= gru_release,
};

/* Move this to the basic mmu_notifier file. But for now... */
static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
					 const struct mmu_notifier_ops *ops)
{
	struct mmu_notifier *mn, *gru_mn = NULL;

	if (mm->mmu_notifier_mm) {
		rcu_read_lock();
		hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list,
					 hlist)
			if (mn->ops == ops) {
				gru_mn = mn;
				break;
			}
		rcu_read_unlock();
	}
	return gru_mn;
}

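/*
 * Register a mmu_notifier for the current task's mm. If the mm already
 * has a GRU notifier registered, reuse the existing gru_mm_struct and
 * take an additional reference.
 */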
struct gru_mm_struct *gru_register_mmu_notifier(void)
{
	struct gru_mm_struct *gms;
	struct mmu_notifier *mn;
	int err;

	mn = mmu_find_ops(current->mm, &gru_mmuops);
	if (mn) {
		gms = container_of(mn, struct gru_mm_struct, ms_notifier);
		atomic_inc(&gms->ms_refcnt);
	} else {
		gms = kzalloc(sizeof(*gms), GFP_KERNEL);
		if (!gms)
			return ERR_PTR(-ENOMEM);
		STAT(gms_alloc);
		spin_lock_init(&gms->ms_asid_lock);
		gms->ms_notifier.ops = &gru_mmuops;
		atomic_set(&gms->ms_refcnt, 1);
		init_waitqueue_head(&gms->ms_wait_queue);
		err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
		if (err)
			goto error;
	}
	if (gms)
		gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
			atomic_read(&gms->ms_refcnt));
	return gms;
error:
	kfree(gms);
	return ERR_PTR(err);
}
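
/*
 * Sketch of the assumed caller pattern (registration when a GRU context
 * is created for current->mm, paired with a drop when it is freed):
 *
 *	gms = gru_register_mmu_notifier();
 *	if (IS_ERR(gms))
 *		return PTR_ERR(gms);
 *	...
 *	gru_drop_mmu_notifier(gms);
 */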

void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
{
	gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
		atomic_read(&gms->ms_refcnt), gms->ms_released);
	if (atomic_dec_return(&gms->ms_refcnt) == 0) {
		if (!gms->ms_released)
			mmu_notifier_unregister(&gms->ms_notifier, current->mm);
		kfree(gms);
		STAT(gms_free);
	}
}

/*
 * Setup TGH parameters. There are:
 *	- 24 TGH handles per GRU chip
 *	- a portion (MAX_LOCAL_TGH) of the handles are reserved for
 *	  use by blade-local cpus
 *	- the rest are used by off-blade cpus. This usage is
 *	  less frequent than blade-local usage.
 *
 * For now, use 16 handles for local flushes and 8 for remote flushes. If
 * the blade has 16 or fewer cpus, each cpu has a private handle that it
 * can use without contention.
 */
#define MAX_LOCAL_TGH	16

void gru_tgh_flush_init(struct gru_state *gru)
{
	int cpus, shift = 0, n;

	cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);

	/* n = cpus rounded up to the next power of 2 */
	if (cpus) {
		n = 1 << fls(cpus - 1);

		/*
		 * shift count for converting a local cpu# to a TGH index:
		 *	0 if cpus <= MAX_LOCAL_TGH,
		 *	1 if cpus <= 2*MAX_LOCAL_TGH,
		 *	etc.
		 */
		shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
	}
	gru->gs_tgh_local_shift = shift;

	/* first TGH index to use for remote purges */
	gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
}
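
/*
 * Worked example (hypothetical cpu count): with 28 possible cpus on the
 * blade, n = 32 and shift = 1, so pairs of blade-local cpus share a TGH
 * and gs_tgh_first_remote = (28 + 1) >> 1 = 14. TGHs 0..13 are then used
 * for blade-local flushes and TGHs 14..GRU_NUM_TGH-1 for off-blade
 * flushes.
 */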