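/*
 * SN Platform GRU Driver
 *
 * MMU notifier callbacks + TLB flushing
 *
 * This file handles mmu notifier callouts from the core kernel. The
 * callouts are used to update the GRU TLB when the state of a process
 * address space changes. It also handles TLB invalidates issued by the
 * GRU driver itself.
 */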

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/hugetlb.h>
#include <linux/delay.h>
#include <linux/timex.h>
#include <linux/srcu.h>
#include <asm/processor.h>
#include "gru.h"
#include "grutables.h"
#include <asm/uv/uv_hub.h>

#define gru_random()	get_cycles()
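
/*
 * get_lock_tgh_handle
 *
 * Select and lock a TGH (TLB global handle) for issuing a TLB invalidate.
 * For GRUs on the local blade, a fixed TGH derived from the blade-local
 * cpu number is used; this handle is normally private to the cpu, so no
 * contention occurs. For off-blade GRUs, a TGH is chosen at random from
 * the range above the cpu-private handles. Preemption is disabled while
 * the TGH is held so that the task cannot migrate to another blade.
 */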
static inline int get_off_blade_tgh(struct gru_state *gru)
{
	int n;

	n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
	n = gru_random() % n;
	n += gru->gs_tgh_first_remote;
	return n;
}

static inline int get_on_blade_tgh(struct gru_state *gru)
{
	return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
}

static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
							  *gru)
{
	struct gru_tlb_global_handle *tgh;
	int n;

	preempt_disable();
	if (uv_numa_blade_id() == gru->gs_blade_id)
		n = get_on_blade_tgh(gru);
	else
		n = get_off_blade_tgh(gru);
	tgh = get_tgh_by_index(gru, n);
	lock_tgh_handle(tgh);

	return tgh;
}

static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
{
	unlock_tgh_handle(tgh);
	preempt_enable();
}
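
/*
 * gru_flush_tlb_range
 *
 * General purpose TLB invalidation function. Scans every GRU in the
 * system (partition) on which the mm has an ASID allocated. GRUs that
 * currently have contexts loaded for the mm get a TGH invalidate for the
 * affected range; GRUs holding only a stale ASID have the ASID cleared
 * from the tracker, so their TLBs are flushed lazily when the ASID is
 * eventually reused.
 */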
void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
			 unsigned long len)
{
	struct gru_state *gru;
	struct gru_mm_tracker *asids;
	struct gru_tlb_global_handle *tgh;
	unsigned long num;
	int grupagesize, pagesize, pageshift, gid, asid;

	pageshift = PAGE_SHIFT;
	pagesize = (1UL << pageshift);
	grupagesize = GRU_PAGESIZE(pageshift);
	num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);

	STAT(flush_tlb);
	gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
		start, len, gms->ms_asidmap[0]);

	spin_lock(&gms->ms_asid_lock);
	for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
		STAT(flush_tlb_gru);
		gru = GID_TO_GRU(gid);
		asids = gms->ms_asids + gid;
		asid = asids->mt_asid;
		if (asids->mt_ctxbitmap && asid) {
			STAT(flush_tlb_gru_tgh);
			asid = GRUASID(asid, start);
			gru_dbg(grudev,
				"  FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n",
				gid, asid, start, grupagesize, num,
				asids->mt_ctxbitmap);
			tgh = get_lock_tgh_handle(gru);
			tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0,
				       num - 1, asids->mt_ctxbitmap);
			get_unlock_tgh_handle(tgh);
		} else {
			STAT(flush_tlb_gru_zero_asid);
			asids->mt_asid = 0;
			__clear_bit(gru->gs_gid, gms->ms_asidmap);
			gru_dbg(grudev,
				"  CLEARASID gruid %d, asid 0x%x, cbmap 0x%x, asidmap 0x%lx\n",
				gid, asid, asids->mt_ctxbitmap,
				gms->ms_asidmap[0]);
		}
	}
	spin_unlock(&gms->ms_asid_lock);
}
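
/*
 * Flush the entire TLB on a chiplet.
 */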
void gru_flush_all_tlb(struct gru_state *gru)
{
	struct gru_tlb_global_handle *tgh;

	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
	tgh = get_lock_tgh_handle(gru);
	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff);
	get_unlock_tgh_handle(tgh);
}
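
/*
 * mmu_notifier callout functions
 */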
static int gru_invalidate_range_start(struct mmu_notifier *mn,
				      const struct mmu_notifier_range *range)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	STAT(mmu_invalidate_range);
	atomic_inc(&gms->ms_range_active);
	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
		range->start, range->end, atomic_read(&gms->ms_range_active));
	gru_flush_tlb_range(gms, range->start, range->end - range->start);

	return 0;
}

static void gru_invalidate_range_end(struct mmu_notifier *mn,
				     const struct mmu_notifier_range *range)
{
	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
						 ms_notifier);

	/* ..._and_test() provides needed barrier */
	(void)atomic_dec_and_test(&gms->ms_range_active);

	wake_up_all(&gms->ms_wait_queue);
	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n",
		gms, range->start, range->end);
}
237
238static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
239{
240 struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
241 ms_notifier);
242
243 gms->ms_released = 1;
244 gru_dbg(grudev, "gms %p\n", gms);
245}
246
247
248static const struct mmu_notifier_ops gru_mmuops = {
249 .invalidate_range_start = gru_invalidate_range_start,
250 .invalidate_range_end = gru_invalidate_range_end,
251 .release = gru_release,
252};
253
254
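
/*
 * Find a previously registered mmu_notifier on this mm that uses the
 * specified ops, if one exists.
 */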
static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
					 const struct mmu_notifier_ops *ops)
{
	struct mmu_notifier *mn, *gru_mn = NULL;

	if (mm->mmu_notifier_mm) {
		rcu_read_lock();
		hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list,
					 hlist)
			if (mn->ops == ops) {
				gru_mn = mn;
				break;
			}
		rcu_read_unlock();
	}
	return gru_mn;
}
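
/*
 * Register a GRU mmu notifier for the current mm, or take an additional
 * reference on the gms if one is already registered for this mm.
 */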
struct gru_mm_struct *gru_register_mmu_notifier(void)
{
	struct gru_mm_struct *gms;
	struct mmu_notifier *mn;
	int err;

	mn = mmu_find_ops(current->mm, &gru_mmuops);
	if (mn) {
		gms = container_of(mn, struct gru_mm_struct, ms_notifier);
		atomic_inc(&gms->ms_refcnt);
	} else {
		gms = kzalloc(sizeof(*gms), GFP_KERNEL);
		if (!gms)
			return ERR_PTR(-ENOMEM);
		STAT(gms_alloc);
		spin_lock_init(&gms->ms_asid_lock);
		gms->ms_notifier.ops = &gru_mmuops;
		atomic_set(&gms->ms_refcnt, 1);
		init_waitqueue_head(&gms->ms_wait_queue);
		err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
		if (err)
			goto error;
	}
	if (gms)
		gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
			atomic_read(&gms->ms_refcnt));
	return gms;
error:
	kfree(gms);
	return ERR_PTR(err);
}
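
/*
 * Drop a reference on the gms; free it when the last reference is gone.
 * mmu_notifier_unregister() is skipped if the mm has already been
 * released via the ->release callout.
 */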
void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
{
	gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
		atomic_read(&gms->ms_refcnt), gms->ms_released);
	if (atomic_dec_return(&gms->ms_refcnt) == 0) {
		if (!gms->ms_released)
			mmu_notifier_unregister(&gms->ms_notifier, current->mm);
		kfree(gms);
		STAT(gms_free);
	}
}
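
/*
 * Set up the TGH parameters for a GRU chiplet. A portion (MAX_LOCAL_TGH)
 * of the TGH handles is reserved for use by blade-local cpus; the
 * remainder is used for the less frequent off-blade flushes.
 */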
#define MAX_LOCAL_TGH	16

void gru_tgh_flush_init(struct gru_state *gru)
{
	int cpus, shift = 0, n;

	cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);

	/* n = cpus rounded up to the next power of 2 */
	if (cpus) {
		n = 1 << fls(cpus - 1);

		/*
		 * Shift count for converting a local cpu# to a TGH index:
		 *	0 if cpus <= MAX_LOCAL_TGH,
		 *	1 if cpus <= 2*MAX_LOCAL_TGH,
		 *	2 if cpus <= 4*MAX_LOCAL_TGH,
		 *	etc.
		 */
		shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
	}
	gru->gs_tgh_local_shift = shift;

	/* first TGH index to use for remote (off-blade) purges */
	gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
}