1
2
3
4
5
6
7
8
9#include <linux/file.h>
10#include <linux/poll.h>
11#include <linux/init.h>
12#include <linux/fs.h>
13#include <linux/sched/signal.h>
14#include <linux/kernel.h>
15#include <linux/slab.h>
16#include <linux/list.h>
17#include <linux/spinlock.h>
18#include <linux/anon_inodes.h>
19#include <linux/syscalls.h>
20#include <linux/export.h>
21#include <linux/kref.h>
22#include <linux/eventfd.h>
23#include <linux/proc_fs.h>
24#include <linux/seq_file.h>
25#include <linux/idr.h>
26#include <linux/uio.h>
27
/*
 * Per-CPU counter that eventfd_signal() raises around its wakeup call and
 * checks on entry: a non-zero value there makes eventfd_signal() warn once
 * and return without touching the counter.
 */
DEFINE_PER_CPU(int, eventfd_wake_count);

/* Allocator for the per-context id stored in eventfd_ctx::id. */
static DEFINE_IDA(eventfd_ida);
31
/*
 * struct eventfd_ctx - state behind one eventfd file.
 *
 * The context is reference counted; the file holds one reference (dropped in
 * eventfd_release()) and eventfd_ctx_fileget() hands out additional ones.
 */
struct eventfd_ctx {
	/* Reference count; the final eventfd_ctx_put() frees the context. */
	struct kref kref;
	/*
	 * Wait queue used by readers, writers and pollers.  Its embedded
	 * lock (wqh.lock) is also the lock held around every update of
	 * @count in this file.
	 */
	wait_queue_head_t wqh;
	/*
	 * The 64-bit counter.  Writers add to it, readers subtract from it.
	 * eventfd_poll() reports EPOLLERR when it equals ULLONG_MAX.
	 */
	__u64 count;
	/* EFD_* flags given at creation; EFD_SEMAPHORE is tested on read. */
	unsigned int flags;
	/* Id from eventfd_ida, shown in fdinfo; negative if none was obtained. */
	int id;
};
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
63{
64 unsigned long flags;
65
66
67
68
69
70
71
72
73
74 if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
75 return 0;
76
77 spin_lock_irqsave(&ctx->wqh.lock, flags);
78 this_cpu_inc(eventfd_wake_count);
79 if (ULLONG_MAX - ctx->count < n)
80 n = ULLONG_MAX - ctx->count;
81 ctx->count += n;
82 if (waitqueue_active(&ctx->wqh))
83 wake_up_locked_poll(&ctx->wqh, EPOLLIN);
84 this_cpu_dec(eventfd_wake_count);
85 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
86
87 return n;
88}
89EXPORT_SYMBOL_GPL(eventfd_signal);
90
91static void eventfd_free_ctx(struct eventfd_ctx *ctx)
92{
93 if (ctx->id >= 0)
94 ida_simple_remove(&eventfd_ida, ctx->id);
95 kfree(ctx);
96}
97
98static void eventfd_free(struct kref *kref)
99{
100 struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
101
102 eventfd_free_ctx(ctx);
103}
104
105
106
107
108
109
110
111
112void eventfd_ctx_put(struct eventfd_ctx *ctx)
113{
114 kref_put(&ctx->kref, eventfd_free);
115}
116EXPORT_SYMBOL_GPL(eventfd_ctx_put);
117
118static int eventfd_release(struct inode *inode, struct file *file)
119{
120 struct eventfd_ctx *ctx = file->private_data;
121
122 wake_up_poll(&ctx->wqh, EPOLLHUP);
123 eventfd_ctx_put(ctx);
124 return 0;
125}
126
127static __poll_t eventfd_poll(struct file *file, poll_table *wait)
128{
129 struct eventfd_ctx *ctx = file->private_data;
130 __poll_t events = 0;
131 u64 count;
132
133 poll_wait(file, &ctx->wqh, wait);
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173 count = READ_ONCE(ctx->count);
174
175 if (count > 0)
176 events |= EPOLLIN;
177 if (count == ULLONG_MAX)
178 events |= EPOLLERR;
179 if (ULLONG_MAX - 1 > count)
180 events |= EPOLLOUT;
181
182 return events;
183}
184
185void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
186{
187 lockdep_assert_held(&ctx->wqh.lock);
188
189 *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
190 ctx->count -= *cnt;
191}
192EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
208 __u64 *cnt)
209{
210 unsigned long flags;
211
212 spin_lock_irqsave(&ctx->wqh.lock, flags);
213 eventfd_ctx_do_read(ctx, cnt);
214 __remove_wait_queue(&ctx->wqh, wait);
215 if (*cnt != 0 && waitqueue_active(&ctx->wqh))
216 wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
217 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
218
219 return *cnt != 0 ? 0 : -EAGAIN;
220}
221EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
222
/*
 * eventfd_read - ->read_iter handler: consume the counter into @to.
 * @iocb: kiocb for this read; supplies the file and the IOCB_NOWAIT flag.
 * @to: destination iterator; must have room for at least sizeof(__u64).
 *
 * If the counter is zero the caller either gets -EAGAIN (O_NONBLOCK set on
 * the file or IOCB_NOWAIT set on the iocb) or sleeps interruptibly on
 * ctx->wqh until the counter becomes non-zero or a signal is pending.  The
 * value copied out is whatever eventfd_ctx_do_read() stores in ucnt.
 *
 * Returns sizeof(__u64) on success, -EINVAL if @to is too small, -EAGAIN
 * for a non-blocking read of a zero counter, -ERESTARTSYS when a signal is
 * pending while waiting, or -EFAULT if the value could not be copied out in
 * full.
 */
static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct eventfd_ctx *ctx = file->private_data;
	__u64 ucnt = 0;
	DECLARE_WAITQUEUE(wait, current);

	if (iov_iter_count(to) < sizeof(ucnt))
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	if (!ctx->count) {
		/* Nothing to read: fail fast for non-blocking callers. */
		if ((file->f_flags & O_NONBLOCK) ||
		    (iocb->ki_flags & IOCB_NOWAIT)) {
			spin_unlock_irq(&ctx->wqh.lock);
			return -EAGAIN;
		}
		/* Queue ourselves while still holding wqh.lock. */
		__add_wait_queue(&ctx->wqh, &wait);
		for (;;) {
			/*
			 * Mark the task sleeping before re-checking the
			 * counter; both happen under wqh.lock.
			 */
			set_current_state(TASK_INTERRUPTIBLE);
			if (ctx->count)
				break;
			if (signal_pending(current)) {
				__remove_wait_queue(&ctx->wqh, &wait);
				__set_current_state(TASK_RUNNING);
				spin_unlock_irq(&ctx->wqh.lock);
				return -ERESTARTSYS;
			}
			/* Drop the lock across the sleep, retake it after. */
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	/* Counter is non-zero and wqh.lock is held: consume it. */
	eventfd_ctx_do_read(ctx, &ucnt);
	if (waitqueue_active(&ctx->wqh))
		wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
	spin_unlock_irq(&ctx->wqh.lock);
	/*
	 * The copy-out happens after the lock is dropped; the counter has
	 * already been consumed at this point even if the copy fails.
	 */
	if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt)))
		return -EFAULT;

	return sizeof(ucnt);
}
266
/*
 * eventfd_write - ->write handler: add a user-supplied value to the counter.
 * @file: eventfd file being written.
 * @buf: user buffer holding a __u64 increment.
 * @count: size of @buf; must be at least sizeof(__u64).
 * @ppos: file position (not used by this function).
 *
 * The increment is accepted only while ULLONG_MAX - ctx->count is strictly
 * greater than it, so the counter stays below ULLONG_MAX.  If it does not
 * fit, a file with O_NONBLOCK gets -EAGAIN and any other caller sleeps
 * interruptibly on ctx->wqh until it fits or a signal is pending.  After a
 * successful add, waiters are woken with EPOLLIN.
 *
 * Returns sizeof(__u64) on success, -EINVAL if @count is too small or the
 * value is ULLONG_MAX, -EFAULT if @buf cannot be read, -EAGAIN for a
 * non-blocking write that does not fit, or -ERESTARTSYS when a signal is
 * pending while waiting.
 */
static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
			     loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	ssize_t res;
	__u64 ucnt;
	DECLARE_WAITQUEUE(wait, current);

	if (count < sizeof(ucnt))
		return -EINVAL;
	if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
		return -EFAULT;
	/* ULLONG_MAX could never satisfy the fit check below; reject it. */
	if (ucnt == ULLONG_MAX)
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	/* Default result for the non-blocking, does-not-fit case. */
	res = -EAGAIN;
	if (ULLONG_MAX - ctx->count > ucnt)
		res = sizeof(ucnt);
	else if (!(file->f_flags & O_NONBLOCK)) {
		/* Blocking writer: wait on the queue until there is room. */
		__add_wait_queue(&ctx->wqh, &wait);
		for (res = 0;;) {
			/*
			 * Mark the task sleeping before re-checking for room;
			 * both happen under wqh.lock.
			 */
			set_current_state(TASK_INTERRUPTIBLE);
			if (ULLONG_MAX - ctx->count > ucnt) {
				res = sizeof(ucnt);
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			/* Drop the lock across the sleep, retake it after. */
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	/* Only a positive result means the increment fits; apply it. */
	if (likely(res > 0)) {
		ctx->count += ucnt;
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked_poll(&ctx->wqh, EPOLLIN);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return res;
}
313
314#ifdef CONFIG_PROC_FS
315static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
316{
317 struct eventfd_ctx *ctx = f->private_data;
318
319 spin_lock_irq(&ctx->wqh.lock);
320 seq_printf(m, "eventfd-count: %16llx\n",
321 (unsigned long long)ctx->count);
322 spin_unlock_irq(&ctx->wqh.lock);
323 seq_printf(m, "eventfd-id: %d\n", ctx->id);
324}
325#endif
326
327static const struct file_operations eventfd_fops = {
328#ifdef CONFIG_PROC_FS
329 .show_fdinfo = eventfd_show_fdinfo,
330#endif
331 .release = eventfd_release,
332 .poll = eventfd_poll,
333 .read_iter = eventfd_read,
334 .write = eventfd_write,
335 .llseek = noop_llseek,
336};
337
338
339
340
341
342
343
344
345
346
347
348struct file *eventfd_fget(int fd)
349{
350 struct file *file;
351
352 file = fget(fd);
353 if (!file)
354 return ERR_PTR(-EBADF);
355 if (file->f_op != &eventfd_fops) {
356 fput(file);
357 return ERR_PTR(-EINVAL);
358 }
359
360 return file;
361}
362EXPORT_SYMBOL_GPL(eventfd_fget);
363
364
365
366
367
368
369
370
371
372
373struct eventfd_ctx *eventfd_ctx_fdget(int fd)
374{
375 struct eventfd_ctx *ctx;
376 struct fd f = fdget(fd);
377 if (!f.file)
378 return ERR_PTR(-EBADF);
379 ctx = eventfd_ctx_fileget(f.file);
380 fdput(f);
381 return ctx;
382}
383EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
384
385
386
387
388
389
390
391
392
393
394struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
395{
396 struct eventfd_ctx *ctx;
397
398 if (file->f_op != &eventfd_fops)
399 return ERR_PTR(-EINVAL);
400
401 ctx = file->private_data;
402 kref_get(&ctx->kref);
403 return ctx;
404}
405EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
406
407static int do_eventfd(unsigned int count, int flags)
408{
409 struct eventfd_ctx *ctx;
410 struct file *file;
411 int fd;
412
413
414 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
415 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
416
417 if (flags & ~EFD_FLAGS_SET)
418 return -EINVAL;
419
420 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
421 if (!ctx)
422 return -ENOMEM;
423
424 kref_init(&ctx->kref);
425 init_waitqueue_head(&ctx->wqh);
426 ctx->count = count;
427 ctx->flags = flags;
428 ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
429
430 flags &= EFD_SHARED_FCNTL_FLAGS;
431 flags |= O_RDWR;
432 fd = get_unused_fd_flags(flags);
433 if (fd < 0)
434 goto err;
435
436 file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
437 if (IS_ERR(file)) {
438 put_unused_fd(fd);
439 fd = PTR_ERR(file);
440 goto err;
441 }
442
443 file->f_mode |= FMODE_NOWAIT;
444 fd_install(fd, file);
445 return fd;
446err:
447 eventfd_free_ctx(ctx);
448 return fd;
449}
450
/* eventfd2(count, flags): forwards both arguments to do_eventfd(). */
SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
	return do_eventfd(count, flags);
}
455
/* eventfd(count): same as eventfd2() with flags fixed to 0. */
SYSCALL_DEFINE1(eventfd, unsigned int, count)
{
	return do_eventfd(count, 0);
}
460
461