1
2
3
4
5
6
7
8
9#include <linux/file.h>
10#include <linux/poll.h>
11#include <linux/init.h>
12#include <linux/fs.h>
13#include <linux/sched/signal.h>
14#include <linux/kernel.h>
15#include <linux/slab.h>
16#include <linux/list.h>
17#include <linux/spinlock.h>
18#include <linux/anon_inodes.h>
19#include <linux/syscalls.h>
20#include <linux/export.h>
21#include <linux/kref.h>
22#include <linux/eventfd.h>
23#include <linux/proc_fs.h>
24#include <linux/seq_file.h>
25#include <linux/idr.h>
26#include <linux/uio.h>
27
/* Allocator for the per-context ids stored in eventfd_ctx::id. */
static DEFINE_IDA(eventfd_ida);
29
/*
 * Per-eventfd state. One instance is allocated by do_eventfd() and stored
 * in the file's private_data.
 */
struct eventfd_ctx {
	/* Reference count; the context is freed when it drops to zero. */
	struct kref kref;
	/*
	 * Wait queue for readers, writers and pollers. The code in this file
	 * modifies "count" only while holding wqh.lock.
	 */
	wait_queue_head_t wqh;
	/*
	 * The 64-bit counter. Writers and eventfd_signal() add to it, readers
	 * subtract from it. eventfd_write() keeps it strictly below
	 * ULLONG_MAX; eventfd_signal() may saturate it at ULLONG_MAX.
	 */
	__u64 count;
	/* EFD_* flags given at creation (EFD_SEMAPHORE is consulted on read). */
	unsigned int flags;
	/* Id obtained from eventfd_ida; negative if the allocation failed. */
	int id;
};
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
61{
62 unsigned long flags;
63
64
65
66
67
68
69
70
71
72 if (WARN_ON_ONCE(current->in_eventfd_signal))
73 return 0;
74
75 spin_lock_irqsave(&ctx->wqh.lock, flags);
76 current->in_eventfd_signal = 1;
77 if (ULLONG_MAX - ctx->count < n)
78 n = ULLONG_MAX - ctx->count;
79 ctx->count += n;
80 if (waitqueue_active(&ctx->wqh))
81 wake_up_locked_poll(&ctx->wqh, EPOLLIN);
82 current->in_eventfd_signal = 0;
83 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
84
85 return n;
86}
87EXPORT_SYMBOL_GPL(eventfd_signal);
88
89static void eventfd_free_ctx(struct eventfd_ctx *ctx)
90{
91 if (ctx->id >= 0)
92 ida_simple_remove(&eventfd_ida, ctx->id);
93 kfree(ctx);
94}
95
96static void eventfd_free(struct kref *kref)
97{
98 struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
99
100 eventfd_free_ctx(ctx);
101}
102
103
104
105
106
107
108
109
/**
 * eventfd_ctx_put - Drop a reference on an eventfd context.
 * @ctx: the eventfd context whose reference count is decremented
 *
 * When the last reference is dropped, the context is released through
 * eventfd_free().
 */
void eventfd_ctx_put(struct eventfd_ctx *ctx)
{
	kref_put(&ctx->kref, eventfd_free);
}
EXPORT_SYMBOL_GPL(eventfd_ctx_put);
115
116static int eventfd_release(struct inode *inode, struct file *file)
117{
118 struct eventfd_ctx *ctx = file->private_data;
119
120 wake_up_poll(&ctx->wqh, EPOLLHUP);
121 eventfd_ctx_put(ctx);
122 return 0;
123}
124
125static __poll_t eventfd_poll(struct file *file, poll_table *wait)
126{
127 struct eventfd_ctx *ctx = file->private_data;
128 __poll_t events = 0;
129 u64 count;
130
131 poll_wait(file, &ctx->wqh, wait);
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171 count = READ_ONCE(ctx->count);
172
173 if (count > 0)
174 events |= EPOLLIN;
175 if (count == ULLONG_MAX)
176 events |= EPOLLERR;
177 if (ULLONG_MAX - 1 > count)
178 events |= EPOLLOUT;
179
180 return events;
181}
182
183void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
184{
185 lockdep_assert_held(&ctx->wqh.lock);
186
187 *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
188 ctx->count -= *cnt;
189}
190EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
206 __u64 *cnt)
207{
208 unsigned long flags;
209
210 spin_lock_irqsave(&ctx->wqh.lock, flags);
211 eventfd_ctx_do_read(ctx, cnt);
212 __remove_wait_queue(&ctx->wqh, wait);
213 if (*cnt != 0 && waitqueue_active(&ctx->wqh))
214 wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
215 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
216
217 return *cnt != 0 ? 0 : -EAGAIN;
218}
219EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
220
221static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to)
222{
223 struct file *file = iocb->ki_filp;
224 struct eventfd_ctx *ctx = file->private_data;
225 __u64 ucnt = 0;
226 DECLARE_WAITQUEUE(wait, current);
227
228 if (iov_iter_count(to) < sizeof(ucnt))
229 return -EINVAL;
230 spin_lock_irq(&ctx->wqh.lock);
231 if (!ctx->count) {
232 if ((file->f_flags & O_NONBLOCK) ||
233 (iocb->ki_flags & IOCB_NOWAIT)) {
234 spin_unlock_irq(&ctx->wqh.lock);
235 return -EAGAIN;
236 }
237 __add_wait_queue(&ctx->wqh, &wait);
238 for (;;) {
239 set_current_state(TASK_INTERRUPTIBLE);
240 if (ctx->count)
241 break;
242 if (signal_pending(current)) {
243 __remove_wait_queue(&ctx->wqh, &wait);
244 __set_current_state(TASK_RUNNING);
245 spin_unlock_irq(&ctx->wqh.lock);
246 return -ERESTARTSYS;
247 }
248 spin_unlock_irq(&ctx->wqh.lock);
249 schedule();
250 spin_lock_irq(&ctx->wqh.lock);
251 }
252 __remove_wait_queue(&ctx->wqh, &wait);
253 __set_current_state(TASK_RUNNING);
254 }
255 eventfd_ctx_do_read(ctx, &ucnt);
256 if (waitqueue_active(&ctx->wqh))
257 wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
258 spin_unlock_irq(&ctx->wqh.lock);
259 if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt)))
260 return -EFAULT;
261
262 return sizeof(ucnt);
263}
264
/*
 * ->write handler: add an 8-byte user-supplied value to the counter.
 *
 * The value is accepted only if the counter stays strictly below ULLONG_MAX
 * after the addition. If there is not enough room, the call fails with
 * -EAGAIN when the file is O_NONBLOCK; otherwise it sleeps interruptibly
 * until there is room or a signal is pending. @ppos is not used.
 *
 * Returns sizeof(__u64) on success, -EINVAL if @count is smaller than 8
 * bytes or the value is ULLONG_MAX, -EFAULT if the value cannot be copied
 * from user space, -EAGAIN or -ERESTARTSYS as described above.
 */
static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
			     loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	ssize_t res;
	__u64 ucnt;
	DECLARE_WAITQUEUE(wait, current);

	if (count < sizeof(ucnt))
		return -EINVAL;
	if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
		return -EFAULT;
	/* ULLONG_MAX can never be added: the check below could not pass. */
	if (ucnt == ULLONG_MAX)
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	/* Default result for the non-blocking, no-room case. */
	res = -EAGAIN;
	if (ULLONG_MAX - ctx->count > ucnt)
		res = sizeof(ucnt);
	else if (!(file->f_flags & O_NONBLOCK)) {
		/* Blocking write: wait until the value fits or a signal arrives. */
		__add_wait_queue(&ctx->wqh, &wait);
		for (res = 0;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ULLONG_MAX - ctx->count > ucnt) {
				res = sizeof(ucnt);
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			/* Drop the lock across the sleep, retake it to re-check. */
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	/* Only a positive result means the value may be added. */
	if (likely(res > 0)) {
		ctx->count += ucnt;
		/* The counter went up, so readers may have something to read. */
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked_poll(&ctx->wqh, EPOLLIN);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return res;
}
311
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo handler: print the current counter value (hexadecimal) and
 * the context id. The counter is sampled under the wait queue lock.
 */
static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct eventfd_ctx *ctx = f->private_data;
	unsigned long long snapshot;

	spin_lock_irq(&ctx->wqh.lock);
	snapshot = ctx->count;
	spin_unlock_irq(&ctx->wqh.lock);

	seq_printf(m, "eventfd-count: %16llx\n", snapshot);
	seq_printf(m, "eventfd-id: %d\n", ctx->id);
}
#endif
324
/*
 * File operations for eventfd files. The address of this table is also used
 * by eventfd_fget() and eventfd_ctx_fileget() to recognise eventfd files.
 */
static const struct file_operations eventfd_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= eventfd_show_fdinfo,
#endif
	.release	= eventfd_release,
	.poll		= eventfd_poll,
	.read_iter	= eventfd_read,
	.write		= eventfd_write,
	.llseek		= noop_llseek,
};
335
336
337
338
339
340
341
342
343
344
345
346struct file *eventfd_fget(int fd)
347{
348 struct file *file;
349
350 file = fget(fd);
351 if (!file)
352 return ERR_PTR(-EBADF);
353 if (file->f_op != &eventfd_fops) {
354 fput(file);
355 return ERR_PTR(-EINVAL);
356 }
357
358 return file;
359}
360EXPORT_SYMBOL_GPL(eventfd_fget);
361
362
363
364
365
366
367
368
369
370
371struct eventfd_ctx *eventfd_ctx_fdget(int fd)
372{
373 struct eventfd_ctx *ctx;
374 struct fd f = fdget(fd);
375 if (!f.file)
376 return ERR_PTR(-EBADF);
377 ctx = eventfd_ctx_fileget(f.file);
378 fdput(f);
379 return ctx;
380}
381EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
382
383
384
385
386
387
388
389
390
391
392struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
393{
394 struct eventfd_ctx *ctx;
395
396 if (file->f_op != &eventfd_fops)
397 return ERR_PTR(-EINVAL);
398
399 ctx = file->private_data;
400 kref_get(&ctx->kref);
401 return ctx;
402}
403EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
404
405static int do_eventfd(unsigned int count, int flags)
406{
407 struct eventfd_ctx *ctx;
408 struct file *file;
409 int fd;
410
411
412 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
413 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
414
415 if (flags & ~EFD_FLAGS_SET)
416 return -EINVAL;
417
418 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
419 if (!ctx)
420 return -ENOMEM;
421
422 kref_init(&ctx->kref);
423 init_waitqueue_head(&ctx->wqh);
424 ctx->count = count;
425 ctx->flags = flags;
426 ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
427
428 flags &= EFD_SHARED_FCNTL_FLAGS;
429 flags |= O_RDWR;
430 fd = get_unused_fd_flags(flags);
431 if (fd < 0)
432 goto err;
433
434 file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
435 if (IS_ERR(file)) {
436 put_unused_fd(fd);
437 fd = PTR_ERR(file);
438 goto err;
439 }
440
441 file->f_mode |= FMODE_NOWAIT;
442 fd_install(fd, file);
443 return fd;
444err:
445 eventfd_free_ctx(ctx);
446 return fd;
447}
448
/* eventfd2(count, flags): create an eventfd with the given initial count and flags. */
SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
	return do_eventfd(count, flags);
}
453
/* eventfd(count): same as eventfd2() with no flags set. */
SYSCALL_DEFINE1(eventfd, unsigned int, count)
{
	return do_eventfd(count, 0);
}
458
459