1
2
3
4
5
6
7
8#include <linux/file.h>
9#include <linux/poll.h>
10#include <linux/init.h>
11#include <linux/fs.h>
12#include <linux/sched/signal.h>
13#include <linux/kernel.h>
14#include <linux/slab.h>
15#include <linux/list.h>
16#include <linux/spinlock.h>
17#include <linux/anon_inodes.h>
18#include <linux/syscalls.h>
19#include <linux/export.h>
20#include <linux/kref.h>
21#include <linux/eventfd.h>
22#include <linux/proc_fs.h>
23#include <linux/seq_file.h>
24
/*
 * Per-eventfd state.  Shared between the file itself and any kernel-side
 * users that took a reference via eventfd_ctx_fdget()/eventfd_ctx_fileget().
 */
struct eventfd_ctx {
	struct kref kref;	/* lifetime of this context */
	wait_queue_head_t wqh;	/* readers/writers/pollers wait here; its
				 * spinlock also serializes ->count */
	/*
	 * Every time that a write(2) is performed on an eventfd, the
	 * value of the __u64 being written is added to "count" and a
	 * wakeup is performed on "wqh".  A read(2) will return the "count"
	 * value to userspace, and will reset "count" to zero (or decrement
	 * it by 1 in EFD_SEMAPHORE mode — see eventfd_ctx_do_read()).
	 * The kernel-side eventfd_signal() also adds to "count" and
	 * issues a wakeup.
	 */
	__u64 count;
	unsigned int flags;	/* EFD_* flags given at creation time */
};
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
/**
 * eventfd_signal - Adds @n to the eventfd counter.
 * @ctx: [in] Pointer to the eventfd context.
 * @n: [in] Value to be added to the eventfd internal counter.
 *
 * This function is supposed to be called by the kernel in paths that do not
 * allow sleeping (the wqh.lock is taken with interrupts disabled).  The
 * counter is allowed to reach ULLONG_MAX: the addend is clamped so the sum
 * never wraps, and poll(2) reports the saturated state as EPOLLERR (see
 * eventfd_poll()).
 *
 * Returns the amount by which the counter was actually incremented (which
 * may be less than @n if the counter saturated).
 */
__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
{
	unsigned long flags;

	spin_lock_irqsave(&ctx->wqh.lock, flags);
	/* Clamp @n so ctx->count never overflows past ULLONG_MAX. */
	if (ULLONG_MAX - ctx->count < n)
		n = ULLONG_MAX - ctx->count;
	ctx->count += n;
	if (waitqueue_active(&ctx->wqh))
		wake_up_locked_poll(&ctx->wqh, EPOLLIN);
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return n;
}
EXPORT_SYMBOL_GPL(eventfd_signal);
69
/* Free the context storage (also used on the do_eventfd() error path). */
static void eventfd_free_ctx(struct eventfd_ctx *ctx)
{
	kfree(ctx);
}
74
/* kref release callback: recover the context from its kref and free it. */
static void eventfd_free(struct kref *kref)
{
	struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);

	eventfd_free_ctx(ctx);
}
81
82
83
84
85
86
87
88
/**
 * eventfd_ctx_put - Releases a reference to the internal eventfd context.
 * @ctx: [in] Pointer to eventfd context.
 *
 * The eventfd context reference must have been previously acquired either
 * with eventfd_ctx_fdget() or eventfd_ctx_fileget().  Dropping the last
 * reference frees the context.
 */
void eventfd_ctx_put(struct eventfd_ctx *ctx)
{
	kref_put(&ctx->kref, eventfd_free);
}
EXPORT_SYMBOL_GPL(eventfd_ctx_put);
94
/*
 * ->release(): last file reference is gone.  Wake any remaining pollers
 * with EPOLLHUP, then drop the reference the file held on the context.
 */
static int eventfd_release(struct inode *inode, struct file *file)
{
	struct eventfd_ctx *ctx = file->private_data;

	wake_up_poll(&ctx->wqh, EPOLLHUP);
	eventfd_ctx_put(ctx);
	return 0;
}
103
/*
 * poll(2)/epoll support: derive the event mask from a lockless snapshot
 * of the counter.
 */
static __poll_t eventfd_poll(struct file *file, poll_table *wait)
{
	struct eventfd_ctx *ctx = file->private_data;
	__poll_t events = 0;
	u64 count;

	poll_wait(file, &ctx->wqh, wait);

	/*
	 * All updates to ctx->count in this file are made under
	 * ctx->wqh.lock.  The read below can be done without that lock
	 * because poll_wait() above has already added us to the waitqueue
	 * (taking the same lock) if the caller intends to sleep, so any
	 * counter update racing with this snapshot also issues a wakeup
	 * we cannot miss.  READ_ONCE() stops the compiler from re-reading
	 * ctx->count for each of the three checks below.
	 *
	 * NOTE(review): on 32-bit architectures a 64-bit load may not be
	 * single-copy atomic, so this snapshot is presumably advisory
	 * only — the authoritative check happens under the lock in
	 * read()/write().  Confirm against the waitqueue/poll docs.
	 */
	count = READ_ONCE(ctx->count);

	if (count > 0)
		events |= EPOLLIN;	/* a read(2) would not block */
	if (count == ULLONG_MAX)
		events |= EPOLLERR;	/* saturated by eventfd_signal() */
	if (ULLONG_MAX - 1 > count)
		events |= EPOLLOUT;	/* at least a value of 1 can be written */

	return events;
}
161
/*
 * Consume the counter: in EFD_SEMAPHORE mode a read takes exactly 1,
 * otherwise it takes (and reports via @cnt) the whole counter value.
 * Caller must hold ctx->wqh.lock (both callers in this file do).
 */
static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
{
	*cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
	ctx->count -= *cnt;
}
167
168
169
170
171
172
173
174
175
176
177
178
179
180
/**
 * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
 * @ctx: [in] Pointer to eventfd context.
 * @wait: [in] Wait queue entry to be removed.
 * @cnt: [out] Pointer to the 64-bit counter value.
 *
 * Atomically (under ctx->wqh.lock) consumes the counter and removes @wait
 * from the eventfd wait queue head.  If anything was consumed, blocked
 * writers are woken with EPOLLOUT.
 *
 * Returns %0 if a non-zero value was read, or the following error code:
 *
 * -EAGAIN      : The counter was zero (the operation would have blocked).
 */
int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
				  __u64 *cnt)
{
	unsigned long flags;

	spin_lock_irqsave(&ctx->wqh.lock, flags);
	eventfd_ctx_do_read(ctx, cnt);
	__remove_wait_queue(&ctx->wqh, wait);
	if (*cnt != 0 && waitqueue_active(&ctx->wqh))
		wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return *cnt != 0 ? 0 : -EAGAIN;
}
EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
196
/*
 * read(2) on an eventfd: wait (unless O_NONBLOCK) until the counter is
 * non-zero, consume it via eventfd_ctx_do_read(), and copy the consumed
 * value to userspace.  Buffers shorter than 8 bytes get -EINVAL.
 */
static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	ssize_t res;
	__u64 ucnt = 0;
	DECLARE_WAITQUEUE(wait, current);

	if (count < sizeof(ucnt))
		return -EINVAL;

	spin_lock_irq(&ctx->wqh.lock);
	res = -EAGAIN;
	if (ctx->count > 0)
		res = sizeof(ucnt);
	else if (!(file->f_flags & O_NONBLOCK)) {
		/*
		 * Open-coded wait loop: recheck the condition with the
		 * lock held after every wakeup; bail out on signals.
		 */
		__add_wait_queue(&ctx->wqh, &wait);
		for (;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ctx->count > 0) {
				res = sizeof(ucnt);
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	if (likely(res > 0)) {
		/* Consume the counter and wake writers blocked on a full one. */
		eventfd_ctx_do_read(ctx, &ucnt);
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	/* Copy to userspace outside the irq-disabling spinlock. */
	if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
		return -EFAULT;

	return res;
}
243
/*
 * write(2) on an eventfd: add the written 64-bit value to the counter,
 * waiting (unless O_NONBLOCK) until the addition would not overflow.
 * Writing ULLONG_MAX is forbidden (-EINVAL) since that value is reserved
 * as the saturation/overflow marker.
 */
static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
			     loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	ssize_t res;
	__u64 ucnt;
	DECLARE_WAITQUEUE(wait, current);

	if (count < sizeof(ucnt))
		return -EINVAL;
	if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
		return -EFAULT;
	if (ucnt == ULLONG_MAX)
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	res = -EAGAIN;
	/* There is room iff count + ucnt stays strictly below ULLONG_MAX. */
	if (ULLONG_MAX - ctx->count > ucnt)
		res = sizeof(ucnt);
	else if (!(file->f_flags & O_NONBLOCK)) {
		/*
		 * Open-coded wait loop mirroring eventfd_read(): recheck
		 * with the lock held after every wakeup.
		 */
		__add_wait_queue(&ctx->wqh, &wait);
		for (res = 0;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ULLONG_MAX - ctx->count > ucnt) {
				res = sizeof(ucnt);
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	if (likely(res > 0)) {
		ctx->count += ucnt;
		/* Counter became non-zero: wake readers/pollers. */
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked_poll(&ctx->wqh, EPOLLIN);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return res;
}
290
#ifdef CONFIG_PROC_FS
/* /proc/<pid>/fdinfo/<fd>: expose the current counter value (hex). */
static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct eventfd_ctx *ctx = f->private_data;

	/* Take the lock so a consistent 64-bit value is printed. */
	spin_lock_irq(&ctx->wqh.lock);
	seq_printf(m, "eventfd-count: %16llx\n",
		   (unsigned long long)ctx->count);
	spin_unlock_irq(&ctx->wqh.lock);
}
#endif
302
/* File operations for eventfd descriptors; eventfds are not seekable. */
static const struct file_operations eventfd_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= eventfd_show_fdinfo,
#endif
	.release	= eventfd_release,
	.poll		= eventfd_poll,
	.read		= eventfd_read,
	.write		= eventfd_write,
	.llseek		= noop_llseek,
};
313
314
315
316
317
318
319
320
321
322
323
324struct file *eventfd_fget(int fd)
325{
326 struct file *file;
327
328 file = fget(fd);
329 if (!file)
330 return ERR_PTR(-EBADF);
331 if (file->f_op != &eventfd_fops) {
332 fput(file);
333 return ERR_PTR(-EINVAL);
334 }
335
336 return file;
337}
338EXPORT_SYMBOL_GPL(eventfd_fget);
339
340
341
342
343
344
345
346
347
348
349struct eventfd_ctx *eventfd_ctx_fdget(int fd)
350{
351 struct eventfd_ctx *ctx;
352 struct fd f = fdget(fd);
353 if (!f.file)
354 return ERR_PTR(-EBADF);
355 ctx = eventfd_ctx_fileget(f.file);
356 fdput(f);
357 return ctx;
358}
359EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
360
361
362
363
364
365
366
367
368
369
370struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
371{
372 struct eventfd_ctx *ctx;
373
374 if (file->f_op != &eventfd_fops)
375 return ERR_PTR(-EINVAL);
376
377 ctx = file->private_data;
378 kref_get(&ctx->kref);
379 return ctx;
380}
381EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
382
/*
 * Common implementation of the eventfd(2)/eventfd2(2) syscalls: allocate a
 * context with initial counter @count and install it in a new anonymous
 * inode file descriptor.  Returns the new fd or a negative errno.
 */
static int do_eventfd(unsigned int count, int flags)
{
	struct eventfd_ctx *ctx;
	int fd;

	/* EFD_* flags are passed straight through to the file; verify
	 * they share the O_* encoding at compile time. */
	BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);

	if (flags & ~EFD_FLAGS_SET)
		return -EINVAL;

	/* Plain kmalloc() is fine: every field is initialized below. */
	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	kref_init(&ctx->kref);
	init_waitqueue_head(&ctx->wqh);
	ctx->count = count;
	ctx->flags = flags;

	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
			      O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
	if (fd < 0)
		/* No fd was installed, so we still own the only reference. */
		eventfd_free_ctx(ctx);

	return fd;
}
411
/* eventfd2(2): create an eventfd with initial value @count and @flags. */
SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
	return do_eventfd(count, flags);
}
416
/* eventfd(2): legacy variant of eventfd2(2) with no flags. */
SYSCALL_DEFINE1(eventfd, unsigned int, count)
{
	return do_eventfd(count, 0);
}
421
422