1
2
3
4
5
6
7
8
9#include <linux/file.h>
10#include <linux/poll.h>
11#include <linux/init.h>
12#include <linux/fs.h>
13#include <linux/sched/signal.h>
14#include <linux/kernel.h>
15#include <linux/slab.h>
16#include <linux/list.h>
17#include <linux/spinlock.h>
18#include <linux/anon_inodes.h>
19#include <linux/syscalls.h>
20#include <linux/export.h>
21#include <linux/kref.h>
22#include <linux/eventfd.h>
23#include <linux/proc_fs.h>
24#include <linux/seq_file.h>
25#include <linux/idr.h>
26
27static DEFINE_IDA(eventfd_ida);
28
29struct eventfd_ctx {
30 struct kref kref;
31 wait_queue_head_t wqh;
32
33
34
35
36
37
38
39
40 __u64 count;
41 unsigned int flags;
42 int id;
43};
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
60{
61 unsigned long flags;
62
63 spin_lock_irqsave(&ctx->wqh.lock, flags);
64 if (ULLONG_MAX - ctx->count < n)
65 n = ULLONG_MAX - ctx->count;
66 ctx->count += n;
67 if (waitqueue_active(&ctx->wqh))
68 wake_up_locked_poll(&ctx->wqh, EPOLLIN);
69 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
70
71 return n;
72}
73EXPORT_SYMBOL_GPL(eventfd_signal);
74
75static void eventfd_free_ctx(struct eventfd_ctx *ctx)
76{
77 if (ctx->id >= 0)
78 ida_simple_remove(&eventfd_ida, ctx->id);
79 kfree(ctx);
80}
81
82static void eventfd_free(struct kref *kref)
83{
84 struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
85
86 eventfd_free_ctx(ctx);
87}
88
89
90
91
92
93
94
95
96void eventfd_ctx_put(struct eventfd_ctx *ctx)
97{
98 kref_put(&ctx->kref, eventfd_free);
99}
100EXPORT_SYMBOL_GPL(eventfd_ctx_put);
101
102static int eventfd_release(struct inode *inode, struct file *file)
103{
104 struct eventfd_ctx *ctx = file->private_data;
105
106 wake_up_poll(&ctx->wqh, EPOLLHUP);
107 eventfd_ctx_put(ctx);
108 return 0;
109}
110
111static __poll_t eventfd_poll(struct file *file, poll_table *wait)
112{
113 struct eventfd_ctx *ctx = file->private_data;
114 __poll_t events = 0;
115 u64 count;
116
117 poll_wait(file, &ctx->wqh, wait);
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157 count = READ_ONCE(ctx->count);
158
159 if (count > 0)
160 events |= EPOLLIN;
161 if (count == ULLONG_MAX)
162 events |= EPOLLERR;
163 if (ULLONG_MAX - 1 > count)
164 events |= EPOLLOUT;
165
166 return events;
167}
168
169static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
170{
171 *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
172 ctx->count -= *cnt;
173}
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
189 __u64 *cnt)
190{
191 unsigned long flags;
192
193 spin_lock_irqsave(&ctx->wqh.lock, flags);
194 eventfd_ctx_do_read(ctx, cnt);
195 __remove_wait_queue(&ctx->wqh, wait);
196 if (*cnt != 0 && waitqueue_active(&ctx->wqh))
197 wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
198 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
199
200 return *cnt != 0 ? 0 : -EAGAIN;
201}
202EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
203
204static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
205 loff_t *ppos)
206{
207 struct eventfd_ctx *ctx = file->private_data;
208 ssize_t res;
209 __u64 ucnt = 0;
210 DECLARE_WAITQUEUE(wait, current);
211
212 if (count < sizeof(ucnt))
213 return -EINVAL;
214
215 spin_lock_irq(&ctx->wqh.lock);
216 res = -EAGAIN;
217 if (ctx->count > 0)
218 res = sizeof(ucnt);
219 else if (!(file->f_flags & O_NONBLOCK)) {
220 __add_wait_queue(&ctx->wqh, &wait);
221 for (;;) {
222 set_current_state(TASK_INTERRUPTIBLE);
223 if (ctx->count > 0) {
224 res = sizeof(ucnt);
225 break;
226 }
227 if (signal_pending(current)) {
228 res = -ERESTARTSYS;
229 break;
230 }
231 spin_unlock_irq(&ctx->wqh.lock);
232 schedule();
233 spin_lock_irq(&ctx->wqh.lock);
234 }
235 __remove_wait_queue(&ctx->wqh, &wait);
236 __set_current_state(TASK_RUNNING);
237 }
238 if (likely(res > 0)) {
239 eventfd_ctx_do_read(ctx, &ucnt);
240 if (waitqueue_active(&ctx->wqh))
241 wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
242 }
243 spin_unlock_irq(&ctx->wqh.lock);
244
245 if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
246 return -EFAULT;
247
248 return res;
249}
250
251static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
252 loff_t *ppos)
253{
254 struct eventfd_ctx *ctx = file->private_data;
255 ssize_t res;
256 __u64 ucnt;
257 DECLARE_WAITQUEUE(wait, current);
258
259 if (count < sizeof(ucnt))
260 return -EINVAL;
261 if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
262 return -EFAULT;
263 if (ucnt == ULLONG_MAX)
264 return -EINVAL;
265 spin_lock_irq(&ctx->wqh.lock);
266 res = -EAGAIN;
267 if (ULLONG_MAX - ctx->count > ucnt)
268 res = sizeof(ucnt);
269 else if (!(file->f_flags & O_NONBLOCK)) {
270 __add_wait_queue(&ctx->wqh, &wait);
271 for (res = 0;;) {
272 set_current_state(TASK_INTERRUPTIBLE);
273 if (ULLONG_MAX - ctx->count > ucnt) {
274 res = sizeof(ucnt);
275 break;
276 }
277 if (signal_pending(current)) {
278 res = -ERESTARTSYS;
279 break;
280 }
281 spin_unlock_irq(&ctx->wqh.lock);
282 schedule();
283 spin_lock_irq(&ctx->wqh.lock);
284 }
285 __remove_wait_queue(&ctx->wqh, &wait);
286 __set_current_state(TASK_RUNNING);
287 }
288 if (likely(res > 0)) {
289 ctx->count += ucnt;
290 if (waitqueue_active(&ctx->wqh))
291 wake_up_locked_poll(&ctx->wqh, EPOLLIN);
292 }
293 spin_unlock_irq(&ctx->wqh.lock);
294
295 return res;
296}
297
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo handler: emit the current counter (hexadecimal, printed
 * while holding wqh.lock) followed by the eventfd id.
 */
static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct eventfd_ctx *efd = f->private_data;

	spin_lock_irq(&efd->wqh.lock);
	seq_printf(m, "eventfd-count: %16llx\n",
		   (unsigned long long)efd->count);
	spin_unlock_irq(&efd->wqh.lock);

	seq_printf(m, "eventfd-id: %d\n", efd->id);
}
#endif
310
311static const struct file_operations eventfd_fops = {
312#ifdef CONFIG_PROC_FS
313 .show_fdinfo = eventfd_show_fdinfo,
314#endif
315 .release = eventfd_release,
316 .poll = eventfd_poll,
317 .read = eventfd_read,
318 .write = eventfd_write,
319 .llseek = noop_llseek,
320};
321
322
323
324
325
326
327
328
329
330
331
332struct file *eventfd_fget(int fd)
333{
334 struct file *file;
335
336 file = fget(fd);
337 if (!file)
338 return ERR_PTR(-EBADF);
339 if (file->f_op != &eventfd_fops) {
340 fput(file);
341 return ERR_PTR(-EINVAL);
342 }
343
344 return file;
345}
346EXPORT_SYMBOL_GPL(eventfd_fget);
347
348
349
350
351
352
353
354
355
356
357struct eventfd_ctx *eventfd_ctx_fdget(int fd)
358{
359 struct eventfd_ctx *ctx;
360 struct fd f = fdget(fd);
361 if (!f.file)
362 return ERR_PTR(-EBADF);
363 ctx = eventfd_ctx_fileget(f.file);
364 fdput(f);
365 return ctx;
366}
367EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
368
369
370
371
372
373
374
375
376
377
378struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
379{
380 struct eventfd_ctx *ctx;
381
382 if (file->f_op != &eventfd_fops)
383 return ERR_PTR(-EINVAL);
384
385 ctx = file->private_data;
386 kref_get(&ctx->kref);
387 return ctx;
388}
389EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
390
391static int do_eventfd(unsigned int count, int flags)
392{
393 struct eventfd_ctx *ctx;
394 int fd;
395
396
397 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
398 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
399
400 if (flags & ~EFD_FLAGS_SET)
401 return -EINVAL;
402
403 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
404 if (!ctx)
405 return -ENOMEM;
406
407 kref_init(&ctx->kref);
408 init_waitqueue_head(&ctx->wqh);
409 ctx->count = count;
410 ctx->flags = flags;
411 ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
412
413 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
414 O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
415 if (fd < 0)
416 eventfd_free_ctx(ctx);
417
418 return fd;
419}
420
421SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
422{
423 return do_eventfd(count, flags);
424}
425
426SYSCALL_DEFINE1(eventfd, unsigned int, count)
427{
428 return do_eventfd(count, 0);
429}
430
431