/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
*/

#include <linux/module.h>

#include <linux/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_vli.h"

#define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME)

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);
static void conn_wait_active_ee_empty(struct drbd_connection *connection);
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

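/* Pages are kept on a single-linked chain, using page->private as the
 * "next" pointer (see page_chain_next() and friends in drbd_int.h).
 *
 * page_chain_del() unlinks the first @n pages from *head and returns
 * them as a NULL-terminated chain of their own; it returns NULL (and
 * leaves *head untouched) if fewer than @n pages are available. */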
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

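/* page_chain_tail() walks to the last page of the chain starting at @page;
 * if @len is non-NULL, the number of pages in the chain is stored there. */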
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out
	 * while holding a reference to this device. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list.  Since
	   they are sent in order over the wire, they have to finish
	   in order.  As soon as we see the first not finished one, we
	   can stop examining the list. */
	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}

static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);
	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}

static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (!atomic_read(&device->pp_in_use_by_net))
			continue;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_reclaim_net_peer_reqs(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

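/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD device.
 * @number:		number of pages requested
 * @retry:		whether to retry, if not enough pages are available right now
 *
 * Tries to allocate @number pages, first from our own page pool, then from
 * the kernel; possibly retries until DRBD frees sufficient pages somewhere
 * else.  max_buffers is used as a soft limit: while pp_in_use exceeds it we
 * throttle in steps of HZ/10, and after the first full timeout we ignore
 * the limit rather than risk a distributed deadlock.
 *
 * Returns a page chain linked via page->private.
 */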
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	/* Try to keep the fast path fast, but occasionally we need
	 * to reclaim the pages we lended to the network stack. */
	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
		drbd_reclaim_net_peer_reqs(device);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_reclaim_net_peer_reqs(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}

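/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */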
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

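/*
 * You must not have the req_lock when calling:
 *  drbd_alloc_peer_req(), drbd_free_peer_req(), drbd_free_peer_reqs(),
 *  drbd_finish_peer_reqs(), drbd_wait_ee_list_empty().
 * You need to hold the req_lock for _drbd_wait_ee_list_empty().
 */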
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (payload_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (nr_pages) {
		page = drbd_alloc_pages(peer_device, nr_pages,
					gfpflags_allow_blocking(gfp_mask));
		if (!page)
			goto fail;
	}

	memset(peer_req, 0, sizeof(*peer_req));
	INIT_LIST_HEAD(&peer_req->w.list);
	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = request_size;
	peer_req->i.sector = sector;
	peer_req->submit_jif = jiffies;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	/* The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back as-is in the acknowledgement packets. */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	might_sleep();
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}

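/* Process all peer requests on done_ee: run their completion callbacks
 * (e_end_block, e_end_resync_block, e_send_superseded, ...) and free them;
 * also free any net_ee entries whose pages are no longer in flight. */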
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, e_end_resync_block, e_send_superseded.
	 * all ignore the last argument. */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}

static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv(connection, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(connection, buf, size);
	if (err && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return err;
}

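/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so. */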
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);

};

static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}

static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

static int decode_header(struct drbd_connection *, void *, struct packet_info *);

static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(connection, sock))
		return -EIO;
	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	struct net_conf *nc;
	int err;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
	rcu_read_unlock();

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

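/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */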
static bool drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}

static bool connection_established(struct drbd_connection *connection,
				   struct socket **sock1,
				   struct socket **sock2)
{
	struct net_conf *nc;
	int timeout;
	bool ok;

	if (!*sock1 || !*sock2)
		return false;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
	rcu_read_unlock();
	schedule_timeout_interruptible(timeout);

	ok = drbd_socket_okay(sock1);
	ok = drbd_socket_okay(sock2) && ok;

	return ok;
}

/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* re-arm timer */
	return err;
}

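/*
 * Returns:
 *   1  yes, we have a valid connection
 *   0  oh well, we have to retry again
 *  -1  peer talks different language, no point in trying again
 *  -2  we do not have a network config
 */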
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better. */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->ack_receiver);
	/* opencoded create_singlethread_workqueue(),
	 * to be able to use format string arguments */
	connection->ack_sender =
		alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
	if (!connection->ack_sender) {
		drbd_err(connection, "Failed to create workqueue ack_sender\n");
		return 0;
	}

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}

static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(connection);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			drbd_err(connection, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 connection->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}

static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int err;

	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
	if (err)
		return err;

	err = decode_header(connection, buffer, pi);
	connection->last_received = jiffies;

	return err;
}

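/* This is blkdev_issue_flush, but asynchronous:
 * we want to submit to all component volumes in parallel,
 * then wait for all completions. */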
struct issue_flush_context {
	atomic_t pending;
	int error;
	struct completion done;
};
struct one_flush_context {
	struct drbd_device *device;
	struct issue_flush_context *ctx;
};

void one_flush_endio(struct bio *bio)
{
	struct one_flush_context *octx = bio->bi_private;
	struct drbd_device *device = octx->device;
	struct issue_flush_context *ctx = octx->ctx;

	if (bio->bi_error) {
		ctx->error = bio->bi_error;
		drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error);
	}
	kfree(octx);
	bio_put(bio);

	clear_bit(FLUSH_PENDING, &device->flags);
	put_ldev(device);
	kref_put(&device->kref, drbd_destroy_device);

	if (atomic_dec_and_test(&ctx->pending))
		complete(&ctx->done);
}

static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 0);
	struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
	if (!bio || !octx) {
		drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n");
		/* FIXME: what else can I do now?  disconnecting or detaching
		 * really does not help to improve the state of the world, either. */
		kfree(octx);
		if (bio)
			bio_put(bio);

		ctx->error = -ENOMEM;
		put_ldev(device);
		kref_put(&device->kref, drbd_destroy_device);
		return;
	}

	octx->device = device;
	octx->ctx = ctx;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_private = octx;
	bio->bi_end_io = one_flush_endio;
	bio_set_op_attrs(bio, REQ_OP_FLUSH, WRITE_FLUSH);

	device->flush_jif = jiffies;
	set_bit(FLUSH_PENDING, &device->flags);
	atomic_inc(&ctx->pending);
	submit_bio(bio);
}

static void drbd_flush(struct drbd_connection *connection)
{
	if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
		struct drbd_peer_device *peer_device;
		struct issue_flush_context ctx;
		int vnr;

		atomic_set(&ctx.pending, 1);
		ctx.error = 0;
		init_completion(&ctx.done);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			kref_get(&device->kref);
			rcu_read_unlock();

			submit_one_flush(device, &ctx);

			rcu_read_lock();
		}
		rcu_read_unlock();

		/* Do we want to add a timeout,
		 * if disk-timeout is set? */
		if (!atomic_dec_and_test(&ctx.pending))
			wait_for_completion(&ctx.done);

		if (ctx.error) {
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0.
			 * Any error is already reported by the bio_endio callback. */
			drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
		}
	}
}

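/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state,
 * eventually finishes it (sends the barrier ack) and frees or recycles it.
 * @connection:	DRBD connection.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 */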
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do */
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}

static enum write_ordering_e
max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
{
	struct disk_conf *dc;

	dc = rcu_dereference(bdev->disk_conf);

	if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
		wo = WO_DRAIN_IO;
	if (wo == WO_DRAIN_IO && !dc->disk_drain)
		wo = WO_NONE;

	return wo;
}

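/**
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @resource:	DRBD resource.
 * @wo:		Write ordering method to try.
 */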
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_NONE] = "none",
		[WO_DRAIN_IO] = "drain",
		[WO_BDEV_FLUSH] = "flush",
	};

	pwo = resource->write_ordering;
	if (wo != WO_BDEV_FLUSH)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}

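/* Discard as much of [start, start + nr_sectors) as granularity and
 * alignment of the backing device allow; explicitly zero out whatever
 * cannot be discarded (all of it, if @discard is false).
 * Returns 0 on success, non-zero if any part failed. */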
int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard)
{
	struct block_device *bdev = device->ldev->backing_bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t tmp, nr;
	unsigned int max_discard_sectors, granularity;
	int alignment;
	int err = 0;

	if (!discard)
		goto zero_out;

	/* Zero-sector (unknown) and one-sector granularities are the same. */
	granularity = max(q->limits.discard_granularity >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		goto zero_out;

	if (nr_sectors < granularity)
		goto zero_out;

	tmp = start;
	if (sector_div(tmp, granularity) != alignment) {
		if (nr_sectors < 2*granularity)
			goto zero_out;
		/* start + gran - (start + gran - align) % gran */
		tmp = start + granularity - alignment;
		tmp = start + granularity - sector_div(tmp, granularity);

		nr = tmp - start;
		err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
		nr_sectors -= nr;
		start = tmp;
	}
	while (nr_sectors >= granularity) {
		nr = min_t(sector_t, nr_sectors, max_discard_sectors);
		err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
		nr_sectors -= nr;
		start += nr;
	}
 zero_out:
	if (nr_sectors) {
		err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0);
	}
	return err != 0;
}

static bool can_do_reliable_discards(struct drbd_device *device)
{
	struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
	struct disk_conf *dc;
	bool can_do;

	if (!blk_queue_discard(q))
		return false;

	if (q->limits.discard_zeroes_data)
		return true;

	rcu_read_lock();
	dc = rcu_dereference(device->ldev->disk_conf);
	can_do = dc->discard_zeroes_if_aligned;
	rcu_read_unlock();
	return can_do;
}

static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	/* If the backend cannot discard, or does not guarantee that
	 * discarded ranges read back as zeroes, fall back to explicit
	 * zero-out.  Unfortunately, that may slow down the receiver. */
	if (!can_do_reliable_discards(device))
		peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;

	if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
	    peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT)))
		peer_req->flags |= EE_WAS_ERROR;
	drbd_endio_write_sec_final(peer_req);
}

static void drbd_issue_peer_wsame(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct block_device *bdev = device->ldev->backing_bdev;
	sector_t s = peer_req->i.sector;
	sector_t nr = peer_req->i.size >> 9;
	if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages))
		peer_req->flags |= EE_WAS_ERROR;
	drbd_endio_write_sec_final(peer_req);
}

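/**
 * drbd_submit_peer_request()
 * @device:	DRBD device.
 * @peer_req:	peer request
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC if a single page could not be added to an empty bio
 * (which should never happen and indicates a broken lower level IO stack).
 */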
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned op, const unsigned op_flags,
			     const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned data_size = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* TRIM/DISCARD and WRITE SAME are handled by their own helpers;
	 * they carry no payload pages to map into bios here.
	 * Wait for all pending IO completions before we start
	 * zeroing things out. */
	if (peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) {
		conn_wait_active_ee_empty(peer_req->peer_device->connection);
		/* add it to the active list now,
		 * so we can find it to cancel it on connection loss */
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_SUBMITTED;

		/* If this was a resync request, it may already be
		 * on the sync_ee list */
		if (list_empty(&peer_req->w.list)) {
			spin_lock_irq(&device->resource->req_lock);
			list_add_tail(&peer_req->w.list, &device->active_ee);
			spin_unlock_irq(&device->resource->req_lock);
		}

		if (peer_req->flags & EE_IS_TRIM)
			drbd_issue_peer_discard(device, peer_req);
		else /* EE_WRITE_SAME */
			drbd_issue_peer_wsame(device, peer_req);
		return 0;
	}

	/* In most cases, we will only need one empty bio.
	 * But in case the lower level restrictions happen to
	 * cover the payload in multiple bios, we still allocate
	 * enough to hold all pages, and chain them up. */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio_set_op_attrs(bio, op, op_flags);
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			goto next_bio;
		}
		data_size -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, data_size == 0);
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	/* for debugfs: update timestamp, mark as submitted */
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_SUBMITTED;
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}

static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
					     struct drbd_peer_request *peer_req)
{
	struct drbd_interval *i = &peer_req->i;

	drbd_remove_interval(&device->write_requests, i);
	drbd_clear_interval(i);

	/* Wake up any processes waiting for this peer request to complete. */
	if (i->waiting)
		wake_up(&device->misc_wait);
}

static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_NONE:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_BDEV_FLUSH:
	case WO_DRAIN_IO:
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}

/* quick wrapper in case payload size != request_size (write same) */
static void drbd_csum_ee_size(struct crypto_ahash *h,
			      struct drbd_peer_request *r, void *d,
			      unsigned int payload_size)
{
	unsigned int tmp = r->i.size;
	r->i.size = payload_size;
	drbd_csum_ee(h, r, d);
	r->i.size = tmp;
}

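/* Used from receive_RSDataReply (recv_resync_read) and from receive_Data.
 * pi->size is the actual payload ("data in"):
 * for normal writes that is bi_size,
 * for discards it is zero,
 * for write same it is logical_block_size.
 * Both trim and write same carry the affected size ("request_size")
 * as an extra argument in the packet. */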
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int digest_size, err;
	unsigned int data_size = pi->size, ds;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
	struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL;

	digest_size = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 *	  here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return NULL;
		data_size -= digest_size;
	}

	/* assume request_size == data_size, but special case trim and wsame. */
	ds = data_size;
	if (trim) {
		if (!expect(data_size == 0))
			return NULL;
		ds = be32_to_cpu(trim->size);
	} else if (wsame) {
		if (data_size != queue_logical_block_size(device->rq_queue)) {
			drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n",
				data_size, queue_logical_block_size(device->rq_queue));
			return NULL;
		}
		if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) {
			drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n",
				data_size, bdev_logical_block_size(device->ldev->backing_bdev));
			return NULL;
		}
		ds = be32_to_cpu(wsame->size);
	}

	if (!expect(IS_ALIGNED(ds, 512)))
		return NULL;
	if (trim || wsame) {
		if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
			return NULL;
	} else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* the request must not cross the end of our local disk */
	if (sector + (ds>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, ds);
		return NULL;
	}

	/* GFP_NOIO: we may be called in the writeout path of the peer,
	 * and must not cause arbitrary write-out ourselves */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	peer_req->flags |= EE_WRITE;
	if (trim) {
		peer_req->flags |= EE_IS_TRIM;
		return peer_req;
	}
	if (wsame)
		peer_req->flags |= EE_WRITE_SAME;

	/* receive the payload into the page chain */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	if (digest_size) {
		drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size >> 9;
	return peer_req;
}

/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 */
static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
{
	struct page *page;
	int err = 0;
	void *data;

	if (!data_size)
		return 0;

	page = drbd_alloc_pages(peer_device, 1, 1);

	data = kmap(page);
	while (data_size) {
		unsigned int len = min_t(int, data_size, PAGE_SIZE);

		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (err)
			break;
		data_size -= len;
	}
	kunmap(page);
	drbd_free_pages(peer_device->device, page, 0);
	return err;
}

static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int digest_size, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	digest_size = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return err;
		data_size -= digest_size;
	}

	/* optimistically update recv_cnt.  if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	if (digest_size) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}

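/* e_end_resync_block() is called in ack_sender context via
 * drbd_finish_peer_reqs(). */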
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	dec_unacked(device);

	return err;
}

static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;
	peer_req->submit_jif = jiffies;

	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
				     DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}

static struct drbd_request *
find_request(struct drbd_device *device, struct rb_root *root, u64 id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}

static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}

static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}

static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue */
		__req_mod(req, CONFLICT_RESOLVED, NULL);
	}
}

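/* e_end_block() is called in ack_sender context via drbd_finish_peer_reqs(). */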
2181static int e_end_block(struct drbd_work *w, int cancel)
2182{
2183 struct drbd_peer_request *peer_req =
2184 container_of(w, struct drbd_peer_request, w);
2185 struct drbd_peer_device *peer_device = peer_req->peer_device;
2186 struct drbd_device *device = peer_device->device;
2187 sector_t sector = peer_req->i.sector;
2188 int err = 0, pcmd;
2189
2190 if (peer_req->flags & EE_SEND_WRITE_ACK) {
2191 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
2192 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
2193 device->state.conn <= C_PAUSED_SYNC_T &&
2194 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
2195 P_RS_WRITE_ACK : P_WRITE_ACK;
2196 err = drbd_send_ack(peer_device, pcmd, peer_req);
2197 if (pcmd == P_RS_WRITE_ACK)
2198 drbd_set_in_sync(device, sector, peer_req->i.size);
2199 } else {
2200 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
2201
2202
2203 }
2204 dec_unacked(device);
2205 }
2206
2207
2208
2209 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
2210 spin_lock_irq(&device->resource->req_lock);
2211 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
2212 drbd_remove_epoch_entry_interval(device, peer_req);
2213 if (peer_req->flags & EE_RESTART_REQUESTS)
2214 restart_conflicting_writes(device, sector, peer_req->i.size);
2215 spin_unlock_irq(&device->resource->req_lock);
2216 } else
2217 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
2218
2219 drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
2220
2221 return err;
2222}
2223
2224static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
2225{
2226 struct drbd_peer_request *peer_req =
2227 container_of(w, struct drbd_peer_request, w);
2228 struct drbd_peer_device *peer_device = peer_req->peer_device;
2229 int err;
2230
2231 err = drbd_send_ack(peer_device, ack, peer_req);
2232 dec_unacked(peer_device->device);
2233
2234 return err;
2235}
2236
2237static int e_send_superseded(struct drbd_work *w, int unused)
2238{
2239 return e_send_ack(w, P_SUPERSEDED);
2240}
2241
2242static int e_send_retry_write(struct drbd_work *w, int unused)
2243{
2244 struct drbd_peer_request *peer_req =
2245 container_of(w, struct drbd_peer_request, w);
2246 struct drbd_connection *connection = peer_req->peer_device->connection;
2247
2248 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
2249 P_RETRY_WRITE : P_SUPERSEDED);
2250}
2251
2252static bool seq_greater(u32 a, u32 b)
2253{
 /*
  * We assume 32-bit wrap-around here.
  * For 24-bit wrap-around, we would have to shift:
  *  a <<= 8; b <<= 8;
  */
2259 return (s32)a - (s32)b > 0;
2260}
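
/*
 * Editor's worked example for the wrap-around test above: with
 * a == 5 and b == 0xFFFFFFFB (-5 as s32), (s32)a - (s32)b == 10 > 0,
 * so 5 is "greater" even though the counter wrapped; any distance of
 * less than 2^31 in either direction is ordered correctly.
 */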
2261
2262static u32 seq_max(u32 a, u32 b)
2263{
2264 return seq_greater(a, b) ? a : b;
2265}
2266
2267static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
2268{
2269 struct drbd_device *device = peer_device->device;
2270 unsigned int newest_peer_seq;
2271
2272 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
2273 spin_lock(&device->peer_seq_lock);
2274 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
2275 device->peer_seq = newest_peer_seq;
2276 spin_unlock(&device->peer_seq_lock);
2277
2278 if (peer_seq == newest_peer_seq)
2279 wake_up(&device->seq_wait);
2280 }
2281}
2282
2283static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2284{
2285 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2286}
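
/*
 * Editor's note: s1/s2 are 512-byte sector numbers while l1/l2 are
 * byte lengths, hence the l>>9 conversions above. Example: a 4KiB
 * request at sector 0 covers sectors [0, 8), so overlaps(0, 4096, 8, 512)
 * is false while overlaps(0, 4096, 7, 512) is true.
 */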
2287
/* maybe change sync_ee into interval trees as well? */
2289static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
2290{
2291 struct drbd_peer_request *rs_req;
2292 bool rv = false;
2293
2294 spin_lock_irq(&device->resource->req_lock);
2295 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
2296 if (overlaps(peer_req->i.sector, peer_req->i.size,
2297 rs_req->i.sector, rs_req->i.size)) {
2298 rv = true;
2299 break;
2300 }
2301 }
2302 spin_unlock_irq(&device->resource->req_lock);
2303
2304 return rv;
2305}
2306
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than device->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update device->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on 32bit archs), and we
 * have 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
2328static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
2329{
2330 struct drbd_device *device = peer_device->device;
2331 DEFINE_WAIT(wait);
2332 long timeout;
2333 int ret = 0, tp;
2334
2335 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
2336 return 0;
2337
2338 spin_lock(&device->peer_seq_lock);
2339 for (;;) {
2340 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2341 device->peer_seq = seq_max(device->peer_seq, peer_seq);
2342 break;
2343 }
2344
2345 if (signal_pending(current)) {
2346 ret = -ERESTARTSYS;
2347 break;
2348 }
2349
2350 rcu_read_lock();
2351 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
2352 rcu_read_unlock();
2353
2354 if (!tp)
2355 break;
2356
 /* Only need to wait if two_primaries is enabled */
2358 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2359 spin_unlock(&device->peer_seq_lock);
2360 rcu_read_lock();
2361 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
2362 rcu_read_unlock();
2363 timeout = schedule_timeout(timeout);
2364 spin_lock(&device->peer_seq_lock);
2365 if (!timeout) {
2366 ret = -ETIMEDOUT;
2367 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
2368 break;
2369 }
2370 }
2371 spin_unlock(&device->peer_seq_lock);
2372 finish_wait(&device->seq_wait, &wait);
2373 return ret;
2374}
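
/*
 * Editor's sketch of the sequencing rule above: with device->peer_seq
 * at 7, a P_DATA packet carrying peer_seq 8 is the logical successor
 * (seq_greater(8 - 1, 7) is false) and is processed at once, while
 * peer_seq 9 sleeps on seq_wait until the missing ack for 8 has
 * advanced device->peer_seq.
 */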
2375
/* see also bio_flags_to_wire()
 * DRBD_REQ_*, because we need to semantically map the flags to data packet
 * flags and back */
2379static unsigned long wire_flags_to_bio_flags(u32 dpf)
2380{
2381 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2382 (dpf & DP_FUA ? REQ_FUA : 0) |
2383 (dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
2384}
2385
2386static unsigned long wire_flags_to_bio_op(u32 dpf)
2387{
2388 if (dpf & DP_DISCARD)
2389 return REQ_OP_DISCARD;
2390 else
2391 return REQ_OP_WRITE;
2392}
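
/*
 * Editor's sketch of the wire-to-bio mapping above, e.g. for a P_DATA
 * packet that requests a synchronous FUA write:
 *
 *	u32 dpf = DP_RW_SYNC | DP_FUA;
 *	int op = wire_flags_to_bio_op(dpf);		// REQ_OP_WRITE
 *	long op_flags = wire_flags_to_bio_flags(dpf);	// REQ_SYNC | REQ_FUA
 */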
2393
2394static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
2395 unsigned int size)
2396{
2397 struct drbd_interval *i;
2398
2399 repeat:
2400 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
2401 struct drbd_request *req;
2402 struct bio_and_error m;
2403
2404 if (!i->local)
2405 continue;
2406 req = container_of(i, struct drbd_request, i);
2407 if (!(req->rq_state & RQ_POSTPONED))
2408 continue;
2409 req->rq_state &= ~RQ_POSTPONED;
2410 __req_mod(req, NEG_ACKED, &m);
2411 spin_unlock_irq(&device->resource->req_lock);
2412 if (m.bio)
2413 complete_master_bio(device, &m);
2414 spin_lock_irq(&device->resource->req_lock);
2415 goto repeat;
2416 }
2417}
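
/*
 * Editor's note: fail_postponed_requests() restarts the overlap walk
 * from the top after every hit because req_lock is dropped around
 * complete_master_bio(), which may modify the interval tree.
 */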
2418
2419static int handle_write_conflicts(struct drbd_device *device,
2420 struct drbd_peer_request *peer_req)
2421{
2422 struct drbd_connection *connection = peer_req->peer_device->connection;
2423 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
2424 sector_t sector = peer_req->i.sector;
2425 const unsigned int size = peer_req->i.size;
2426 struct drbd_interval *i;
2427 bool equal;
2428 int err;
2429
 /*
  * Inserting the peer request into the write_requests tree will
  * prevent new conflicting local requests from being added.
  */
2434 drbd_insert_interval(&device->write_requests, &peer_req->i);
2435
2436 repeat:
2437 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
2438 if (i == &peer_req->i)
2439 continue;
2440 if (i->completed)
2441 continue;
2442
2443 if (!i->local) {
 /*
  * Our peer has sent a conflicting remote request; this
  * should not happen in a two-node setup. Wait for the
  * earlier peer request to complete.
  */
2449 err = drbd_wait_misc(device, i);
2450 if (err)
2451 goto out;
2452 goto repeat;
2453 }
2454
2455 equal = i->sector == sector && i->size == size;
2456 if (resolve_conflicts) {
 /*
  * If the peer request is fully contained within the
  * overlapping request, it can be considered overwritten
  * and thus superseded. All other cases trigger a
  * protocol error.
  */
2463 bool superseded = i->sector <= sector && i->sector +
2464 (i->size >> 9) >= sector + (size >> 9);
2465
2466 if (!equal)
2467 drbd_alert(device, "Concurrent writes detected: "
2468 "local=%llus +%u, remote=%llus +%u, "
2469 "assuming %s came first\n",
2470 (unsigned long long)i->sector, i->size,
2471 (unsigned long long)sector, size,
2472 superseded ? "local" : "remote");
2473
2474 peer_req->w.cb = superseded ? e_send_superseded :
2475 e_send_retry_write;
2476 list_add_tail(&peer_req->w.list, &device->done_ee);
2477 queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
2478
2479 err = -ENOENT;
2480 goto out;
2481 } else {
2482 struct drbd_request *req =
2483 container_of(i, struct drbd_request, i);
2484
2485 if (!equal)
2486 drbd_alert(device, "Concurrent writes detected: "
2487 "local=%llus +%u, remote=%llus +%u\n",
2488 (unsigned long long)i->sector, i->size,
2489 (unsigned long long)sector, size);
2490
2491 if (req->rq_state & RQ_LOCAL_PENDING ||
2492 !(req->rq_state & RQ_POSTPONED)) {
 /*
  * Wait for the node with the discard flag to
  * decide if this request has been superseded
  * or needs to be retried.
  * Requests that have been superseded will
  * disappear from the write_requests tree.
  *
  * In addition, wait for the conflicting
  * request to finish locally before submitting
  * the conflicting peer request.
  */
2504 err = drbd_wait_misc(device, &req->i);
2505 if (err) {
2506 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
2507 fail_postponed_requests(device, sector, size);
2508 goto out;
2509 }
2510 goto repeat;
2511 }
2512
 /*
  * Remember to restart the conflicting requests once this
  * peer request completes; see restart_conflicting_writes().
  */
2516 peer_req->flags |= EE_RESTART_REQUESTS;
2517 }
2518 }
2519 err = 0;
2520
2521 out:
2522 if (err)
2523 drbd_remove_epoch_entry_interval(device, peer_req);
2524 return err;
2525}
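
/*
 * Editor's summary of the contract above: handle_write_conflicts()
 * returns 0 if the peer request may be submitted, -ENOENT if it was
 * superseded (an ack is already queued, the caller just drops it),
 * and any other error to tear down the connection; in the error cases
 * the interval has already been removed from the tree again.
 */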
2526
/* mirrored write */
2528static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
2529{
2530 struct drbd_peer_device *peer_device;
2531 struct drbd_device *device;
2532 struct net_conf *nc;
2533 sector_t sector;
2534 struct drbd_peer_request *peer_req;
2535 struct p_data *p = pi->data;
2536 u32 peer_seq = be32_to_cpu(p->seq_num);
2537 int op, op_flags;
2538 u32 dp_flags;
2539 int err, tp;
2540
2541 peer_device = conn_peer_device(connection, pi->vnr);
2542 if (!peer_device)
2543 return -EIO;
2544 device = peer_device->device;
2545
2546 if (!get_ldev(device)) {
2547 int err2;
2548
2549 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2550 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
2551 atomic_inc(&connection->current_epoch->epoch_size);
2552 err2 = drbd_drain_block(peer_device, pi->size);
2553 if (!err)
2554 err = err2;
2555 return err;
2556 }
2557
 /*
  * Corresponding put_ldev done either below (on various errors), or in
  * drbd_peer_request_endio, if we successfully submit the data at the
  * end of this function.
  */

2564 sector = be64_to_cpu(p->sector);
2565 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
2566 if (!peer_req) {
2567 put_ldev(device);
2568 return -EIO;
2569 }
2570
2571 peer_req->w.cb = e_end_block;
2572 peer_req->submit_jif = jiffies;
2573 peer_req->flags |= EE_APPLICATION;
2574
2575 dp_flags = be32_to_cpu(p->dp_flags);
2576 op = wire_flags_to_bio_op(dp_flags);
2577 op_flags = wire_flags_to_bio_flags(dp_flags);
2578 if (pi->cmd == P_TRIM) {
2579 D_ASSERT(peer_device, peer_req->i.size > 0);
2580 D_ASSERT(peer_device, op == REQ_OP_DISCARD);
2581 D_ASSERT(peer_device, peer_req->pages == NULL);
2582 } else if (peer_req->pages == NULL) {
2583 D_ASSERT(device, peer_req->i.size == 0);
2584 D_ASSERT(device, dp_flags & DP_FLUSH);
2585 }
2586
2587 if (dp_flags & DP_MAY_SET_IN_SYNC)
2588 peer_req->flags |= EE_MAY_SET_IN_SYNC;
2589
2590 spin_lock(&connection->epoch_lock);
2591 peer_req->epoch = connection->current_epoch;
2592 atomic_inc(&peer_req->epoch->epoch_size);
2593 atomic_inc(&peer_req->epoch->active);
2594 spin_unlock(&connection->epoch_lock);
2595
2596 rcu_read_lock();
2597 nc = rcu_dereference(peer_device->connection->net_conf);
2598 tp = nc->two_primaries;
2599 if (peer_device->connection->agreed_pro_version < 100) {
2600 switch (nc->wire_protocol) {
2601 case DRBD_PROT_C:
2602 dp_flags |= DP_SEND_WRITE_ACK;
2603 break;
2604 case DRBD_PROT_B:
2605 dp_flags |= DP_SEND_RECEIVE_ACK;
2606 break;
2607 }
2608 }
2609 rcu_read_unlock();
2610
2611 if (dp_flags & DP_SEND_WRITE_ACK) {
2612 peer_req->flags |= EE_SEND_WRITE_ACK;
2613 inc_unacked(device);
 /* corresponding dec_unacked() in e_end_block(),
  * respective _drbd_clear_done_ee */
2616 }
2617
2618 if (dp_flags & DP_SEND_RECEIVE_ACK) {
 /* I really don't like it that the receiver thread
  * sends back P_RECV_ACK right now */
2621 drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
2622 }
2623
2624 if (tp) {
 /* two primaries implies protocol C */
2626 D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
2627 peer_req->flags |= EE_IN_INTERVAL_TREE;
2628 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2629 if (err)
2630 goto out_interrupted;
2631 spin_lock_irq(&device->resource->req_lock);
2632 err = handle_write_conflicts(device, peer_req);
2633 if (err) {
2634 spin_unlock_irq(&device->resource->req_lock);
2635 if (err == -ENOENT) {
2636 put_ldev(device);
2637 return 0;
2638 }
2639 goto out_interrupted;
2640 }
2641 } else {
2642 update_peer_seq(peer_device, peer_seq);
2643 spin_lock_irq(&device->resource->req_lock);
2644 }
2645
 /* TRIM and WRITE_SAME are processed synchronously: we wait for all
  * pending requests, respectively wait for active_ee to become empty
  * in drbd_submit_peer_request(); better not add ourselves here. */
2649 if ((peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) == 0)
2650 list_add_tail(&peer_req->w.list, &device->active_ee);
2651 spin_unlock_irq(&device->resource->req_lock);
2652
2653 if (device->state.conn == C_SYNC_TARGET)
2654 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
2655
2656 if (device->state.pdsk < D_INCONSISTENT) {
 /* the peer is diskless; we have the only disk of the cluster: */
2658 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
2659 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
2660 drbd_al_begin_io(device, &peer_req->i);
2661 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2662 }
2663
2664 err = drbd_submit_peer_request(device, peer_req, op, op_flags,
2665 DRBD_FAULT_DT_WR);
2666 if (!err)
2667 return 0;
2668
 /* don't care for the reason here */
2670 drbd_err(device, "submit failed, triggering re-connect\n");
2671 spin_lock_irq(&device->resource->req_lock);
2672 list_del(&peer_req->w.list);
2673 drbd_remove_epoch_entry_interval(device, peer_req);
2674 spin_unlock_irq(&device->resource->req_lock);
2675 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
2676 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
2677 drbd_al_complete_io(device, &peer_req->i);
2678 }
2679
2680out_interrupted:
2681 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
2682 put_ldev(device);
2683 drbd_free_peer_req(device, peer_req);
2684 return err;
2685}
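
/*
 * Editor's note: all failure paths of receive_Data() after the request
 * entered the epoch funnel through out_interrupted, which drops the
 * epoch reference (EV_PUT | EV_CLEANUP), the ldev reference and the
 * peer request itself.
 */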
2686
/* We may throttle resync, if the lower device seems to be busy,
 * and current sync rate is above c_min_rate.
 *
 * To decide whether or not the lower device is busy, we use a scheme
 * similar to MD RAID is_mddev_idle(): if the partition stats reveal
 * "significant" amounts of application IO, the device counts as busy.
 *
 * The current sync rate is computed against the most recent sync
 * marks, i.e. a short time average, so we can react quickly.
 */
2698bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2699 bool throttle_if_app_is_waiting)
2700{
2701 struct lc_element *tmp;
2702 bool throttle = drbd_rs_c_min_rate_throttle(device);
2703
2704 if (!throttle || throttle_if_app_is_waiting)
2705 return throttle;
2706
2707 spin_lock_irq(&device->al_lock);
2708 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2709 if (tmp) {
2710 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2711 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2712 throttle = false;
 /* Do not slow down the resync of this extent: application IO
  * is already waiting for it (BME_PRIORITY). */
2715 }
2716 spin_unlock_irq(&device->al_lock);
2717
2718 return throttle;
2719}
2720
2721bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
2722{
2723 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
2724 unsigned long db, dt, dbdt;
2725 unsigned int c_min_rate;
2726 int curr_events;
2727
2728 rcu_read_lock();
2729 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
2730 rcu_read_unlock();
2731
 /* feature disabled? */
2733 if (c_min_rate == 0)
2734 return false;
2735
2736 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2737 (int)part_stat_read(&disk->part0, sectors[1]) -
2738 atomic_read(&device->rs_sect_ev);
2739
2740 if (atomic_read(&device->ap_actlog_cnt)
2741 || curr_events - device->rs_last_events > 64) {
2742 unsigned long rs_left;
2743 int i;
2744
2745 device->rs_last_events = curr_events;
 /* rate of change since the previous sync mark: a short time
  * average, so the throttle can react quickly */
2749 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2750
2751 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2752 rs_left = device->ov_left;
2753 else
2754 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
2755
2756 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
2757 if (!dt)
2758 dt++;
2759 db = device->rs_mark_left[i] - rs_left;
2760 dbdt = Bit2KB(db/dt);
2761
2762 if (dbdt > c_min_rate)
2763 return true;
2764 }
2765 return false;
2766}
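
/*
 * Editor's worked example for the rate check above: with c_min_rate =
 * 250 (KiB/s), dt = 3 seconds since the chosen sync mark and db = 3000
 * bitmap bits (4KiB each) resynced since then, dbdt = Bit2KB(3000/3) =
 * 4000 KiB/s > 250, so the resync is throttled.
 */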
2767
2768static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
2769{
2770 struct drbd_peer_device *peer_device;
2771 struct drbd_device *device;
2772 sector_t sector;
2773 sector_t capacity;
2774 struct drbd_peer_request *peer_req;
2775 struct digest_info *di = NULL;
2776 int size, verb;
2777 unsigned int fault_type;
2778 struct p_block_req *p = pi->data;
2779
2780 peer_device = conn_peer_device(connection, pi->vnr);
2781 if (!peer_device)
2782 return -EIO;
2783 device = peer_device->device;
2784 capacity = drbd_get_capacity(device->this_bdev);
2785
2786 sector = be64_to_cpu(p->sector);
2787 size = be32_to_cpu(p->blksize);
2788
2789 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
2790 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2791 (unsigned long long)sector, size);
2792 return -EINVAL;
2793 }
2794 if (sector + (size>>9) > capacity) {
2795 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2796 (unsigned long long)sector, size);
2797 return -EINVAL;
2798 }
2799
2800 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
2801 verb = 1;
2802 switch (pi->cmd) {
2803 case P_DATA_REQUEST:
2804 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
2805 break;
2806 case P_RS_THIN_REQ:
2807 case P_RS_DATA_REQUEST:
2808 case P_CSUM_RS_REQUEST:
2809 case P_OV_REQUEST:
2810 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
2811 break;
2812 case P_OV_REPLY:
2813 verb = 0;
2814 dec_rs_pending(device);
2815 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
2816 break;
2817 default:
2818 BUG();
2819 }
2820 if (verb && __ratelimit(&drbd_ratelimit_state))
2821 drbd_err(device, "Can not satisfy peer's read request, "
2822 "no local data.\n");
2823
2824
2825 return drbd_drain_block(peer_device, pi->size);
2826 }
2827
 /* GFP_NOIO: we are in the receiver path; memory reclaim must not
  * recurse into the block layer here. */
2831 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2832 size, GFP_NOIO);
2833 if (!peer_req) {
2834 put_ldev(device);
2835 return -ENOMEM;
2836 }
2837
2838 switch (pi->cmd) {
2839 case P_DATA_REQUEST:
2840 peer_req->w.cb = w_e_end_data_req;
2841 fault_type = DRBD_FAULT_DT_RD;
 /* application IO, don't drbd_rs_begin_io */
2843 peer_req->flags |= EE_APPLICATION;
2844 goto submit;
2845
2846 case P_RS_THIN_REQ:
 /* If at some point in the future we have a smart way to
  * find out if this data block is completely deallocated,
  * then we would do something smarter here than reading
  * the block... */
 peer_req->flags |= EE_RS_THIN_REQ;
 /* fall through */
 case P_RS_DATA_REQUEST:
2853 peer_req->w.cb = w_e_end_rsdata_req;
2854 fault_type = DRBD_FAULT_RS_RD;
 /* used in the sector offset progress display */
2856 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2857 break;
2858
2859 case P_OV_REPLY:
2860 case P_CSUM_RS_REQUEST:
2861 fault_type = DRBD_FAULT_RS_RD;
2862 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
2863 if (!di)
2864 goto out_free_e;
2865
2866 di->digest_size = pi->size;
2867 di->digest = (((char *)di)+sizeof(struct digest_info));
2868
2869 peer_req->digest = di;
2870 peer_req->flags |= EE_HAS_DIGEST;
2871
2872 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
2873 goto out_free_e;
2874
2875 if (pi->cmd == P_CSUM_RS_REQUEST) {
2876 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
2877 peer_req->w.cb = w_e_end_csum_rs_req;
 /* used in the sector offset progress display */
2879 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
 /* remember to report stats in drbd_resync_finished */
2881 device->use_csums = true;
2882 } else if (pi->cmd == P_OV_REPLY) {
 /* track progress, we may need to throttle */
2884 atomic_add(size >> 9, &device->rs_sect_in);
2885 peer_req->w.cb = w_e_end_ov_reply;
2886 dec_rs_pending(device);
 /* drbd_rs_begin_io done when we sent this request,
  * but accounting still needs to be done. */
2889 goto submit_for_resync;
2890 }
2891 break;
2892
2893 case P_OV_REQUEST:
2894 if (device->ov_start_sector == ~(sector_t)0 &&
2895 peer_device->connection->agreed_pro_version >= 90) {
2896 unsigned long now = jiffies;
2897 int i;
2898 device->ov_start_sector = sector;
2899 device->ov_position = sector;
2900 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2901 device->rs_total = device->ov_left;
2902 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2903 device->rs_mark_left[i] = device->ov_left;
2904 device->rs_mark_time[i] = now;
2905 }
2906 drbd_info(device, "Online Verify start sector: %llu\n",
2907 (unsigned long long)sector);
2908 }
2909 peer_req->w.cb = w_e_end_ov_req;
2910 fault_type = DRBD_FAULT_RS_RD;
2911 break;
2912
2913 default:
2914 BUG();
2915 }
 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
  * wrt the receiver, but it is not as straightforward as it may seem.
  * Various places in the resync start and stop logic assume resync
  * requests are processed in order, requeuing this on the worker thread
  * introduces a bunch of new code for synchronization between threads.
  *
  * Unlimited throttling before drbd_rs_begin_io may stall the resync
  * "forever", throttling after drbd_rs_begin_io will lock that extent
  * for application writes for the same time. For now, just throttle
  * here, where the rest of the code expects the receiver thread to
  * sleep from time to time anyways. */

 /* Even though this may be a resync request, we do add to "read_ee";
  * "sync_ee" is only used for resync WRITEs.
  * Add to list early, so debugfs can find this request
  * even if we have to sleep below. */
2944 spin_lock_irq(&device->resource->req_lock);
2945 list_add_tail(&peer_req->w.list, &device->read_ee);
2946 spin_unlock_irq(&device->resource->req_lock);
2947
2948 update_receiver_timing_details(connection, drbd_rs_should_slow_down);
2949 if (device->state.peer != R_PRIMARY
2950 && drbd_rs_should_slow_down(device, sector, false))
2951 schedule_timeout_uninterruptible(HZ/10);
2952 update_receiver_timing_details(connection, drbd_rs_begin_io);
2953 if (drbd_rs_begin_io(device, sector))
2954 goto out_free_e;
2955
2956submit_for_resync:
2957 atomic_add(size >> 9, &device->rs_sect_ev);
2958
2959submit:
2960 update_receiver_timing_details(connection, drbd_submit_peer_request);
2961 inc_unacked(device);
2962 if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
2963 fault_type) == 0)
2964 return 0;
2965
 /* don't care for the reason here */
2967 drbd_err(device, "submit failed, triggering re-connect\n");
2968
2969out_free_e:
2970 spin_lock_irq(&device->resource->req_lock);
2971 list_del(&peer_req->w.list);
2972 spin_unlock_irq(&device->resource->req_lock);
 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2975 put_ldev(device);
2976 drbd_free_peer_req(device, peer_req);
2977 return -EIO;
2978}
2979
/**
 * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
 */
2983static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
2984{
2985 struct drbd_device *device = peer_device->device;
2986 int self, peer, rv = -100;
2987 unsigned long ch_self, ch_peer;
2988 enum drbd_after_sb_p after_sb_0p;
2989
2990 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2991 peer = device->p_uuid[UI_BITMAP] & 1;
2992
2993 ch_peer = device->p_uuid[UI_SIZE];
2994 ch_self = device->comm_bm_set;
2995
2996 rcu_read_lock();
2997 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
2998 rcu_read_unlock();
2999 switch (after_sb_0p) {
3000 case ASB_CONSENSUS:
3001 case ASB_DISCARD_SECONDARY:
3002 case ASB_CALL_HELPER:
3003 case ASB_VIOLENTLY:
3004 drbd_err(device, "Configuration error.\n");
3005 break;
3006 case ASB_DISCONNECT:
3007 break;
3008 case ASB_DISCARD_YOUNGER_PRI:
3009 if (self == 0 && peer == 1) {
3010 rv = -1;
3011 break;
3012 }
3013 if (self == 1 && peer == 0) {
3014 rv = 1;
3015 break;
3016 }
 /* Else fall through to one of the other strategies... */
3018 case ASB_DISCARD_OLDER_PRI:
3019 if (self == 0 && peer == 1) {
3020 rv = 1;
3021 break;
3022 }
3023 if (self == 1 && peer == 0) {
3024 rv = -1;
3025 break;
3026 }
 /* Else fall through to one of the other strategies... */
3028 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
3029 "Using discard-least-changes instead\n");
3030 case ASB_DISCARD_ZERO_CHG:
3031 if (ch_peer == 0 && ch_self == 0) {
3032 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
3033 ? -1 : 1;
3034 break;
3035 } else {
3036 if (ch_peer == 0) { rv = 1; break; }
3037 if (ch_self == 0) { rv = -1; break; }
3038 }
3039 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
3040 break;
3041 case ASB_DISCARD_LEAST_CHG:
3042 if (ch_self < ch_peer)
3043 rv = -1;
3044 else if (ch_self > ch_peer)
3045 rv = 1;
 else /* ch_self == ch_peer */
 /* Well, then use something else. */
3048 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
3049 ? -1 : 1;
3050 break;
3051 case ASB_DISCARD_LOCAL:
3052 rv = -1;
3053 break;
3054 case ASB_DISCARD_REMOTE:
3055 rv = 1;
3056 }
3057
3058 return rv;
3059}
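
/*
 * Editor's note: the drbd_asb_recover_*p() helpers share a return
 * convention: 1 means "this node becomes sync source", -1 "this node
 * becomes sync target", -100 "no automatic decision, disconnect".
 */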
3060
/**
 * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
 */
3064static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
3065{
3066 struct drbd_device *device = peer_device->device;
3067 int hg, rv = -100;
3068 enum drbd_after_sb_p after_sb_1p;
3069
3070 rcu_read_lock();
3071 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
3072 rcu_read_unlock();
3073 switch (after_sb_1p) {
3074 case ASB_DISCARD_YOUNGER_PRI:
3075 case ASB_DISCARD_OLDER_PRI:
3076 case ASB_DISCARD_LEAST_CHG:
3077 case ASB_DISCARD_LOCAL:
3078 case ASB_DISCARD_REMOTE:
3079 case ASB_DISCARD_ZERO_CHG:
3080 drbd_err(device, "Configuration error.\n");
3081 break;
3082 case ASB_DISCONNECT:
3083 break;
3084 case ASB_CONSENSUS:
3085 hg = drbd_asb_recover_0p(peer_device);
3086 if (hg == -1 && device->state.role == R_SECONDARY)
3087 rv = hg;
3088 if (hg == 1 && device->state.role == R_PRIMARY)
3089 rv = hg;
3090 break;
3091 case ASB_VIOLENTLY:
3092 rv = drbd_asb_recover_0p(peer_device);
3093 break;
3094 case ASB_DISCARD_SECONDARY:
3095 return device->state.role == R_PRIMARY ? 1 : -1;
3096 case ASB_CALL_HELPER:
3097 hg = drbd_asb_recover_0p(peer_device);
3098 if (hg == -1 && device->state.role == R_PRIMARY) {
3099 enum drbd_state_rv rv2;

 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
  * we might be here in C_WF_REPORT_PARAMS which is transient.
  * we do not need to wait for the after state change work either. */
3104 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3105 if (rv2 != SS_SUCCESS) {
3106 drbd_khelper(device, "pri-lost-after-sb");
3107 } else {
3108 drbd_warn(device, "Successfully gave up primary role.\n");
3109 rv = hg;
3110 }
3111 } else
3112 rv = hg;
3113 }
3114
3115 return rv;
3116}
3117
/**
 * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
 */
3121static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
3122{
3123 struct drbd_device *device = peer_device->device;
3124 int hg, rv = -100;
3125 enum drbd_after_sb_p after_sb_2p;
3126
3127 rcu_read_lock();
3128 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
3129 rcu_read_unlock();
3130 switch (after_sb_2p) {
3131 case ASB_DISCARD_YOUNGER_PRI:
3132 case ASB_DISCARD_OLDER_PRI:
3133 case ASB_DISCARD_LEAST_CHG:
3134 case ASB_DISCARD_LOCAL:
3135 case ASB_DISCARD_REMOTE:
3136 case ASB_CONSENSUS:
3137 case ASB_DISCARD_SECONDARY:
3138 case ASB_DISCARD_ZERO_CHG:
3139 drbd_err(device, "Configuration error.\n");
3140 break;
3141 case ASB_VIOLENTLY:
3142 rv = drbd_asb_recover_0p(peer_device);
3143 break;
3144 case ASB_DISCONNECT:
3145 break;
3146 case ASB_CALL_HELPER:
3147 hg = drbd_asb_recover_0p(peer_device);
3148 if (hg == -1) {
3149 enum drbd_state_rv rv2;

 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
  * we might be here in C_WF_REPORT_PARAMS which is transient.
  * we do not need to wait for the after state change work either. */
3154 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
3155 if (rv2 != SS_SUCCESS) {
3156 drbd_khelper(device, "pri-lost-after-sb");
3157 } else {
3158 drbd_warn(device, "Successfully gave up primary role.\n");
3159 rv = hg;
3160 }
3161 } else
3162 rv = hg;
3163 }
3164
3165 return rv;
3166}
3167
3168static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
3169 u64 bits, u64 flags)
3170{
3171 if (!uuid) {
3172 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
3173 return;
3174 }
3175 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
3176 text,
3177 (unsigned long long)uuid[UI_CURRENT],
3178 (unsigned long long)uuid[UI_BITMAP],
3179 (unsigned long long)uuid[UI_HISTORY_START],
3180 (unsigned long long)uuid[UI_HISTORY_END],
3181 (unsigned long long)bits,
3182 (unsigned long long)flags);
3183}
3184
/*
 * Return values of drbd_uuid_compare():
 *   100	after split brain try auto recover
 *     2	C_SYNC_SOURCE set BitMap
 *     1	C_SYNC_SOURCE use BitMap
 *     0	no Sync
 *    -1	C_SYNC_TARGET use BitMap
 *    -2	C_SYNC_TARGET set BitMap
 *  -100	after split brain, disconnect
 * -1000	unrelated data
 * -1091	requires proto 91
 *  below -0x10000	requires a feature flag,
 *		encoded as -(0x10000 | proto | (feature flags << 8))
 */
3198static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
3199{
3200 struct drbd_peer_device *const peer_device = first_peer_device(device);
3201 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
3202 u64 self, peer;
3203 int i, j;
3204
3205 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3206 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3207
3208 *rule_nr = 10;
3209 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
3210 return 0;
3211
3212 *rule_nr = 20;
3213 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
3214 peer != UUID_JUST_CREATED)
3215 return -2;
3216
3217 *rule_nr = 30;
3218 if (self != UUID_JUST_CREATED &&
3219 (peer == UUID_JUST_CREATED || peer == (u64)0))
3220 return 2;
3221
3222 if (self == peer) {
 int rct, dc; /* roles at crash time */
3224
3225 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
3226
3227 if (connection->agreed_pro_version < 91)
3228 return -1091;
3229
3230 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
3231 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
3232 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
3233 drbd_uuid_move_history(device);
3234 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
3235 device->ldev->md.uuid[UI_BITMAP] = 0;
3236
3237 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3238 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3239 *rule_nr = 34;
3240 } else {
3241 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
3242 *rule_nr = 36;
3243 }
3244
3245 return 1;
3246 }
3247
3248 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
3249
3250 if (connection->agreed_pro_version < 91)
3251 return -1091;
3252
3253 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
3254 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
3255 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
3256
3257 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
3258 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
3259 device->p_uuid[UI_BITMAP] = 0UL;
3260
3261 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3262 *rule_nr = 35;
3263 } else {
3264 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
3265 *rule_nr = 37;
3266 }
3267
3268 return -1;
3269 }
 /* Common power [off|failure] */
3272 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
3273 (device->p_uuid[UI_FLAGS] & 2);
 /* lowest bit is set when we were primary,
  * next bit (weight 2) is set when peer was primary */
3276 *rule_nr = 40;
3277
 /* Neither has the "crashed primary" flag set,
  * only a replication link interruption. */
3280 if (rct == 0)
3281 return 0;
3282
 /* At least one side crashed while it was primary. If one side is
  * primary right now, prefer it as sync source; resolving that case
  * requires the DRBD_FF_WSAME feature on both sides. */
3288 if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
3289 *rule_nr = 41;
3290 if (!(connection->agreed_features & DRBD_FF_WSAME)) {
3291 drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
3292 return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
3293 }
3294 if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
3295
3296
3297
3298 drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
3299 return -100;
3300 }
3301 if (device->state.role == R_PRIMARY)
3302 return 1;
3303 return -1;
3304 }
3305
3309 switch (rct) {
 case 0: /* !self_pri && !peer_pri */ return 0;
 case 1: /*  self_pri && !peer_pri */ return 1;
 case 2: /* !self_pri &&  peer_pri */ return -1;
 case 3: /*  self_pri &&  peer_pri */
 dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
3315 return dc ? -1 : 1;
3316 }
3317 }
3318
3319 *rule_nr = 50;
3320 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3321 if (self == peer)
3322 return -1;
3323
3324 *rule_nr = 51;
3325 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
3326 if (self == peer) {
3327 if (connection->agreed_pro_version < 96 ?
3328 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3329 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3330 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
 /* The last P_SYNC_UUID did not get through. Undo the last start of
  * resync as sync source modifications of the peer's UUIDs. */
3334 if (connection->agreed_pro_version < 91)
3335 return -1091;
3336
3337 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3338 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
3339
3340 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
3341 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3342
3343 return -1;
3344 }
3345 }
3346
3347 *rule_nr = 60;
3348 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3349 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3350 peer = device->p_uuid[i] & ~((u64)1);
3351 if (self == peer)
3352 return -2;
3353 }
3354
3355 *rule_nr = 70;
3356 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3357 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3358 if (self == peer)
3359 return 1;
3360
3361 *rule_nr = 71;
3362 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
3363 if (self == peer) {
3364 if (connection->agreed_pro_version < 96 ?
3365 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3366 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3367 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
 /* The last P_SYNC_UUID did not get through. Undo the last start of
  * resync as sync source modifications of our UUIDs. */
3371 if (connection->agreed_pro_version < 91)
3372 return -1091;
3373
3374 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3375 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
3376
3377 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
3378 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3379 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
3380
3381 return 1;
3382 }
3383 }
3384
3385
3386 *rule_nr = 80;
3387 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
3388 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3389 self = device->ldev->md.uuid[i] & ~((u64)1);
3390 if (self == peer)
3391 return 2;
3392 }
3393
3394 *rule_nr = 90;
3395 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3396 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
3397 if (self == peer && self != ((u64)0))
3398 return 100;
3399
3400 *rule_nr = 100;
3401 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
3402 self = device->ldev->md.uuid[i] & ~((u64)1);
3403 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
3404 peer = device->p_uuid[j] & ~((u64)1);
3405 if (self == peer)
3406 return -100;
3407 }
3408 }
3409
3410 return -1000;
3411}
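
/*
 * Editor's note: throughout drbd_uuid_compare() the UUIDs are compared
 * with the lowest bit masked off (& ~(u64)1); that bit appears to be
 * used as a role flag (set while the node is primary), not as part of
 * the UUID value itself.
 */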
3412
/* drbd_sync_handshake() returns the new conn state on success, or
 * C_MASK (-1) on failure.
 */
3416static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3417 enum drbd_role peer_role,
3418 enum drbd_disk_state peer_disk) __must_hold(local)
3419{
3420 struct drbd_device *device = peer_device->device;
3421 enum drbd_conns rv = C_MASK;
3422 enum drbd_disk_state mydisk;
3423 struct net_conf *nc;
3424 int hg, rule_nr, rr_conflict, tentative;
3425
3426 mydisk = device->state.disk;
3427 if (mydisk == D_NEGOTIATING)
3428 mydisk = device->new_state_tmp.disk;
3429
3430 drbd_info(device, "drbd_sync_handshake:\n");
3431
3432 spin_lock_irq(&device->ldev->md.uuid_lock);
3433 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3434 drbd_uuid_dump(device, "peer", device->p_uuid,
3435 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
3436
3437 hg = drbd_uuid_compare(device, peer_role, &rule_nr);
3438 spin_unlock_irq(&device->ldev->md.uuid_lock);
3439
3440 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
3441
3442 if (hg == -1000) {
3443 drbd_alert(device, "Unrelated data, aborting!\n");
3444 return C_MASK;
3445 }
3446 if (hg < -0x10000) {
3447 int proto, fflags;
3448 hg = -hg;
3449 proto = hg & 0xff;
3450 fflags = (hg >> 8) & 0xff;
3451 drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
3452 proto, fflags);
3453 return C_MASK;
3454 }
3455 if (hg < -1000) {
3456 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
3457 return C_MASK;
3458 }
3459
3460 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3461 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3462 int f = (hg == -100) || abs(hg) == 2;
3463 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3464 if (f)
3465 hg = hg*2;
3466 drbd_info(device, "Becoming sync %s due to disk states.\n",
3467 hg > 0 ? "source" : "target");
3468 }
3469
3470 if (abs(hg) == 100)
3471 drbd_khelper(device, "initial-split-brain");
3472
3473 rcu_read_lock();
3474 nc = rcu_dereference(peer_device->connection->net_conf);
3475
3476 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
3477 int pcount = (device->state.role == R_PRIMARY)
3478 + (peer_role == R_PRIMARY);
3479 int forced = (hg == -100);
3480
3481 switch (pcount) {
3482 case 0:
3483 hg = drbd_asb_recover_0p(peer_device);
3484 break;
3485 case 1:
3486 hg = drbd_asb_recover_1p(peer_device);
3487 break;
3488 case 2:
3489 hg = drbd_asb_recover_2p(peer_device);
3490 break;
3491 }
3492 if (abs(hg) < 100) {
3493 drbd_warn(device, "Split-Brain detected, %d primaries, "
3494 "automatically solved. Sync from %s node\n",
3495 pcount, (hg < 0) ? "peer" : "this");
3496 if (forced) {
3497 drbd_warn(device, "Doing a full sync, since"
3498 " UUIDs where ambiguous.\n");
3499 hg = hg*2;
3500 }
3501 }
3502 }
3503
3504 if (hg == -100) {
3505 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
3506 hg = -1;
3507 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
3508 hg = 1;
3509
3510 if (abs(hg) < 100)
3511 drbd_warn(device, "Split-Brain detected, manually solved. "
3512 "Sync from %s node\n",
3513 (hg < 0) ? "peer" : "this");
3514 }
3515 rr_conflict = nc->rr_conflict;
3516 tentative = nc->tentative;
3517 rcu_read_unlock();
3518
3519 if (hg == -100) {
 /* FIXME this log message is not correct if we end up here
  * after an attempted attach on a diskless node.
  * We just refuse to attach -- well, we drop the "connection"
  * to that disk, in a way... */
3524 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
3525 drbd_khelper(device, "split-brain");
3526 return C_MASK;
3527 }
3528
3529 if (hg > 0 && mydisk <= D_INCONSISTENT) {
3530 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
3531 return C_MASK;
3532 }
3533
3534 if (hg < 0 &&
3535 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
3536 switch (rr_conflict) {
3537 case ASB_CALL_HELPER:
3538 drbd_khelper(device, "pri-lost");
 /* fall through */
3540 case ASB_DISCONNECT:
3541 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
3542 return C_MASK;
3543 case ASB_VIOLENTLY:
3544 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
3545 "assumption\n");
3546 }
3547 }
3548
3549 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
3550 if (hg == 0)
3551 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
3552 else
3553 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
3554 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3555 abs(hg) >= 2 ? "full" : "bit-map based");
3556 return C_MASK;
3557 }
3558
3559 if (abs(hg) >= 2) {
3560 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
3561 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
3562 BM_LOCKED_SET_ALLOWED))
3563 return C_MASK;
3564 }
3565
3566 if (hg > 0) {
3567 rv = C_WF_BITMAP_S;
3568 } else if (hg < 0) {
3569 rv = C_WF_BITMAP_T;
3570 } else {
3571 rv = C_CONNECTED;
3572 if (drbd_bm_total_weight(device)) {
3573 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
3574 drbd_bm_total_weight(device));
3575 }
3576 }
3577
3578 return rv;
3579}
3580
3581static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
3582{
 /* ASB_DISCARD_REMOTE on the peer means "discard this node's data": */
3584 if (peer == ASB_DISCARD_REMOTE)
3585 return ASB_DISCARD_LOCAL;
3586
3587
3588 if (peer == ASB_DISCARD_LOCAL)
3589 return ASB_DISCARD_REMOTE;
 /* everything else is valid if it is equal on both sides */
3592 return peer;
3593}
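
/*
 * Editor's sketch: convert_after_sb(ASB_DISCARD_REMOTE) yields
 * ASB_DISCARD_LOCAL because the packet carries the peer's policy from
 * the peer's point of view; its "remote" is our "local". Symmetric
 * policies pass through unchanged and are then simply compared for
 * equality in receive_protocol().
 */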
3594
3595static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
3596{
3597 struct p_protocol *p = pi->data;
3598 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3599 int p_proto, p_discard_my_data, p_two_primaries, cf;
3600 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3601 char integrity_alg[SHARED_SECRET_MAX] = "";
3602 struct crypto_ahash *peer_integrity_tfm = NULL;
3603 void *int_dig_in = NULL, *int_dig_vv = NULL;
3604
3605 p_proto = be32_to_cpu(p->protocol);
3606 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3607 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3608 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
3609 p_two_primaries = be32_to_cpu(p->two_primaries);
3610 cf = be32_to_cpu(p->conn_flags);
3611 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
3612
3613 if (connection->agreed_pro_version >= 87) {
3614 int err;
3615
3616 if (pi->size > sizeof(integrity_alg))
3617 return -EIO;
3618 err = drbd_recv_all(connection, integrity_alg, pi->size);
3619 if (err)
3620 return err;
3621 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
3622 }
3623
3624 if (pi->cmd != P_PROTOCOL_UPDATE) {
3625 clear_bit(CONN_DRY_RUN, &connection->flags);
3626
3627 if (cf & CF_DRY_RUN)
3628 set_bit(CONN_DRY_RUN, &connection->flags);
3629
3630 rcu_read_lock();
3631 nc = rcu_dereference(connection->net_conf);
3632
3633 if (p_proto != nc->wire_protocol) {
3634 drbd_err(connection, "incompatible %s settings\n", "protocol");
3635 goto disconnect_rcu_unlock;
3636 }
3637
3638 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
3639 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
3640 goto disconnect_rcu_unlock;
3641 }
3642
3643 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
3644 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
3645 goto disconnect_rcu_unlock;
3646 }
3647
3648 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
3649 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
3650 goto disconnect_rcu_unlock;
3651 }
3652
3653 if (p_discard_my_data && nc->discard_my_data) {
3654 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
3655 goto disconnect_rcu_unlock;
3656 }
3657
3658 if (p_two_primaries != nc->two_primaries) {
3659 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
3660 goto disconnect_rcu_unlock;
3661 }
3662
3663 if (strcmp(integrity_alg, nc->integrity_alg)) {
3664 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
3665 goto disconnect_rcu_unlock;
3666 }
3667
3668 rcu_read_unlock();
3669 }
3670
3671 if (integrity_alg[0]) {
3672 int hash_size;
3673
 /*
  * We can only change the peer data integrity algorithm
  * here. Changing our own data integrity algorithm
  * requires that we send a P_PROTOCOL_UPDATE packet at
  * the same time; otherwise, the peer has no way to
  * tell between which packets the algorithm should
  * change.
  */
3683 peer_integrity_tfm = crypto_alloc_ahash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3684 if (IS_ERR(peer_integrity_tfm)) {
3685 peer_integrity_tfm = NULL;
3686 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
3687 integrity_alg);
3688 goto disconnect;
3689 }
3690
3691 hash_size = crypto_ahash_digestsize(peer_integrity_tfm);
3692 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3693 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3694 if (!(int_dig_in && int_dig_vv)) {
3695 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
3696 goto disconnect;
3697 }
3698 }
3699
3700 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3701 if (!new_net_conf) {
3702 drbd_err(connection, "Allocation of new net_conf failed\n");
3703 goto disconnect;
3704 }
3705
3706 mutex_lock(&connection->data.mutex);
3707 mutex_lock(&connection->resource->conf_update);
3708 old_net_conf = connection->net_conf;
3709 *new_net_conf = *old_net_conf;
3710
3711 new_net_conf->wire_protocol = p_proto;
3712 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3713 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3714 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3715 new_net_conf->two_primaries = p_two_primaries;
3716
3717 rcu_assign_pointer(connection->net_conf, new_net_conf);
3718 mutex_unlock(&connection->resource->conf_update);
3719 mutex_unlock(&connection->data.mutex);
3720
3721 crypto_free_ahash(connection->peer_integrity_tfm);
3722 kfree(connection->int_dig_in);
3723 kfree(connection->int_dig_vv);
3724 connection->peer_integrity_tfm = peer_integrity_tfm;
3725 connection->int_dig_in = int_dig_in;
3726 connection->int_dig_vv = int_dig_vv;
3727
3728 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
3729 drbd_info(connection, "peer data-integrity-alg: %s\n",
3730 integrity_alg[0] ? integrity_alg : "(none)");
3731
3732 synchronize_rcu();
3733 kfree(old_net_conf);
3734 return 0;
3735
3736disconnect_rcu_unlock:
3737 rcu_read_unlock();
3738disconnect:
3739 crypto_free_ahash(peer_integrity_tfm);
3740 kfree(int_dig_in);
3741 kfree(int_dig_vv);
3742 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
3743 return -EIO;
3744}
3745
/* helper function
 * input: alg name, feature name
 * return: NULL if the alg name is empty
 *         ERR_PTR(error) if something goes wrong
 *         or the crypto hash ptr, if it worked out ok. */
3751static struct crypto_ahash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
3752 const char *alg, const char *name)
3753{
3754 struct crypto_ahash *tfm;
3755
3756 if (!alg[0])
3757 return NULL;
3758
3759 tfm = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC);
3760 if (IS_ERR(tfm)) {
3761 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3762 alg, name, PTR_ERR(tfm));
3763 return tfm;
3764 }
3765 return tfm;
3766}
3767
3768static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
3769{
3770 void *buffer = connection->data.rbuf;
3771 int size = pi->size;
3772
3773 while (size) {
3774 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3775 s = drbd_recv(connection, buffer, s);
3776 if (s <= 0) {
3777 if (s < 0)
3778 return s;
3779 break;
3780 }
3781 size -= s;
3782 }
3783 if (size)
3784 return -EIO;
3785 return 0;
3786}
3787
/*
 * config_unknown_volume  -  device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but
 * the peer will not know about the device yet. It will warn and ignore
 * these commands. Once the device is added on the second node, the second
 * node will send the same device configuration commands, but in the other
 * direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
3799static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
3800{
3801 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
3802 cmdname(pi->cmd), pi->vnr);
3803 return ignore_remaining_packet(connection, pi);
3804}
3805
3806static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
3807{
3808 struct drbd_peer_device *peer_device;
3809 struct drbd_device *device;
3810 struct p_rs_param_95 *p;
3811 unsigned int header_size, data_size, exp_max_sz;
3812 struct crypto_ahash *verify_tfm = NULL;
3813 struct crypto_ahash *csums_tfm = NULL;
3814 struct net_conf *old_net_conf, *new_net_conf = NULL;
3815 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
3816 const int apv = connection->agreed_pro_version;
3817 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
3818 int fifo_size = 0;
3819 int err;
3820
3821 peer_device = conn_peer_device(connection, pi->vnr);
3822 if (!peer_device)
3823 return config_unknown_volume(connection, pi);
3824 device = peer_device->device;
3825
3826 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3827 : apv == 88 ? sizeof(struct p_rs_param)
3828 + SHARED_SECRET_MAX
3829 : apv <= 94 ? sizeof(struct p_rs_param_89)
3830 : sizeof(struct p_rs_param_95);
3831
3832 if (pi->size > exp_max_sz) {
3833 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
3834 pi->size, exp_max_sz);
3835 return -EIO;
3836 }
3837
3838 if (apv <= 88) {
3839 header_size = sizeof(struct p_rs_param);
3840 data_size = pi->size - header_size;
3841 } else if (apv <= 94) {
3842 header_size = sizeof(struct p_rs_param_89);
3843 data_size = pi->size - header_size;
3844 D_ASSERT(device, data_size == 0);
3845 } else {
3846 header_size = sizeof(struct p_rs_param_95);
3847 data_size = pi->size - header_size;
3848 D_ASSERT(device, data_size == 0);
3849 }
3850
 /* initialize verify_alg and csums_alg */
3852 p = pi->data;
3853 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3854
3855 err = drbd_recv_all(peer_device->connection, p, header_size);
3856 if (err)
3857 return err;
3858
3859 mutex_lock(&connection->resource->conf_update);
3860 old_net_conf = peer_device->connection->net_conf;
3861 if (get_ldev(device)) {
3862 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3863 if (!new_disk_conf) {
3864 put_ldev(device);
3865 mutex_unlock(&connection->resource->conf_update);
3866 drbd_err(device, "Allocation of new disk_conf failed\n");
3867 return -ENOMEM;
3868 }
3869
3870 old_disk_conf = device->ldev->disk_conf;
3871 *new_disk_conf = *old_disk_conf;
3872
3873 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
3874 }
3875
3876 if (apv >= 88) {
3877 if (apv == 88) {
3878 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3879 drbd_err(device, "verify-alg of wrong size, "
3880 "peer wants %u, accepting only up to %u byte\n",
3881 data_size, SHARED_SECRET_MAX);
3882 err = -EIO;
3883 goto reconnect;
3884 }
3885
3886 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
3887 if (err)
3888 goto reconnect;
3889
 /* we expect NUL terminated string */
 /* but just in case someone tries to be evil */
3891 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
3892 p->verify_alg[data_size-1] = 0;
3893
3894 } else {
 /* we still expect NUL terminated strings */
 /* but just in case someone tries to be evil */
3897 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3898 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3899 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3900 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3901 }
3902
3903 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
3904 if (device->state.conn == C_WF_REPORT_PARAMS) {
3905 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
3906 old_net_conf->verify_alg, p->verify_alg);
3907 goto disconnect;
3908 }
3909 verify_tfm = drbd_crypto_alloc_digest_safe(device,
3910 p->verify_alg, "verify-alg");
3911 if (IS_ERR(verify_tfm)) {
3912 verify_tfm = NULL;
3913 goto disconnect;
3914 }
3915 }
3916
3917 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
3918 if (device->state.conn == C_WF_REPORT_PARAMS) {
3919 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
3920 old_net_conf->csums_alg, p->csums_alg);
3921 goto disconnect;
3922 }
3923 csums_tfm = drbd_crypto_alloc_digest_safe(device,
3924 p->csums_alg, "csums-alg");
3925 if (IS_ERR(csums_tfm)) {
3926 csums_tfm = NULL;
3927 goto disconnect;
3928 }
3929 }
3930
3931 if (apv > 94 && new_disk_conf) {
3932 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3933 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3934 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3935 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
3936
3937 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
3938 if (fifo_size != device->rs_plan_s->size) {
3939 new_plan = fifo_alloc(fifo_size);
3940 if (!new_plan) {
3941 drbd_err(device, "kmalloc of fifo_buffer failed");
3942 put_ldev(device);
3943 goto disconnect;
3944 }
3945 }
3946 }
3947
3948 if (verify_tfm || csums_tfm) {
3949 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3950 if (!new_net_conf) {
3951 drbd_err(device, "Allocation of new net_conf failed\n");
3952 goto disconnect;
3953 }
3954
3955 *new_net_conf = *old_net_conf;
3956
3957 if (verify_tfm) {
3958 strcpy(new_net_conf->verify_alg, p->verify_alg);
3959 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
3960 crypto_free_ahash(peer_device->connection->verify_tfm);
3961 peer_device->connection->verify_tfm = verify_tfm;
3962 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
3963 }
3964 if (csums_tfm) {
3965 strcpy(new_net_conf->csums_alg, p->csums_alg);
3966 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
3967 crypto_free_ahash(peer_device->connection->csums_tfm);
3968 peer_device->connection->csums_tfm = csums_tfm;
3969 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
3970 }
3971 rcu_assign_pointer(connection->net_conf, new_net_conf);
3972 }
3973 }
3974
3975 if (new_disk_conf) {
3976 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3977 put_ldev(device);
3978 }
3979
3980 if (new_plan) {
3981 old_plan = device->rs_plan_s;
3982 rcu_assign_pointer(device->rs_plan_s, new_plan);
3983 }
3984
3985 mutex_unlock(&connection->resource->conf_update);
3986 synchronize_rcu();
3987 if (new_net_conf)
3988 kfree(old_net_conf);
3989 kfree(old_disk_conf);
3990 kfree(old_plan);
3991
3992 return 0;
3993
3994reconnect:
3995 if (new_disk_conf) {
3996 put_ldev(device);
3997 kfree(new_disk_conf);
3998 }
3999 mutex_unlock(&connection->resource->conf_update);
4000 return -EIO;
4001
4002disconnect:
4003 kfree(new_plan);
4004 if (new_disk_conf) {
4005 put_ldev(device);
4006 kfree(new_disk_conf);
4007 }
4008 mutex_unlock(&connection->resource->conf_update);
4009
 crypto_free_ahash(csums_tfm);
 crypto_free_ahash(verify_tfm);
4014 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
4015 return -EIO;
4016}
4017
4018
4019static void warn_if_differ_considerably(struct drbd_device *device,
4020 const char *s, sector_t a, sector_t b)
4021{
4022 sector_t d;
4023 if (a == 0 || b == 0)
4024 return;
4025 d = (a > b) ? (a - b) : (b - a);
4026 if (d > (a>>3) || d > (b>>3))
4027 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
4028 (unsigned long long)a, (unsigned long long)b);
4029}
4030
4031static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
4032{
4033 struct drbd_peer_device *peer_device;
4034 struct drbd_device *device;
4035 struct p_sizes *p = pi->data;
4036 struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
4037 enum determine_dev_size dd = DS_UNCHANGED;
4038 sector_t p_size, p_usize, p_csize, my_usize;
4039 int ldsc = 0;
4040 enum dds_flags ddsf;
4041
4042 peer_device = conn_peer_device(connection, pi->vnr);
4043 if (!peer_device)
4044 return config_unknown_volume(connection, pi);
4045 device = peer_device->device;
4046
4047 p_size = be64_to_cpu(p->d_size);
4048 p_usize = be64_to_cpu(p->u_size);
4049 p_csize = be64_to_cpu(p->c_size);
 /* just store the peer's disk size for now.
  * we still need to figure out whether we accept that. */
4053 device->p_size = p_size;
4054
4055 if (get_ldev(device)) {
4056 sector_t new_size, cur_size;
4057 rcu_read_lock();
4058 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
4059 rcu_read_unlock();
4060
4061 warn_if_differ_considerably(device, "lower level device sizes",
4062 p_size, drbd_get_max_capacity(device->ldev));
4063 warn_if_differ_considerably(device, "user requested size",
4064 p_usize, my_usize);
4065
 /* if this is the first connect, or an otherwise expected
  * param exchange, choose the minimum */
4068 if (device->state.conn == C_WF_REPORT_PARAMS)
4069 p_usize = min_not_zero(my_usize, p_usize);
4070
 /* Never shrink a device with usable data during connect.
  * But allow online shrinking if we are connected. */
4073 new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
4074 cur_size = drbd_get_capacity(device->this_bdev);
4075 if (new_size < cur_size &&
4076 device->state.disk >= D_OUTDATED &&
4077 device->state.conn < C_CONNECTED) {
4078 drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
4079 (unsigned long long)new_size, (unsigned long long)cur_size);
4080 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
4081 put_ldev(device);
4082 return -EIO;
4083 }
4084
4085 if (my_usize != p_usize) {
4086 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
4087
4088 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
4089 if (!new_disk_conf) {
4090 drbd_err(device, "Allocation of new disk_conf failed\n");
4091 put_ldev(device);
4092 return -ENOMEM;
4093 }
4094
4095 mutex_lock(&connection->resource->conf_update);
4096 old_disk_conf = device->ldev->disk_conf;
4097 *new_disk_conf = *old_disk_conf;
4098 new_disk_conf->disk_size = p_usize;
4099
4100 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
4101 mutex_unlock(&connection->resource->conf_update);
4102 synchronize_rcu();
4103 kfree(old_disk_conf);
4104
 drbd_info(device, "Peer sets u_size to %lu sectors\n",
 (unsigned long)p_usize);
4107 }
4108
4109 put_ldev(device);
4110 }
4111
4112 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
4113
 /* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
  * In case we cleared the QUEUE_FLAG_DISCARD from our queue in
  * drbd_reconsider_queue_parameters(), we can be sure that after
  * drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
4118 ddsf = be16_to_cpu(p->dds_flags);
4119 if (get_ldev(device)) {
4120 drbd_reconsider_queue_parameters(device, device->ldev, o);
4121 dd = drbd_determine_dev_size(device, ddsf, NULL);
4122 put_ldev(device);
4123 if (dd == DS_ERROR)
4124 return -EIO;
4125 drbd_md_sync(device);
4126 } else {
 /*
  * I am diskless, need to accept the peer's *current* size.
  * I must NOT accept the peer's backing disk size,
  * it may have been larger than mine all along...
  *
  * At this point, the peer knows more about my disk, or at
  * least about what we last agreed upon, than myself.
  * So if his c_size is less than his d_size, the most likely
  * reason is that *my* d_size was smaller last time we checked.
  *
  * However, if he sends a zero current size,
  * take his (user-capped or) backing disk size anyways.
  */
4140 drbd_reconsider_queue_parameters(device, NULL, o);
4141 drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
4142 }
4143
4144 if (get_ldev(device)) {
4145 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
4146 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
4147 ldsc = 1;
4148 }
4149
4150 put_ldev(device);
4151 }
4152
4153 if (device->state.conn > C_WF_REPORT_PARAMS) {
4154 if (be64_to_cpu(p->c_size) !=
4155 drbd_get_capacity(device->this_bdev) || ldsc) {
 /* we have different sizes, probably peer
  * needs to know my new size... */
4158 drbd_send_sizes(peer_device, 0, ddsf);
4159 }
4160 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
4161 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
4162 if (device->state.pdsk >= D_INCONSISTENT &&
4163 device->state.disk >= D_INCONSISTENT) {
4164 if (ddsf & DDSF_NO_RESYNC)
4165 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
4166 else
4167 resync_after_online_grow(device);
4168 } else
4169 set_bit(RESYNC_AFTER_NEG, &device->flags);
4170 }
4171 }
4172
4173 return 0;
4174}
4175
4176static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
4177{
4178 struct drbd_peer_device *peer_device;
4179 struct drbd_device *device;
4180 struct p_uuids *p = pi->data;
4181 u64 *p_uuid;
4182 int i, updated_uuids = 0;
4183
4184 peer_device = conn_peer_device(connection, pi->vnr);
4185 if (!peer_device)
4186 return config_unknown_volume(connection, pi);
4187 device = peer_device->device;
4188
4189 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
4190 if (!p_uuid) {
4191 drbd_err(device, "kmalloc of p_uuid failed\n");
 return -ENOMEM;
4193 }
4194
4195 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
4196 p_uuid[i] = be64_to_cpu(p->uuid[i]);
4197
4198 kfree(device->p_uuid);
4199 device->p_uuid = p_uuid;
4200
4201 if (device->state.conn < C_CONNECTED &&
4202 device->state.disk < D_INCONSISTENT &&
4203 device->state.role == R_PRIMARY &&
4204 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
4205 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
4206 (unsigned long long)device->ed_uuid);
4207 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
4208 return -EIO;
4209 }
4210
4211 if (get_ldev(device)) {
4212 int skip_initial_sync =
4213 device->state.conn == C_CONNECTED &&
4214 peer_device->connection->agreed_pro_version >= 90 &&
4215 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
4216 (p_uuid[UI_FLAGS] & 8);
4217 if (skip_initial_sync) {
4218 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
4219 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
4220 "clear_n_write from receive_uuids",
4221 BM_LOCKED_TEST_ALLOWED);
4222 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
4223 _drbd_uuid_set(device, UI_BITMAP, 0);
4224 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
4225 CS_VERBOSE, NULL);
4226 drbd_md_sync(device);
4227 updated_uuids = 1;
4228 }
4229 put_ldev(device);
4230 } else if (device->state.disk < D_INCONSISTENT &&
4231 device->state.role == R_PRIMARY) {
 /* I am a diskless primary, the peer just created a new current UUID
  * for me. */
4234 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
4235 }
4236
 /* Before we test for the disk state, we should wait until an eventually
  * ongoing cluster wide state change is finished. That is important if
  * we are primary and are detaching from our disk. We need to see the
  * new disk state... */
4241 mutex_lock(device->state_mutex);
4242 mutex_unlock(device->state_mutex);
4243 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
4244 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
4245
4246 if (updated_uuids)
4247 drbd_print_uuids(device, "receiver updated UUIDs to");
4248
4249 return 0;
4250}
4251
/**
 * convert_state() - Converts the peer's view of the cluster state to our point of view
 * @ps:		The state as seen by the peer.
 */
4256static union drbd_state convert_state(union drbd_state ps)
4257{
4258 union drbd_state ms;
4259
4260 static enum drbd_conns c_tab[] = {
4261 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
4262 [C_CONNECTED] = C_CONNECTED,
4263
4264 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
4265 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
4266 [C_DISCONNECTING] = C_TEAR_DOWN,
4267 [C_VERIFY_S] = C_VERIFY_T,
4268 [C_MASK] = C_MASK,
4269 };
4270
4271 ms.i = ps.i;
4272
4273 ms.conn = c_tab[ps.conn];
4274 ms.peer = ps.role;
4275 ms.role = ps.peer;
4276 ms.pdsk = ps.disk;
4277 ms.disk = ps.pdsk;
4278 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
4279
4280 return ms;
4281}
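
/*
 * Editor's sketch: convert_state() mirrors a state word. A peer
 * reporting { role=Primary, peer=Secondary, disk=UpToDate,
 * pdsk=Inconsistent } reads, from our point of view, as
 * { role=Secondary, peer=Primary, disk=Inconsistent, pdsk=UpToDate };
 * asymmetric connection states are translated via c_tab.
 */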
4282
4283static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
4284{
4285 struct drbd_peer_device *peer_device;
4286 struct drbd_device *device;
4287 struct p_req_state *p = pi->data;
4288 union drbd_state mask, val;
4289 enum drbd_state_rv rv;
4290
4291 peer_device = conn_peer_device(connection, pi->vnr);
4292 if (!peer_device)
4293 return -EIO;
4294 device = peer_device->device;
4295
4296 mask.i = be32_to_cpu(p->mask);
4297 val.i = be32_to_cpu(p->val);
4298
4299 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
4300 mutex_is_locked(device->state_mutex)) {
4301 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
4302 return 0;
4303 }
4304
4305 mask = convert_state(mask);
4306 val = convert_state(val);
4307
4308 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
4309 drbd_send_sr_reply(peer_device, rv);
4310
4311 drbd_md_sync(device);
4312
4313 return 0;
4314}
4315
4316static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
4317{
4318 struct p_req_state *p = pi->data;
4319 union drbd_state mask, val;
4320 enum drbd_state_rv rv;
4321
4322 mask.i = be32_to_cpu(p->mask);
4323 val.i = be32_to_cpu(p->val);
4324
4325 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4326 mutex_is_locked(&connection->cstate_mutex)) {
4327 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
4328 return 0;
4329 }
4330
4331 mask = convert_state(mask);
4332 val = convert_state(val);
4333
4334 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4335 conn_send_sr_reply(connection, rv);
4336
4337 return 0;
4338}
4339
4340static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
4341{
4342 struct drbd_peer_device *peer_device;
4343 struct drbd_device *device;
4344 struct p_state *p = pi->data;
4345 union drbd_state os, ns, peer_state;
4346 enum drbd_disk_state real_peer_disk;
4347 enum chg_state_flags cs_flags;
4348 int rv;
4349
4350 peer_device = conn_peer_device(connection, pi->vnr);
4351 if (!peer_device)
4352 return config_unknown_volume(connection, pi);
4353 device = peer_device->device;
4354
4355 peer_state.i = be32_to_cpu(p->state);
4356
4357 real_peer_disk = peer_state.disk;
4358 if (peer_state.disk == D_NEGOTIATING) {
4359 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
4360 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
4361 }
4362
4363 spin_lock_irq(&device->resource->req_lock);
4364 retry:
4365 os = ns = drbd_read_state(device);
4366 spin_unlock_irq(&device->resource->req_lock);
4367
4368
4369
4370
4371 if (os.conn <= C_TEAR_DOWN)
4372 return -ECONNRESET;
4373
4374
4375
4376
4377
4378
4379
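	/* We still think a resync is running, but the peer already reports
	 * its disk as D_UP_TO_DATE: decide whether the peer is merely ahead
	 * of us in the resync handshake, or considers the resync finished. */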
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* The peer is still in a sync-preparation state; keep treating
		 * its disk as inconsistent, it will report again once it
		 * reaches an active resync state. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

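		/* The peer went back to C_CONNECTED: it considers the resync
		 * finished, so finish it up on our side as well. */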
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

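	/* Explicit verify-finished notification from the peer. */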
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

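	/* The peer still reports its disk as Inconsistent and believes a sync
	 * is running, while we already consider the resync done and our disk
	 * UpToDate. Ignore the stale report to avoid a flapping pdsk. */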
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);

		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));

		/* if a previous event flagged this connection
		 * as a candidate for resync */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);

		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* IO was frozen while a new current UUID was generated that
		 * was never communicated to this peer; with a peer that is
		 * only Consistent we cannot simply thaw and resend, so give
		 * up on this connection attempt. */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING) {
			/* We want the peer to sync up with us, but it has not
			 * yet noticed: resend our state and UUIDs. */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device);

	return 0;
}

static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_uuid *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	wait_event(device->misc_wait,
		   device->state.conn == C_WF_SYNC_UUID ||
		   device->state.conn == C_BEHIND ||
		   device->state.conn < C_CONNECTED ||
		   device->state.disk < D_NEGOTIATING);

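	/* Using the _drbd_uuid_set() variants is deliberate here: the current
	 * UUID must be updated in place, not rotated into the history. */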
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(device, UI_BITMAP, 0UL);

		drbd_print_uuids(device, "updated sync uuid");
		drbd_start_resync(device, C_SYNC_TARGET);

		put_ldev(device);
	} else
		drbd_err(device, "Ignoring SyncUUID packet!\n");

	return 0;
}

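/*
 * receive_bitmap_plain
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */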
static int
receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
		     unsigned long *p, struct bm_xfer_ctx *c)
{
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
				 drbd_header_size(peer_device->connection);
	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
				       c->bm_words - c->word_offset);
	unsigned int want = num_words * sizeof(*p);
	int err;

	if (want != size) {
		drbd_err(peer_device, "%s: want (%u) != size (%u)\n", __func__, want, size);
		return -EIO;
	}
	if (want == 0)
		return 0;
	err = drbd_recv_all(peer_device->connection, p, want);
	if (err)
		return err;

	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);

	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}

static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
{
	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
}

static int dcbp_get_start(struct p_compressed_bm *p)
{
	return (p->encoding & 0x80) != 0;
}

static int dcbp_get_pad_bits(struct p_compressed_bm *p)
{
	return (p->encoding >> 4) & 0x7;
}

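/*
 * recv_bm_rle_bits
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */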
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		 struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;
	u64 rl;
	u64 tmp;
	unsigned long s = c->bit_offset;
	unsigned long e;
	int toggle = dcbp_get_start(p);
	int have;
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl - 1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				 have, bits, look_ahead,
				 (unsigned int)(bs.cur.b - p->code),
				 (unsigned int)bs.buf_len);
			return -EIO;
		}

		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	return (s != c->bm_bits);
}

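/*
 * decode_bitmap_c
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */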
static int
decode_bitmap_c(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		struct bm_xfer_ctx *c,
		unsigned int len)
{
	if (dcbp_get_code(p) == RLE_VLI_Bits)
		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));

	/* RLE_VLI_Bits is the only compressed encoding we currently accept;
	 * anything else is a protocol error. */
	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
	return -EIO;
}

void INFO_bm_xfer_stats(struct drbd_device *device,
			const char *direction, struct bm_xfer_ctx *c)
{
	/* what would it take to transfer it "plaintext" */
	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
	unsigned int plain =
		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
		c->bm_words * sizeof(unsigned long);
	unsigned int total = c->bytes[0] + c->bytes[1];
	unsigned int r;

	/* total can not be zero. but just in case: */
	if (total == 0)
		return;

	/* don't report if not compressed */
	if (total >= plain)
		return;

	/* total < plain. check for overflow, still */
	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
				    : (1000 * total / plain);

	if (r > 1000)
		r = 1000;

	r = 1000 - r;
	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
		  "total %u; compression: %u.%u%%\n",
		  direction,
		  c->bytes[1], c->packets[1],
		  c->bytes[0], c->packets[0],
		  total, r/10, r % 10);
}

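/* Receive the peer's bitmap in plain or RLE-compressed chunks and merge it
 * into the local bitmap. Afterwards, depending on the connection state,
 * either send our own bitmap back (C_WF_BITMAP_T) or start the resync as
 * SyncSource (C_WF_BITMAP_S). Returns 0 on success, negative on error. */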
static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct bm_xfer_ctx c;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(device),
		.bm_words = drbd_bm_words(device),
	};

	for (;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
				drbd_err(device, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			if (pi->size <= sizeof(*p)) {
				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(peer_device->connection, p, pi->size);
			if (err)
				goto out;
			err = decode_bitmap_c(peer_device, p, &c, pi->size);
		} else {
			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)\n", pi->cmd);
			err = -EIO;
			goto out;
		}

		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		err = drbd_recv_header(peer_device->connection, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(device, "receive", &c);

	if (device->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		err = drbd_send_bitmap(device);
		if (err)
			goto out;

		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(device, rv == SS_SUCCESS);
	} else if (device->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
			  drbd_conn_str(device->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(device);
	if (!err && device->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(device, C_SYNC_SOURCE);
	return err;
}

static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
		  pi->cmd, pi->size);

	return ignore_remaining_packet(connection, pi);
}

static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(connection->data.socket);

	return 0;
}

static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_desc *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	switch (device->state.conn) {
	case C_WF_SYNC_UUID:
	case C_WF_BITMAP_T:
	case C_BEHIND:
		break;
	default:
		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
			 drbd_conn_str(device->state.conn));
	}

	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));

	return 0;
}

static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct p_block_desc *p = pi->data;
	struct drbd_device *device;
	sector_t sector;
	int size, err = 0;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	dec_rs_pending(device);

	if (get_ldev(device)) {
		struct drbd_peer_request *peer_req;
		const int op = REQ_OP_DISCARD;

		peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
					       size, 0, GFP_NOIO);
		if (!peer_req) {
			put_ldev(device);
			return -ENOMEM;
		}

		peer_req->w.cb = e_end_resync_block;
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_IS_TRIM;

		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->sync_ee);
		spin_unlock_irq(&device->resource->req_lock);

		atomic_add(pi->size >> 9, &device->rs_sect_ev);
		err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR);

		if (err) {
			spin_lock_irq(&device->resource->req_lock);
			list_del(&peer_req->w.list);
			spin_unlock_irq(&device->resource->req_lock);

			drbd_free_peer_req(device, peer_req);
			put_ldev(device);
			err = 0;
			goto fail;
		}

		inc_unacked(device);
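		/* No put_ldev() here: the reference is dropped in the write
		 * completion path for this peer request. */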
	} else {
	fail:
		drbd_rs_complete_io(device, sector);
		drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
	}

	atomic_add(size >> 9, &device->rs_sect_in);

	return err;
}

struct data_cmd {
	int expect_payload;
	unsigned int pkt_size;
	int (*fn)(struct drbd_connection *, struct packet_info *);
};

static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply },
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier },
	[P_BITMAP]	    = { 1, 0, receive_bitmap },
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap },
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]	    = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]	    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]	    = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_THIN_REQ]     = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
	[P_RS_DEALLOCATED]  = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
	[P_WSAME]	    = { 1, sizeof(struct p_wsame), receive_Data },
};

static void drbdd(struct drbd_connection *connection)
{
	struct packet_info pi;
	size_t shs;
	int err;

	while (get_t_state(&connection->receiver) == RUNNING) {
		struct data_cmd const *cmd;

		drbd_thread_current_set_cpu(&connection->receiver);
		update_receiver_timing_details(connection, drbd_recv_header);
		if (drbd_recv_header(connection, &pi))
			goto err_out;

		cmd = &drbd_cmd_handler[pi.cmd];
		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
				 cmdname(pi.cmd), pi.cmd);
			goto err_out;
		}

		shs = cmd->pkt_size;
		if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
			shs += sizeof(struct o_qlim);
		if (pi.size > shs && !cmd->expect_payload) {
			drbd_err(connection, "No payload expected %s l:%d\n",
				 cmdname(pi.cmd), pi.size);
			goto err_out;
		}
		if (pi.size < shs) {
			drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
				 cmdname(pi.cmd), (int)shs, pi.size);
			goto err_out;
		}

		if (shs) {
			update_receiver_timing_details(connection, drbd_recv_all_warn);
			err = drbd_recv_all_warn(connection, pi.data, shs);
			if (err)
				goto err_out;
			pi.size -= shs;
		}

		update_receiver_timing_details(connection, cmd->fn);
		err = cmd->fn(connection, &pi);
		if (err) {
			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
				 cmdname(pi.cmd), err, pi.size);
			goto err_out;
		}
	}
	return;

 err_out:
	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}

static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

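	/* We are about to start the cleanup after connection loss.
	 * Usually we should already be in some network failure state,
	 * but just in case we are not, we fix it up here. */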
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* ack_receiver does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->ack_receiver);
	if (connection->ack_sender) {
		destroy_workqueue(connection->ack_sender);
		connection->ack_sender = NULL;
	}
	drbd_free_sock(connection);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}

static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

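	/* Once the peer is gone there is no way to drain the resync-pending
	 * counters the normal way: nothing tracks the resync requests and
	 * replies that were still in flight. So cancel all resync
	 * bookkeeping, reset the counters, and wake up anyone waiting. */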
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* make sure the syncer is stopped by running the timer function once */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn((unsigned long)device);

	/* wait for all w_e_end_data_req, w_e_end_rs_data_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

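	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	 * might have issued a work again. The one before drbd_finish_peer_reqs()
	 * is necessary to reclaim net_ee in drbd_finish_peer_reqs(). */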
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	if (get_ldev(device)) {
		drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
			       "write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
		put_ldev(device);
	}

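	/* Pages handed to the network stack via sendpage may still be
	 * referenced there for a while after the socket is closed. We do not
	 * care exactly when the network stack does its put_page(), but we
	 * release our own reference on these pages right here. */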
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}

/* Announce the protocol range and feature flags we support. */
static int drbd_send_features(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	struct p_connection_features *p;

	sock = &connection->data;
	p = conn_prepare_command(connection, sock);
	if (!p)
		return -EIO;
	memset(p, 0, sizeof(*p));
	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
	p->feature_flags = cpu_to_be32(PRO_FEATURES);
	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
}

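/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 */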
static int drbd_do_features(struct drbd_connection *connection)
{
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
		  "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s.\n",
		  connection->agreed_features,
		  connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
		  connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
		  connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" :
		  connection->agreed_features ? "" : " none");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
		 "I support %d-%d, peer supports %d-%d\n",
		 PRO_VERSION_MIN, PRO_VERSION_MAX,
		 p->protocol_min, p->protocol_max);
	return -1;
}

#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
static int drbd_do_auth(struct drbd_connection *connection)
{
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
#else
#define CHALLENGE_LEN	64

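/* Return value:
 *	1 - auth succeeded,
 *	0 - failed, try again (network error),
 *	-1 - auth failed, don't try again.
 */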
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX];
	unsigned int resp_size;
	SHASH_DESC_ON_STACK(desc, connection->cram_hmac_tfm);
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* Copy the shared secret out under the RCU read lock; net_conf may
	 * be replaced concurrently. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc->tfm = connection->cram_hmac_tfm;
	desc->flags = 0;

	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "AuthResponse payload has wrong size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response, resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
				 right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
			  resp_size);
	else
		rv = -1;

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);
	shash_desc_zero(desc);

	return rv;
}
#endif

int drbd_receiver(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	int h;

	drbd_info(connection, "receiver (re)started\n");

	do {
		h = conn_connect(connection);
		if (h == 0) {
			conn_disconnect(connection);
			schedule_timeout_interruptible(HZ);
		}
		if (h == -1) {
			drbd_warn(connection, "Discarding network configuration.\n");
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	} while (h == 0);

	if (h > 0)
		drbdd(connection);

	conn_disconnect(connection);

	drbd_info(connection, "receiver terminated\n");
	return 0;
}

/* ---------------- ack receiver (meta socket) packet handlers ---------------- */

static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	if (retcode >= SS_SUCCESS) {
		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
	} else {
		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&connection->ping_wait);

	return 0;
}

static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
		D_ASSERT(device, connection->agreed_pro_version < 100);
		return got_conn_RqSReply(connection, pi);
	}

	if (retcode >= SS_SUCCESS) {
		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
	} else {
		set_bit(CL_ST_CHG_FAIL, &device->flags);
		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&device->state_wait);

	return 0;
}

static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);
}

static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}

static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums counts in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}

static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}

static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}

static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
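		/* The request may already have been completed and removed
		 * from the collision hash (protocol A has no P_WRITE_ACKs;
		 * in protocol B we may get a P_RECV_ACK first and a P_NEG_ACK
		 * afterwards). Just mark the blocks as out of sync. */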
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}

static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
		 (unsigned long long)sector, be32_to_cpu(p->blksize));

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->read_requests, __func__,
					     NEG_ACKED, false);
}

static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
			/* fall through */
		case P_RS_CANCEL:
			break;
		default:
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}

static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}

static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	if (device->ov_left == 0) {
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			drbd_err(device, "kmalloc(dw) failed.");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}

static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}

struct meta_sock_cmd {
	size_t pkt_size;
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};

static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
{
	long t;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	t = ping_timeout ? nc->ping_timeo : nc->ping_int;
	rcu_read_unlock();

	t *= HZ;
	if (ping_timeout)
		t /= 10;

	connection->meta.socket->sk->sk_rcvtimeo = t;
}

static void set_ping_timeout(struct drbd_connection *connection)
{
	set_rcvtimeo(connection, 1);
}

static void set_idle_timeout(struct drbd_connection *connection)
{
	set_rcvtimeo(connection, 0);
}

static struct meta_sock_cmd ack_receiver_tbl[] = {
	[P_PING]	    = { 0, got_Ping },
	[P_PING_ACK]	    = { 0, got_PingAck },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]	    = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
};

int drbd_ack_receiver(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct meta_sock_cmd *cmd = NULL;
	struct packet_info pi;
	unsigned long pre_recv_jif;
	int rv;
	void *buf = connection->meta.rbuf;
	int received = 0;
	unsigned int header_size = drbd_header_size(connection);
	int expect = header_size;
	bool ping_timeout_active = false;
	struct sched_param param = { .sched_priority = 2 };

	rv = sched_setscheduler(current, SCHED_RR, &param);
	if (rv < 0)
		drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv);

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		conn_reclaim_net_peer_reqs(connection);

		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
			if (drbd_send_ping(connection)) {
				drbd_err(connection, "drbd_send_ping has failed\n");
				goto reconnect;
			}
			set_ping_timeout(connection);
			ping_timeout_active = true;
		}

		pre_recv_jif = jiffies;
		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
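
		/* Note:
		 * -EINTR	 we got a signal
		 * -EAGAIN	 rcvtimeo expired
		 * rv == 0	 connection shut down by peer
		 * rv <  0	 other than above: unexpected error!
		 * rv == expect  full header or command received
		 * rv  < expect  "woken" by signal during receive
		 */
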
		if (likely(rv > 0)) {
			received += rv;
			buf	 += rv;
		} else if (rv == 0) {
			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
				long t;
				rcu_read_lock();
				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
				rcu_read_unlock();

				t = wait_event_timeout(connection->ping_wait,
						       connection->cstate < C_WF_REPORT_PARAMS,
						       t);
				if (t)
					break;
			}
			drbd_err(connection, "meta connection shut down by peer.\n");
			goto reconnect;
		} else if (rv == -EAGAIN) {
			/* If the connection received something meanwhile,
			 * that is good enough: the peer is still alive. */
			if (time_after(connection->last_received, pre_recv_jif))
				continue;
			if (ping_timeout_active) {
				drbd_err(connection, "PingAck did not arrive in time.\n");
				goto reconnect;
			}
			set_bit(SEND_PING, &connection->flags);
			continue;
		} else if (rv == -EINTR) {
			/* maybe drbd_thread_stop(): the while condition will notice.
			 * maybe woken for send_ping: we'll send a ping above. */
			flush_signals(current);
			continue;
		} else {
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
			goto reconnect;
		}

		if (received == expect && cmd == NULL) {
			if (decode_header(connection, connection->meta.rbuf, &pi))
				goto reconnect;
			cmd = &ack_receiver_tbl[pi.cmd];
			if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
					 cmdname(pi.cmd), pi.cmd);
				goto disconnect;
			}
			expect = header_size + cmd->pkt_size;
			if (pi.size != expect - header_size) {
				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
					 pi.cmd, pi.size);
				goto reconnect;
			}
		}
		if (received == expect) {
			bool err;

			err = cmd->fn(connection, &pi);
			if (err) {
				drbd_err(connection, "%pf failed\n", cmd->fn);
				goto reconnect;
			}

			connection->last_received = jiffies;

			if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
				set_idle_timeout(connection);
				ping_timeout_active = false;
			}

			buf = connection->meta.rbuf;
			received = 0;
			expect = header_size;
			cmd = NULL;
		}
	}

	if (0) {
reconnect:
		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		conn_md_sync(connection);
	}
	if (0) {
disconnect:
		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	drbd_info(connection, "ack_receiver terminated\n");

	return 0;
}

void drbd_send_acks_wf(struct work_struct *ws)
{
	struct drbd_peer_device *peer_device =
		container_of(ws, struct drbd_peer_device, send_acks_work);
	struct drbd_connection *connection = peer_device->connection;
	struct drbd_device *device = peer_device->device;
	struct net_conf *nc;
	int tcp_cork, err;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	tcp_cork = nc->tcp_cork;
	rcu_read_unlock();

	if (tcp_cork)
		drbd_tcp_cork(connection->meta.socket);

	err = drbd_finish_peer_reqs(device);
	kref_put(&device->kref, drbd_destroy_device);
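	/* The matching kref_get() is presumably taken where this work item is
	 * queued; it keeps the device (and with it this work struct) alive
	 * until we are done here. */
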
	if (err) {
		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		return;
	}

	if (tcp_cork)
		drbd_tcp_uncork(connection->meta.socket);

	return;
}