1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include <linux/slab.h>
26#include <scsi/scsi_device.h>
27#include <asm/div64.h>
28
29#include "exofs.h"
30
/* Second-level (very verbose) debug messages: compiled out, arguments dropped. */
#define EXOFS_DBGMSG2(...) do { } while (0)
32
33
34void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
35{
36 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
37}
38
39int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
40 u64 offset, void *p, unsigned length)
41{
42 struct osd_request *or = osd_start_request(od, GFP_KERNEL);
43
44 int ret;
45
46 if (unlikely(!or)) {
47 EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
48 return -ENOMEM;
49 }
50 ret = osd_req_read_kern(or, obj, offset, p, length);
51 if (unlikely(ret)) {
52 EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
53 goto out;
54 }
55
56 ret = osd_finalize_request(or, 0, cred, NULL);
57 if (unlikely(ret)) {
58 EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret);
59 goto out;
60 }
61
62 ret = osd_execute_request(or);
63 if (unlikely(ret))
64 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
65
66
67out:
68 osd_end_request(or);
69 return ret;
70}
71
72int exofs_get_io_state(struct exofs_layout *layout,
73 struct exofs_io_state **pios)
74{
75 struct exofs_io_state *ios;
76
77
78
79
80 ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL);
81 if (unlikely(!ios)) {
82 EXOFS_DBGMSG("Failed kzalloc bytes=%d\n",
83 exofs_io_state_size(layout->s_numdevs));
84 *pios = NULL;
85 return -ENOMEM;
86 }
87
88 ios->layout = layout;
89 ios->obj.partition = layout->s_pid;
90 *pios = ios;
91 return 0;
92}
93
94void exofs_put_io_state(struct exofs_io_state *ios)
95{
96 if (ios) {
97 unsigned i;
98
99 for (i = 0; i < ios->numdevs; i++) {
100 struct exofs_per_dev_state *per_dev = &ios->per_dev[i];
101
102 if (per_dev->or)
103 osd_end_request(per_dev->or);
104 if (per_dev->bio)
105 bio_put(per_dev->bio);
106 }
107
108 kfree(ios);
109 }
110}
111
112unsigned exofs_layout_od_id(struct exofs_layout *layout,
113 osd_id obj_no, unsigned layout_index)
114{
115
116
117
118 unsigned dev_mod = obj_no;
119
120 return (layout_index + dev_mod * layout->mirrors_p1) %
121 layout->s_numdevs;
122
123
124
125
126}
127
128static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios,
129 unsigned layout_index)
130{
131 return ios->layout->s_ods[
132 exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)];
133}
134
/*
 * Completion callback installed by exofs_io_execute() for synchronous I/O:
 * @p is the on-stack completion the submitter is waiting on.
 */
static void _sync_done(struct exofs_io_state *ios, void *p)
{
	complete((struct completion *)p);
}
141
142static void _last_io(struct kref *kref)
143{
144 struct exofs_io_state *ios = container_of(
145 kref, struct exofs_io_state, kref);
146
147 ios->done(ios, ios->private);
148}
149
150static void _done_io(struct osd_request *or, void *p)
151{
152 struct exofs_io_state *ios = p;
153
154 kref_put(&ios->kref, _last_io);
155}
156
157static int exofs_io_execute(struct exofs_io_state *ios)
158{
159 DECLARE_COMPLETION_ONSTACK(wait);
160 bool sync = (ios->done == NULL);
161 int i, ret;
162
163 if (sync) {
164 ios->done = _sync_done;
165 ios->private = &wait;
166 }
167
168 for (i = 0; i < ios->numdevs; i++) {
169 struct osd_request *or = ios->per_dev[i].or;
170 if (unlikely(!or))
171 continue;
172
173 ret = osd_finalize_request(or, 0, ios->cred, NULL);
174 if (unlikely(ret)) {
175 EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n",
176 ret);
177 return ret;
178 }
179 }
180
181 kref_init(&ios->kref);
182
183 for (i = 0; i < ios->numdevs; i++) {
184 struct osd_request *or = ios->per_dev[i].or;
185 if (unlikely(!or))
186 continue;
187
188 kref_get(&ios->kref);
189 osd_execute_request_async(or, _done_io, ios);
190 }
191
192 kref_put(&ios->kref, _last_io);
193 ret = 0;
194
195 if (sync) {
196 wait_for_completion(&wait);
197 ret = exofs_check_io(ios, NULL);
198 }
199 return ret;
200}
201
202static void _clear_bio(struct bio *bio)
203{
204 struct bio_vec *bv;
205 unsigned i;
206
207 __bio_for_each_segment(bv, bio, i, 0) {
208 unsigned this_count = bv->bv_len;
209
210 if (likely(PAGE_SIZE == this_count))
211 clear_highpage(bv->bv_page);
212 else
213 zero_user(bv->bv_page, bv->bv_offset, this_count);
214 }
215}
216
217int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
218{
219 enum osd_err_priority acumulated_osd_err = 0;
220 int acumulated_lin_err = 0;
221 int i;
222
223 for (i = 0; i < ios->numdevs; i++) {
224 struct osd_sense_info osi;
225 struct osd_request *or = ios->per_dev[i].or;
226 int ret;
227
228 if (unlikely(!or))
229 continue;
230
231 ret = osd_req_decode_sense(or, &osi);
232 if (likely(!ret))
233 continue;
234
235 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
236
237 _clear_bio(ios->per_dev[i].bio);
238 EXOFS_DBGMSG("start read offset passed end of file "
239 "offset=0x%llx, length=0x%llx\n",
240 _LLU(ios->per_dev[i].offset),
241 _LLU(ios->per_dev[i].length));
242
243 continue;
244 }
245
246 if (osi.osd_err_pri >= acumulated_osd_err) {
247 acumulated_osd_err = osi.osd_err_pri;
248 acumulated_lin_err = ret;
249 }
250 }
251
252
253 if (resid) {
254 if (likely(!acumulated_lin_err))
255 *resid = 0;
256 else
257 *resid = ios->length;
258 }
259
260 return acumulated_lin_err;
261}
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305struct _striping_info {
306 u64 obj_offset;
307 u64 group_length;
308 unsigned dev;
309 unsigned unit_off;
310};
311
312static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
313 struct _striping_info *si)
314{
315 u32 stripe_unit = ios->layout->stripe_unit;
316 u32 group_width = ios->layout->group_width;
317 u64 group_depth = ios->layout->group_depth;
318
319 u32 U = stripe_unit * group_width;
320 u64 T = U * group_depth;
321 u64 S = T * ios->layout->group_count;
322 u64 M = div64_u64(file_offset, S);
323
324
325
326
327
328 u64 LmodS = file_offset - M * S;
329 u32 G = div64_u64(LmodS, T);
330 u64 H = LmodS - G * T;
331
332 u32 N = div_u64(H, U);
333
334
335 si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
336 si->dev *= ios->layout->mirrors_p1;
337
338 div_u64_rem(file_offset, stripe_unit, &si->unit_off);
339
340 si->obj_offset = si->unit_off + (N * stripe_unit) +
341 (M * group_depth * stripe_unit);
342
343 si->group_length = T - H;
344}
345
346static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
347 unsigned pgbase, struct exofs_per_dev_state *per_dev,
348 int cur_len)
349{
350 unsigned pg = *cur_pg;
351 struct request_queue *q =
352 osd_request_queue(exofs_ios_od(ios, per_dev->dev));
353
354 per_dev->length += cur_len;
355
356 if (per_dev->bio == NULL) {
357 unsigned pages_in_stripe = ios->layout->group_width *
358 (ios->layout->stripe_unit / PAGE_SIZE);
359 unsigned bio_size = (ios->nr_pages + pages_in_stripe) /
360 ios->layout->group_width;
361
362 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
363 if (unlikely(!per_dev->bio)) {
364 EXOFS_DBGMSG("Failed to allocate BIO size=%u\n",
365 bio_size);
366 return -ENOMEM;
367 }
368 }
369
370 while (cur_len > 0) {
371 unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
372 unsigned added_len;
373
374 BUG_ON(ios->nr_pages <= pg);
375 cur_len -= pglen;
376
377 added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg],
378 pglen, pgbase);
379 if (unlikely(pglen != added_len))
380 return -ENOMEM;
381 pgbase = 0;
382 ++pg;
383 }
384 BUG_ON(cur_len);
385
386 *cur_pg = pg;
387 return 0;
388}
389
390static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
391 struct _striping_info *si)
392{
393 unsigned stripe_unit = ios->layout->stripe_unit;
394 unsigned mirrors_p1 = ios->layout->mirrors_p1;
395 unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
396 unsigned dev = si->dev;
397 unsigned first_dev = dev - (dev % devs_in_group);
398 unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
399 unsigned cur_pg = ios->pages_consumed;
400 int ret = 0;
401
402 while (length) {
403 struct exofs_per_dev_state *per_dev = &ios->per_dev[dev];
404 unsigned cur_len, page_off = 0;
405
406 if (!per_dev->length) {
407 per_dev->dev = dev;
408 if (dev < si->dev) {
409 per_dev->offset = si->obj_offset + stripe_unit -
410 si->unit_off;
411 cur_len = stripe_unit;
412 } else if (dev == si->dev) {
413 per_dev->offset = si->obj_offset;
414 cur_len = stripe_unit - si->unit_off;
415 page_off = si->unit_off & ~PAGE_MASK;
416 BUG_ON(page_off && (page_off != ios->pgbase));
417 } else {
418 per_dev->offset = si->obj_offset - si->unit_off;
419 cur_len = stripe_unit;
420 }
421
422 if (max_comp < dev)
423 max_comp = dev;
424 } else {
425 cur_len = stripe_unit;
426 }
427 if (cur_len >= length)
428 cur_len = length;
429
430 ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
431 cur_len);
432 if (unlikely(ret))
433 goto out;
434
435 dev += mirrors_p1;
436 dev = (dev % devs_in_group) + first_dev;
437
438 length -= cur_len;
439 }
440out:
441 ios->numdevs = max_comp + mirrors_p1;
442 ios->pages_consumed = cur_pg;
443 return ret;
444}
445
446static int _prepare_for_striping(struct exofs_io_state *ios)
447{
448 u64 length = ios->length;
449 u64 offset = ios->offset;
450 struct _striping_info si;
451 int ret = 0;
452
453 if (!ios->pages) {
454 if (ios->kern_buff) {
455 struct exofs_per_dev_state *per_dev = &ios->per_dev[0];
456
457 _calc_stripe_info(ios, ios->offset, &si);
458 per_dev->offset = si.obj_offset;
459 per_dev->dev = si.dev;
460
461
462 BUG_ON((ios->layout->group_width > 1) &&
463 (si.unit_off + ios->length >
464 ios->layout->stripe_unit));
465 }
466 ios->numdevs = ios->layout->mirrors_p1;
467 return 0;
468 }
469
470 while (length) {
471 _calc_stripe_info(ios, offset, &si);
472
473 if (length < si.group_length)
474 si.group_length = length;
475
476 ret = _prepare_one_group(ios, si.group_length, &si);
477 if (unlikely(ret))
478 goto out;
479
480 offset += si.group_length;
481 length -= si.group_length;
482 }
483
484out:
485 return ret;
486}
487
488int exofs_sbi_create(struct exofs_io_state *ios)
489{
490 int i, ret;
491
492 for (i = 0; i < ios->layout->s_numdevs; i++) {
493 struct osd_request *or;
494
495 or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
496 if (unlikely(!or)) {
497 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
498 ret = -ENOMEM;
499 goto out;
500 }
501 ios->per_dev[i].or = or;
502 ios->numdevs++;
503
504 osd_req_create_object(or, &ios->obj);
505 }
506 ret = exofs_io_execute(ios);
507
508out:
509 return ret;
510}
511
512int exofs_sbi_remove(struct exofs_io_state *ios)
513{
514 int i, ret;
515
516 for (i = 0; i < ios->layout->s_numdevs; i++) {
517 struct osd_request *or;
518
519 or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
520 if (unlikely(!or)) {
521 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
522 ret = -ENOMEM;
523 goto out;
524 }
525 ios->per_dev[i].or = or;
526 ios->numdevs++;
527
528 osd_req_remove_object(or, &ios->obj);
529 }
530 ret = exofs_io_execute(ios);
531
532out:
533 return ret;
534}
535
536static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
537{
538 struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp];
539 unsigned dev = ios->per_dev[cur_comp].dev;
540 unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
541 int ret = 0;
542
543 if (ios->pages && !master_dev->length)
544 return 0;
545
546 for (; cur_comp < last_comp; ++cur_comp, ++dev) {
547 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
548 struct osd_request *or;
549
550 or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL);
551 if (unlikely(!or)) {
552 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
553 ret = -ENOMEM;
554 goto out;
555 }
556 per_dev->or = or;
557 per_dev->offset = master_dev->offset;
558
559 if (ios->pages) {
560 struct bio *bio;
561
562 if (per_dev != master_dev) {
563 bio = bio_kmalloc(GFP_KERNEL,
564 master_dev->bio->bi_max_vecs);
565 if (unlikely(!bio)) {
566 EXOFS_DBGMSG(
567 "Failed to allocate BIO size=%u\n",
568 master_dev->bio->bi_max_vecs);
569 ret = -ENOMEM;
570 goto out;
571 }
572
573 __bio_clone(bio, master_dev->bio);
574 bio->bi_bdev = NULL;
575 bio->bi_next = NULL;
576 per_dev->length = master_dev->length;
577 per_dev->bio = bio;
578 per_dev->dev = dev;
579 } else {
580 bio = master_dev->bio;
581
582 bio->bi_rw |= REQ_WRITE;
583 }
584
585 osd_req_write(or, &ios->obj, per_dev->offset, bio,
586 per_dev->length);
587 EXOFS_DBGMSG("write(0x%llx) offset=0x%llx "
588 "length=0x%llx dev=%d\n",
589 _LLU(ios->obj.id), _LLU(per_dev->offset),
590 _LLU(per_dev->length), dev);
591 } else if (ios->kern_buff) {
592 ret = osd_req_write_kern(or, &ios->obj, per_dev->offset,
593 ios->kern_buff, ios->length);
594 if (unlikely(ret))
595 goto out;
596 EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
597 "length=0x%llx dev=%d\n",
598 _LLU(ios->obj.id), _LLU(per_dev->offset),
599 _LLU(ios->length), dev);
600 } else {
601 osd_req_set_attributes(or, &ios->obj);
602 EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
603 _LLU(ios->obj.id), ios->out_attr_len, dev);
604 }
605
606 if (ios->out_attr)
607 osd_req_add_set_attr_list(or, ios->out_attr,
608 ios->out_attr_len);
609
610 if (ios->in_attr)
611 osd_req_add_get_attr_list(or, ios->in_attr,
612 ios->in_attr_len);
613 }
614
615out:
616 return ret;
617}
618
619int exofs_sbi_write(struct exofs_io_state *ios)
620{
621 int i;
622 int ret;
623
624 ret = _prepare_for_striping(ios);
625 if (unlikely(ret))
626 return ret;
627
628 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
629 ret = _sbi_write_mirror(ios, i);
630 if (unlikely(ret))
631 return ret;
632 }
633
634 ret = exofs_io_execute(ios);
635 return ret;
636}
637
638static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
639{
640 struct osd_request *or;
641 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
642 unsigned first_dev = (unsigned)ios->obj.id;
643
644 if (ios->pages && !per_dev->length)
645 return 0;
646
647 first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
648 or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL);
649 if (unlikely(!or)) {
650 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
651 return -ENOMEM;
652 }
653 per_dev->or = or;
654
655 if (ios->pages) {
656 osd_req_read(or, &ios->obj, per_dev->offset,
657 per_dev->bio, per_dev->length);
658 EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
659 " dev=%d\n", _LLU(ios->obj.id),
660 _LLU(per_dev->offset), _LLU(per_dev->length),
661 first_dev);
662 } else if (ios->kern_buff) {
663 int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset,
664 ios->kern_buff, ios->length);
665 EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
666 "length=0x%llx dev=%d ret=>%d\n",
667 _LLU(ios->obj.id), _LLU(per_dev->offset),
668 _LLU(ios->length), first_dev, ret);
669 if (unlikely(ret))
670 return ret;
671 } else {
672 osd_req_get_attributes(or, &ios->obj);
673 EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
674 _LLU(ios->obj.id), ios->in_attr_len, first_dev);
675 }
676 if (ios->out_attr)
677 osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
678
679 if (ios->in_attr)
680 osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);
681
682 return 0;
683}
684
685int exofs_sbi_read(struct exofs_io_state *ios)
686{
687 int i;
688 int ret;
689
690 ret = _prepare_for_striping(ios);
691 if (unlikely(ret))
692 return ret;
693
694 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
695 ret = _sbi_read_mirror(ios, i);
696 if (unlikely(ret))
697 return ret;
698 }
699
700 ret = exofs_io_execute(ios);
701 return ret;
702}
703
704int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
705{
706 struct osd_attr cur_attr = {.attr_page = 0};
707 void *iter = NULL;
708 int nelem;
709
710 do {
711 nelem = 1;
712 osd_req_decode_get_attr_list(ios->per_dev[0].or,
713 &cur_attr, &nelem, &iter);
714 if ((cur_attr.attr_page == attr->attr_page) &&
715 (cur_attr.attr_id == attr->attr_id)) {
716 attr->len = cur_attr.len;
717 attr->val_ptr = cur_attr.val_ptr;
718 return 0;
719 }
720 } while (iter);
721
722 return -EIO;
723}
724
725static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp,
726 struct osd_attr *attr)
727{
728 int last_comp = cur_comp + ios->layout->mirrors_p1;
729
730 for (; cur_comp < last_comp; ++cur_comp) {
731 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
732 struct osd_request *or;
733
734 or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL);
735 if (unlikely(!or)) {
736 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
737 return -ENOMEM;
738 }
739 per_dev->or = or;
740
741 osd_req_set_attributes(or, &ios->obj);
742 osd_req_add_set_attr_list(or, attr, 1);
743 }
744
745 return 0;
746}
747
748int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
749{
750 struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
751 struct exofs_io_state *ios;
752 struct exofs_trunc_attr {
753 struct osd_attr attr;
754 __be64 newsize;
755 } *size_attrs;
756 struct _striping_info si;
757 int i, ret;
758
759 ret = exofs_get_io_state(&sbi->layout, &ios);
760 if (unlikely(ret))
761 return ret;
762
763 size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs),
764 GFP_KERNEL);
765 if (unlikely(!size_attrs)) {
766 ret = -ENOMEM;
767 goto out;
768 }
769
770 ios->obj.id = exofs_oi_objno(oi);
771 ios->cred = oi->i_cred;
772
773 ios->numdevs = ios->layout->s_numdevs;
774 _calc_stripe_info(ios, size, &si);
775
776 for (i = 0; i < ios->layout->group_width; ++i) {
777 struct exofs_trunc_attr *size_attr = &size_attrs[i];
778 u64 obj_size;
779
780 if (i < si.dev)
781 obj_size = si.obj_offset +
782 ios->layout->stripe_unit - si.unit_off;
783 else if (i == si.dev)
784 obj_size = si.obj_offset;
785 else
786 obj_size = si.obj_offset - si.unit_off;
787
788 size_attr->newsize = cpu_to_be64(obj_size);
789 size_attr->attr = g_attr_logical_length;
790 size_attr->attr.val_ptr = &size_attr->newsize;
791
792 ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
793 &size_attr->attr);
794 if (unlikely(ret))
795 goto out;
796 }
797 ret = exofs_io_execute(ios);
798
799out:
800 kfree(size_attrs);
801 exofs_put_io_state(ios);
802 return ret;
803}
804