#include <linux/slab.h>
#include <linux/module.h>
#include <asm/div64.h>
#include <linux/lcm.h>

#include "ore_raid.h"

MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
MODULE_LICENSE("GPL");

/* ore_verify_layout() does a couple of things:
 * 1. Given the minimum set of parameters the caller must fill in
 *    (raid_algorithm, mirrors_p1, group_width, stripe_unit, group_depth),
 *    it fixes up the remaining members to be operational for the ore.
 * 2. It checks that the ore actually supports these parameters, for
 *    example that stripe_unit is a multiple of PAGE_SIZE.
 * 3. It caches some heavily used calculations, such as max_io_length.
 */
/* The number of bio_vecs that fit, together with the struct bio itself,
 * in a single kmalloc'ed page.
 */
enum { BIO_MAX_PAGES_KMALLOC =
	(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),};

int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
{
	u64 stripe_length;

	switch (layout->raid_algorithm) {
	case PNFS_OSD_RAID_0:
		layout->parity = 0;
		break;
	case PNFS_OSD_RAID_5:
		layout->parity = 1;
		break;
	case PNFS_OSD_RAID_PQ:
		layout->parity = 2;
		break;
	case PNFS_OSD_RAID_4:
	default:
		ORE_ERR("Only RAID_0/5/6 are supported for now, received enum=%d\n",
			layout->raid_algorithm);
		return -EINVAL;
	}
	if (0 != (layout->stripe_unit & ~PAGE_MASK)) {
		ORE_ERR("Stripe Unit(0x%llx)"
			" must be a multiple of PAGE_SIZE(0x%lx)\n",
			_LLU(layout->stripe_unit), PAGE_SIZE);
		return -EINVAL;
	}
	if (layout->group_width) {
		if (!layout->group_depth) {
			ORE_ERR("group_depth == 0 && group_width != 0\n");
			return -EINVAL;
		}
		if (total_comps < (layout->group_width * layout->mirrors_p1)) {
			ORE_ERR("Data Map wrong, "
				"numdevs=%d < group_width=%d * mirrors=%d\n",
				total_comps, layout->group_width,
				layout->mirrors_p1);
			return -EINVAL;
		}
		layout->group_count = total_comps / layout->mirrors_p1 /
						layout->group_width;
	} else {
		if (layout->group_depth) {
			printk(KERN_NOTICE "Warning: group_depth ignored "
				"because group_width == 0 (group_depth == %lld)\n",
				_LLU(layout->group_depth));
		}
		layout->group_width = total_comps / layout->mirrors_p1;
		layout->group_depth = -1;
		layout->group_count = 1;
	}

	stripe_length = (u64)layout->group_width * layout->stripe_unit;
	if (stripe_length >= (1ULL << 32)) {
		ORE_ERR("Stripe_length(0x%llx) >= 32bit is not supported\n",
			_LLU(stripe_length));
		return -EINVAL;
	}

	layout->max_io_length =
		(BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) *
					(layout->group_width - layout->parity);
	if (layout->parity) {
		/* Round max_io_length down to a whole number of full
		 * (data-only) stripes.
		 */
		unsigned data_stripe_length =
				(layout->group_width - layout->parity) *
				layout->stripe_unit;

		layout->max_io_length /= data_stripe_length;
		layout->max_io_length *= data_stripe_length;
	}
	ORE_DBGMSG("max_io_length=0x%lx\n", layout->max_io_length);

	return 0;
}
EXPORT_SYMBOL(ore_verify_layout);

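/* Example (illustrative only, not from an in-tree caller): a filesystem
 * mounting over 6 osd components might set up and verify a RAID-5 layout
 * roughly like so:
 *
 *	struct ore_layout layout = {
 *		.stripe_unit	= PAGE_SIZE * 16,
 *		.mirrors_p1	= 1,
 *		.group_width	= 0,	// 0 => derived from total_comps
 *		.group_depth	= 0,
 *		.raid_algorithm	= PNFS_OSD_RAID_5,
 *	};
 *
 *	if (ore_verify_layout(6, &layout))
 *		return -EINVAL;
 *	// layout.parity, group_width, group_count and max_io_length
 *	// are now filled in.
 */
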
static u8 *_ios_cred(struct ore_io_state *ios, unsigned index)
{
	return ios->oc->comps[index & ios->oc->single_comp].cred;
}

static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index)
{
	return &ios->oc->comps[index & ios->oc->single_comp].obj;
}

static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index)
{
	ORE_DBGMSG2("oc->first_dev=%d oc->numdevs=%d i=%d oc->ods=%p\n",
		    ios->oc->first_dev, ios->oc->numdevs, index,
		    ios->oc->ods);

	return ore_comp_dev(ios->oc, index);
}

int _ore_get_io_state(struct ore_layout *layout,
		      struct ore_components *oc, unsigned numdevs,
		      unsigned sgs_per_dev, unsigned num_par_pages,
		      struct ore_io_state **pios)
{
	struct ore_io_state *ios;
	struct page **pages;
	struct osd_sg_entry *sgilist;
	struct __alloc_all_io_state {
		struct ore_io_state ios;
		struct ore_per_dev_state per_dev[numdevs];
		union {
			struct osd_sg_entry sglist[sgs_per_dev * numdevs];
			struct page *pages[num_par_pages];
		};
	} *_aios;

	if (likely(sizeof(*_aios) <= PAGE_SIZE)) {
		/* Everything fits in one allocation */
		_aios = kzalloc(sizeof(*_aios), GFP_KERNEL);
		if (unlikely(!_aios)) {
			ORE_DBGMSG("Failed kzalloc bytes=%zd\n",
				   sizeof(*_aios));
			*pios = NULL;
			return -ENOMEM;
		}
		pages = num_par_pages ? _aios->pages : NULL;
		sgilist = sgs_per_dev ? _aios->sglist : NULL;
		ios = &_aios->ios;
	} else {
		struct __alloc_small_io_state {
			struct ore_io_state ios;
			struct ore_per_dev_state per_dev[numdevs];
		} *_aio_small;
		union __extra_part {
			struct osd_sg_entry sglist[sgs_per_dev * numdevs];
			struct page *pages[num_par_pages];
		} *extra_part;

		_aio_small = kzalloc(sizeof(*_aio_small), GFP_KERNEL);
		if (unlikely(!_aio_small)) {
			ORE_DBGMSG("Failed alloc first part bytes=%zd\n",
				   sizeof(*_aio_small));
			*pios = NULL;
			return -ENOMEM;
		}
		extra_part = kzalloc(sizeof(*extra_part), GFP_KERNEL);
		if (unlikely(!extra_part)) {
			ORE_DBGMSG("Failed alloc second part bytes=%zd\n",
				   sizeof(*extra_part));
			kfree(_aio_small);
			*pios = NULL;
			return -ENOMEM;
		}

		pages = num_par_pages ? extra_part->pages : NULL;
		sgilist = sgs_per_dev ? extra_part->sglist : NULL;
		/* In this case per_dev[0].sglist holds the pointer to
		 * be freed.
		 */
		ios = &_aio_small->ios;
		ios->extra_part_alloc = true;
	}

	if (pages) {
		ios->parity_pages = pages;
		ios->max_par_pages = num_par_pages;
	}
	if (sgilist) {
		unsigned d;

		for (d = 0; d < numdevs; ++d) {
			ios->per_dev[d].sglist = sgilist;
			sgilist += sgs_per_dev;
		}
		ios->sgs_per_dev = sgs_per_dev;
	}

	ios->layout = layout;
	ios->oc = oc;
	*pios = ios;
	return 0;
}

/* Allocate an io_state for a single IO into one group of devices.
 *
 * Callers of ore_read()/ore_write() must use this allocator because it
 * also sizes the extra state needed for striping and raid. The ore may
 * decide to IO less than @length bytes due to alignment and other
 * constraints:
 * - The IO cannot cross a group boundary.
 * - In raid5/6 a write must end on a stripe boundary, or the complete
 *   range must fall within a single stripe.
 * - Memory conditions may only permit a shorter IO.
 * The length actually chosen is returned in ios->length.
 */
int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
		     bool is_reading, u64 offset, u64 length,
		     struct ore_io_state **pios)
{
	struct ore_io_state *ios;
	unsigned numdevs = layout->group_width * layout->mirrors_p1;
	unsigned sgs_per_dev = 0, max_par_pages = 0;
	int ret;

	if (layout->parity && length) {
		unsigned data_devs = layout->group_width - layout->parity;
		unsigned stripe_size = layout->stripe_unit * data_devs;
		unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
		u32 remainder;
		u64 num_stripes;
		u64 num_raid_units;

		num_stripes = div_u64_rem(length, stripe_size, &remainder);
		if (remainder)
			++num_stripes;

		num_raid_units = num_stripes * layout->parity;

		if (is_reading) {
			/* For reads add a per-device sglist array; the
			 * first/last segment may be split, hence the +2.
			 */
			num_raid_units += layout->group_width;
			sgs_per_dev = div_u64(num_raid_units, data_devs) + 2;
		} else {
			/* For writes add the parity pages array. */
			max_par_pages = num_raid_units * pages_in_unit *
						sizeof(struct page *);
		}
	}

	ret = _ore_get_io_state(layout, oc, numdevs, sgs_per_dev, max_par_pages,
				pios);
	if (unlikely(ret))
		return ret;

	ios = *pios;
	ios->reading = is_reading;
	ios->offset = offset;

	if (length) {
		ore_calc_stripe_info(layout, offset, length, &ios->si);
		ios->length = ios->si.length;
		ios->nr_pages = ((ios->offset & (PAGE_SIZE - 1)) +
				 ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
		if (layout->parity)
			_ore_post_alloc_raid_stuff(ios);
	}

	return 0;
}
EXPORT_SYMBOL(ore_get_rw_state);

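/* Typical read flow (an illustrative sketch, error handling elided;
 * @pages is a caller-prepared page array covering the range):
 *
 *	struct ore_io_state *ios;
 *
 *	ret = ore_get_rw_state(&layout, &oc, true, offset, length, &ios);
 *	ios->pages = pages;
 *	ios->pgbase = offset & ~PAGE_MASK;
 *	ret = ore_read(ios);	// ios->done == NULL => synchronous
 *	ore_put_io_state(ios);
 *
 * Note that ios->length may come back smaller than @length (see the
 * comment above ore_get_rw_state()); the caller loops for the remainder.
 */
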
/* Allocate an io_state for all the devices in the comps array.
 *
 * This version of io_state allocation is used mostly by create/remove
 * and truncate, which currently need all the devices. The only wasteful
 * bit is a read/write_attributes with no IO. Those sites should be
 * converted to use ore_get_rw_state() with length=0.
 */
int ore_get_io_state(struct ore_layout *layout, struct ore_components *oc,
		     struct ore_io_state **pios)
{
	return _ore_get_io_state(layout, oc, oc->numdevs, 0, 0, pios);
}
EXPORT_SYMBOL(ore_get_io_state);

void ore_put_io_state(struct ore_io_state *ios)
{
	if (ios) {
		unsigned i;

		for (i = 0; i < ios->numdevs; i++) {
			struct ore_per_dev_state *per_dev = &ios->per_dev[i];

			if (per_dev->or)
				osd_end_request(per_dev->or);
			if (per_dev->bio)
				bio_put(per_dev->bio);
		}

		_ore_free_raid_stuff(ios);
		kfree(ios);
	}
}
EXPORT_SYMBOL(ore_put_io_state);

static void _sync_done(struct ore_io_state *ios, void *p)
{
	struct completion *waiting = p;

	complete(waiting);
}

static void _last_io(struct kref *kref)
{
	struct ore_io_state *ios = container_of(
		kref, struct ore_io_state, kref);

	ios->done(ios, ios->private);
}

static void _done_io(struct osd_request *or, void *p)
{
	struct ore_io_state *ios = p;

	kref_put(&ios->kref, _last_io);
}

int ore_io_execute(struct ore_io_state *ios)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	bool sync = (ios->done == NULL);
	int i, ret;

	if (sync) {
		ios->done = _sync_done;
		ios->private = &wait;
	}

	for (i = 0; i < ios->numdevs; i++) {
		struct osd_request *or = ios->per_dev[i].or;
		if (unlikely(!or))
			continue;

		ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL);
		if (unlikely(ret)) {
			ORE_DBGMSG("Failed to osd_finalize_request() => %d\n",
				   ret);
			return ret;
		}
	}

	kref_init(&ios->kref);

	for (i = 0; i < ios->numdevs; i++) {
		struct osd_request *or = ios->per_dev[i].or;
		if (unlikely(!or))
			continue;

		kref_get(&ios->kref);
		osd_execute_request_async(or, _done_io, ios);
	}

	/* Drop the initial ref taken by kref_init(); ios->done fires only
	 * after every submitted request has completed.
	 */
	kref_put(&ios->kref, _last_io);
	ret = 0;

	if (sync) {
		wait_for_completion(&wait);
		ret = ore_check_io(ios, NULL);
	}
	return ret;
}

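/* Asynchronous use (an illustrative sketch): setting ios->done before
 * ore_io_execute() makes the IO return immediately, with the callback
 * run from osd request completion context. The callback name and the
 * private struct below are hypothetical:
 *
 *	static void my_io_done(struct ore_io_state *ios, void *private)
 *	{
 *		struct my_req *req = private;
 *
 *		req->ret = ore_check_io(ios, NULL);
 *		ore_put_io_state(ios);
 *		complete(&req->comp);
 *	}
 *
 *	ios->done = my_io_done;
 *	ios->private = req;
 *	ret = ore_write(ios);	// or ore_read(ios)
 */
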
static void _clear_bio(struct bio *bio)
{
	struct bio_vec *bv;
	unsigned i;

	bio_for_each_segment_all(bv, bio, i) {
		unsigned this_count = bv->bv_len;

		if (likely(PAGE_SIZE == this_count))
			clear_highpage(bv->bv_page);
		else
			zero_user(bv->bv_page, bv->bv_offset, this_count);
	}
}

int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
{
	enum osd_err_priority accumulated_osd_err = 0;
	int accumulated_lin_err = 0;
	int i;

	for (i = 0; i < ios->numdevs; i++) {
		struct osd_sense_info osi;
		struct ore_per_dev_state *per_dev = &ios->per_dev[i];
		struct osd_request *or = per_dev->or;
		int ret;

		if (unlikely(!or))
			continue;

		ret = osd_req_decode_sense(or, &osi);
		if (likely(!ret))
			continue;

		if ((OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) &&
		    per_dev->bio) {
			/* A read that starts past the end of the object.
			 * Nothing was transferred, so zero the bio's pages
			 * instead of treating this as an error.
			 */
			_clear_bio(per_dev->bio);
			ORE_DBGMSG("start read offset passed end of file "
				"offset=0x%llx, length=0x%llx\n",
				_LLU(per_dev->offset),
				_LLU(per_dev->length));

			continue;	/* we recovered */
		}

		if (on_dev_error) {
			u64 residual = ios->reading ?
					or->in.residual : or->out.residual;
			u64 offset = (ios->offset + ios->length) - residual;
			unsigned dev = per_dev->dev - ios->oc->first_dev;
			struct ore_dev *od = ios->oc->ods[dev];

			on_dev_error(ios, od, dev, osi.osd_err_pri,
				     offset, residual);
		}
		if (osi.osd_err_pri >= accumulated_osd_err) {
			accumulated_osd_err = osi.osd_err_pri;
			accumulated_lin_err = ret;
		}
	}

	return accumulated_lin_err;
}
EXPORT_SYMBOL(ore_check_io);

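/* An illustrative on_dev_error callback; a filesystem would typically
 * use this hook to mark the device bad or record partial progress. The
 * function name and its body are hypothetical:
 *
 *	static void my_on_dev_error(struct ore_io_state *ios,
 *				    struct ore_dev *od, unsigned dev_index,
 *				    enum osd_err_priority oep,
 *				    u64 dev_offset, u64 dev_len)
 *	{
 *		pr_warn("osd dev=%u failed oep=%d offset=0x%llx len=0x%llx\n",
 *			dev_index, oep, _LLU(dev_offset), _LLU(dev_len));
 *	}
 *
 *	ret = ore_check_io(ios, my_on_dev_error);
 */
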
/*
 * The math of ore_calc_stripe_info(), in the terms used by the code below:
 *
 * L - logical offset into the file
 *
 * D - number of data devices in a group
 *	D = group_width - parity
 *
 * U - number of bytes in a full (data-only) stripe within a group
 *	U = D * stripe_unit
 *
 * T - number of bytes striped within a group of component objects
 *     before advancing to the next group
 *	T = U * group_depth
 *
 * S - number of bytes striped across all component objects
 *     before the pattern repeats
 *	S = T * group_count
 *
 * M - the "major" (across all components) cycle number
 *	M = L / S
 *
 * G - the group number, counted from the beginning of the major cycle
 *	G = (L - M * S) / T
 *
 * H - the byte offset within the group
 *	H = (L - M * S) % T
 *
 * N - the "minor" (within the group) stripe number
 *	N = H / U
 *
 * C - the component index corresponding to L
 *	C = (H - N * U) / stripe_unit + G * group_width
 *
 * O - the component (object) offset corresponding to L
 *	O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit
 *
 * In the raid5/6 case the parity device rotates backwards every stripe,
 * with a rotation cycle of LCMdP = lcm(group_width, parity) / parity
 * stripes; both si->dev and si->par_dev are shifted by
 * RxP = (N % LCMdP) * parity before being folded back into the group.
 */
void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
			  u64 length, struct ore_striping_info *si)
{
	u32 stripe_unit = layout->stripe_unit;
	u32 group_width = layout->group_width;
	u64 group_depth = layout->group_depth;
	u32 parity = layout->parity;

	u32 D = group_width - parity;
	u32 U = D * stripe_unit;
	u64 T = U * group_depth;
	u64 S = T * layout->group_count;
	u64 M = div64_u64(file_offset, S);

	/*
	G = (L - (M * S)) / T
	H = (L - (M * S)) % T
	*/
	u64 LmodS = file_offset - M * S;
	u32 G = div64_u64(LmodS, T);
	u64 H = LmodS - G * T;

	u32 N = div_u64(H, U);
	u32 Nlast;

	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
	u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width;
	u32 first_dev = C - C % group_width;

	div_u64_rem(file_offset, stripe_unit, &si->unit_off);

	si->obj_offset = si->unit_off + (N * stripe_unit) +
				  (M * group_depth * stripe_unit);
	si->cur_comp = C - first_dev;
	si->cur_pg = si->unit_off / PAGE_SIZE;

	if (parity) {
		u32 LCMdP = lcm(group_width, parity) / parity;
		/* R = N % LCMdP; */
		u32 RxP = (N % LCMdP) * parity;

		si->par_dev = (group_width + group_width - parity - RxP) %
			      group_width + first_dev;
		si->dev = (group_width + group_width + C - RxP) %
			  group_width + first_dev;
		si->bytes_in_stripe = U;
		si->first_stripe_start = M * S + G * T + N * U;
	} else {
		/* Make the math correct, see _prepare_for_striping() */
		si->par_dev = group_width;
		si->dev = C;
	}

	si->dev *= layout->mirrors_p1;
	si->par_dev *= layout->mirrors_p1;
	si->offset = file_offset;
	si->length = T - H;
	if (si->length > length)
		si->length = length;

	Nlast = div_u64(H + si->length + U - 1, U);
	si->maxdevUnits = Nlast - N;

	si->M = M;
}
EXPORT_SYMBOL(ore_calc_stripe_info);

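/* A worked example of the math above (illustrative numbers): with
 * stripe_unit=0x10000, group_width=4, parity=1, group_depth=2,
 * group_count=2, mirrors_p1=1 and file_offset L=0x100000:
 *
 *	D = 4 - 1               = 3
 *	U = 3 * 0x10000         = 0x30000
 *	T = 0x30000 * 2         = 0x60000
 *	S = 0x60000 * 2         = 0xc0000
 *	M = 0x100000 / 0xc0000  = 1
 *	G = 0x40000 / 0x60000   = 0,	H = 0x40000
 *	N = 0x40000 / 0x30000   = 1
 *	C = (0x40000 - 0x30000) / 0x10000 + 0 * 4 = 1
 *
 * so unit_off=0, obj_offset = 0 + 1 * 0x10000 + 1 * 2 * 0x10000 = 0x30000,
 * and with LCMdP=4, RxP=1: si->dev = (4 + 4 + 1 - 1) % 4 + 0 = 0 and
 * si->par_dev = (4 + 4 - 1 - 1) % 4 + 0 = 2. si->length = T - H = 0x20000,
 * capped by the requested length.
 */
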
int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
			 unsigned pgbase, struct page **pages,
			 struct ore_per_dev_state *per_dev, int cur_len)
{
	unsigned pg = *cur_pg;
	struct request_queue *q =
			osd_request_queue(_ios_od(ios, per_dev->dev));
	unsigned len = cur_len;
	int ret;

	if (per_dev->bio == NULL) {
		unsigned bio_size;

		if (!ios->reading) {
			bio_size = ios->si.maxdevUnits;
		} else {
			bio_size = (ios->si.maxdevUnits + 1) *
			     (ios->layout->group_width - ios->layout->parity) /
			     ios->layout->group_width;
		}
		bio_size *= (ios->layout->stripe_unit / PAGE_SIZE);

		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
		if (unlikely(!per_dev->bio)) {
			ORE_DBGMSG("Failed to allocate BIO size=%u\n",
				   bio_size);
			ret = -ENOMEM;
			goto out;
		}
	}

	while (cur_len > 0) {
		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
		unsigned added_len;

		cur_len -= pglen;

		added_len = bio_add_pc_page(q, per_dev->bio, pages[pg],
					    pglen, pgbase);
		if (unlikely(pglen != added_len)) {
			/* If bi_vcnt reached bi_max_vecs this is a SW bug */
			ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=0x%x "
				   "bi_max=0x%x BIO_MAX=0x%x cur_len=0x%x\n",
				   per_dev->bio->bi_vcnt,
				   per_dev->bio->bi_max_vecs,
				   BIO_MAX_PAGES_KMALLOC, cur_len);
			ret = -ENOMEM;
			goto out;
		}
		_add_stripe_page(ios->sp2d, &ios->si, pages[pg]);

		pgbase = 0;
		++pg;
	}
	BUG_ON(cur_len);

	per_dev->length += len;
	*cur_pg = pg;
	ret = 0;
out:	/* On error we fail the complete unit, i.e. we don't advance
	 * per_dev->length and cur_pg. This means we might end up with a
	 * bigger bio than the CDB requested length (per_dev->length);
	 * that's fine, only the opposite would be fatal.
	 */
	return ret;
}

static int _add_parity_units(struct ore_io_state *ios,
			     struct ore_striping_info *si,
			     unsigned dev, unsigned first_dev,
			     unsigned mirrors_p1, unsigned devs_in_group,
			     unsigned cur_len)
{
	unsigned do_parity;
	int ret = 0;

	for (do_parity = ios->layout->parity; do_parity; --do_parity) {
		struct ore_per_dev_state *per_dev;

		per_dev = &ios->per_dev[dev - first_dev];
		if (!per_dev->length && !per_dev->offset) {
			/* Only/always the parity unit of the first stripe
			 * will be empty, so this is a chance to initialize
			 * the per_dev info.
			 */
			per_dev->dev = dev;
			per_dev->offset = si->obj_offset - si->unit_off;
		}

		ret = _ore_add_parity_unit(ios, si, per_dev, cur_len,
					   do_parity == 1);
		if (unlikely(ret))
			break;

		if (do_parity != 1) {
			dev = ((dev + mirrors_p1) % devs_in_group) + first_dev;
			si->cur_comp = (si->cur_comp + 1) %
						ios->layout->group_width;
		}
	}

	return ret;
}

static int _prepare_for_striping(struct ore_io_state *ios)
{
	struct ore_striping_info *si = &ios->si;
	unsigned stripe_unit = ios->layout->stripe_unit;
	unsigned mirrors_p1 = ios->layout->mirrors_p1;
	unsigned group_width = ios->layout->group_width;
	unsigned devs_in_group = group_width * mirrors_p1;
	unsigned dev = si->dev;
	unsigned first_dev = dev - (dev % devs_in_group);
	unsigned cur_pg = ios->pages_consumed;
	u64 length = ios->length;
	int ret = 0;

	if (!ios->pages) {
		ios->numdevs = ios->layout->mirrors_p1;
		return 0;
	}

	BUG_ON(length > si->length);

	while (length) {
		struct ore_per_dev_state *per_dev =
						&ios->per_dev[dev - first_dev];
		unsigned cur_len, page_off = 0;

		if (!per_dev->length && !per_dev->offset) {
			/* First time: initialize the per_dev info. */
			per_dev->dev = dev;
			if (dev == si->dev) {
				WARN_ON(dev == si->par_dev);
				per_dev->offset = si->obj_offset;
				cur_len = stripe_unit - si->unit_off;
				page_off = si->unit_off & ~PAGE_MASK;
				BUG_ON(page_off && (page_off != ios->pgbase));
			} else {
				per_dev->offset = si->obj_offset - si->unit_off;
				cur_len = stripe_unit;
			}
		} else {
			cur_len = stripe_unit;
		}
		if (cur_len >= length)
			cur_len = length;

		ret = _ore_add_stripe_unit(ios, &cur_pg, page_off, ios->pages,
					   per_dev, cur_len);
		if (unlikely(ret))
			goto out;

		length -= cur_len;

		dev = ((dev + mirrors_p1) % devs_in_group) + first_dev;
		si->cur_comp = (si->cur_comp + 1) % group_width;
		if (unlikely((dev == si->par_dev) || (!length && ios->sp2d))) {
			if (!length && ios->sp2d) {
				/* If we are writing and this is the very
				 * last stripe, operate on the parity dev.
				 */
				dev = si->par_dev;
				/* If last stripe operate on parity comp */
				si->cur_comp = group_width - ios->layout->parity;
			}

			/* In writes cur_len just flags whether it's the
			 * last one. See _ore_add_parity_unit().
			 */
			ret = _add_parity_units(ios, si, dev, first_dev,
						mirrors_p1, devs_in_group,
						ios->sp2d ? length : cur_len);
			if (unlikely(ret))
				goto out;

			/* Rotate next par_dev backwards with wrapping */
			si->par_dev = (devs_in_group + si->par_dev -
				       ios->layout->parity * mirrors_p1) %
				      devs_in_group + first_dev;
			/* Next stripe, start fresh */
			si->cur_comp = 0;
			si->cur_pg = 0;
			si->obj_offset += cur_len;
			si->unit_off = 0;
		}
	}
out:
	ios->numdevs = devs_in_group;
	ios->pages_consumed = cur_pg;
	return ret;
}

int ore_create(struct ore_io_state *ios)
{
	int i, ret;

	for (i = 0; i < ios->oc->numdevs; i++) {
		struct osd_request *or;

		or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
		if (unlikely(!or)) {
			ORE_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
			goto out;
		}
		ios->per_dev[i].or = or;
		ios->numdevs++;

		osd_req_create_object(or, _ios_obj(ios, i));
	}
	ret = ore_io_execute(ios);

out:
	return ret;
}
EXPORT_SYMBOL(ore_create);

int ore_remove(struct ore_io_state *ios)
{
	int i, ret;

	for (i = 0; i < ios->oc->numdevs; i++) {
		struct osd_request *or;

		or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
		if (unlikely(!or)) {
			ORE_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
			goto out;
		}
		ios->per_dev[i].or = or;
		ios->numdevs++;

		osd_req_remove_object(or, _ios_obj(ios, i));
	}
	ret = ore_io_execute(ios);

out:
	return ret;
}
EXPORT_SYMBOL(ore_remove);

static int _write_mirror(struct ore_io_state *ios, int cur_comp)
{
	struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp];
	unsigned dev = ios->per_dev[cur_comp].dev;
	unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
	int ret = 0;

	if (ios->pages && !master_dev->length)
		return 0; /* Just an empty slot */

	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
		struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
		struct osd_request *or;

		or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
		if (unlikely(!or)) {
			ORE_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
			goto out;
		}
		per_dev->or = or;

		if (ios->pages) {
			struct bio *bio;

			if (per_dev != master_dev) {
				/* Clone the master bio for each mirror */
				bio = bio_clone_kmalloc(master_dev->bio,
							GFP_KERNEL);
				if (unlikely(!bio)) {
					ORE_DBGMSG(
					      "Failed to allocate BIO size=%u\n",
					      master_dev->bio->bi_max_vecs);
					ret = -ENOMEM;
					goto out;
				}

				bio->bi_bdev = NULL;
				bio->bi_next = NULL;
				per_dev->offset = master_dev->offset;
				per_dev->length = master_dev->length;
				per_dev->bio = bio;
				per_dev->dev = dev;
			} else {
				bio = master_dev->bio;
				/* The master bio is used as-is; mark it
				 * as a write.
				 */
				bio->bi_rw |= REQ_WRITE;
			}

			osd_req_write(or, _ios_obj(ios, cur_comp),
				      per_dev->offset, bio, per_dev->length);
			ORE_DBGMSG("write(0x%llx) offset=0x%llx "
				   "length=0x%llx dev=%d\n",
				   _LLU(_ios_obj(ios, cur_comp)->id),
				   _LLU(per_dev->offset),
				   _LLU(per_dev->length), dev);
		} else if (ios->kern_buff) {
			per_dev->offset = ios->si.obj_offset;
			per_dev->dev = ios->si.dev + dev;

			/* no cross-device IO without a page array */
			BUG_ON((ios->layout->group_width > 1) &&
			       (ios->si.unit_off + ios->length >
				ios->layout->stripe_unit));

			ret = osd_req_write_kern(or, _ios_obj(ios, cur_comp),
						 per_dev->offset,
						 ios->kern_buff, ios->length);
			if (unlikely(ret))
				goto out;
			ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
				    "length=0x%llx dev=%d\n",
				    _LLU(_ios_obj(ios, cur_comp)->id),
				    _LLU(per_dev->offset),
				    _LLU(ios->length), per_dev->dev);
		} else {
			osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
			ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
				    _LLU(_ios_obj(ios, cur_comp)->id),
				    ios->out_attr_len, dev);
		}

		if (ios->out_attr)
			osd_req_add_set_attr_list(or, ios->out_attr,
						  ios->out_attr_len);

		if (ios->in_attr)
			osd_req_add_get_attr_list(or, ios->in_attr,
						  ios->in_attr_len);
	}

out:
	return ret;
}

int ore_write(struct ore_io_state *ios)
{
	int i;
	int ret;

	if (unlikely(ios->sp2d && !ios->r4w)) {
		/* A library is attempting a RAID write without providing
		 * a pages lock interface.
		 */
		WARN_ON_ONCE(1);
		return -ENOTSUPP;
	}

	ret = _prepare_for_striping(ios);
	if (unlikely(ret))
		return ret;

	for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
		ret = _write_mirror(ios, i);
		if (unlikely(ret))
			return ret;
	}

	ret = ore_io_execute(ios);
	return ret;
}
EXPORT_SYMBOL(ore_write);

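/* Typical write flow (an illustrative sketch mirroring the read example
 * above; error handling elided, and my_r4w_ops is a hypothetical
 * caller-supplied read-4-write ops table, required for raid5/6 writes):
 *
 *	ret = ore_get_rw_state(&layout, &oc, false, offset, length, &ios);
 *	ios->pages = pages;
 *	ios->pgbase = offset & ~PAGE_MASK;
 *	ios->r4w = &my_r4w_ops;
 *	ret = ore_write(ios);
 *	ore_put_io_state(ios);
 */
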
int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp)
{
	struct osd_request *or;
	struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
	struct osd_obj_id *obj = _ios_obj(ios, cur_comp);
	unsigned first_dev = (unsigned)obj->id;

	if (ios->pages && !per_dev->length)
		return 0; /* Just an empty slot */

	/* Spread reads among the mirrors, keyed off the object id */
	first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
	or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
	if (unlikely(!or)) {
		ORE_ERR("%s: osd_start_request failed\n", __func__);
		return -ENOMEM;
	}
	per_dev->or = or;

	if (ios->pages) {
		if (per_dev->cur_sg) {
			/* finalize the last sg_entry */
			_ore_add_sg_seg(per_dev, 0, false);
			if (unlikely(!per_dev->cur_sg))
				return 0; /* Skip parity-only device */

			osd_req_read_sg(or, obj, per_dev->bio,
					per_dev->sglist, per_dev->cur_sg);
		} else {
			/* A plain contiguous read, no sg needed */
			osd_req_read(or, obj, per_dev->offset,
				     per_dev->bio, per_dev->length);
		}

		ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
			   " dev=%d sg_len=%d\n", _LLU(obj->id),
			   _LLU(per_dev->offset), _LLU(per_dev->length),
			   first_dev, per_dev->cur_sg);
	} else {
		BUG_ON(ios->kern_buff);

		osd_req_get_attributes(or, obj);
		ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
			    _LLU(obj->id),
			    ios->in_attr_len, first_dev);
	}
	if (ios->out_attr)
		osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);

	if (ios->in_attr)
		osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);

	return 0;
}

int ore_read(struct ore_io_state *ios)
{
	int i;
	int ret;

	ret = _prepare_for_striping(ios);
	if (unlikely(ret))
		return ret;

	for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
		ret = _ore_read_mirror(ios, i);
		if (unlikely(ret))
			return ret;
	}

	ret = ore_io_execute(ios);
	return ret;
}
EXPORT_SYMBOL(ore_read);

int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr)
{
	struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
	void *iter = NULL;
	int nelem;

	do {
		nelem = 1;
		osd_req_decode_get_attr_list(ios->per_dev[0].or,
					     &cur_attr, &nelem, &iter);
		if ((cur_attr.attr_page == attr->attr_page) &&
		    (cur_attr.attr_id == attr->attr_id)) {
			attr->len = cur_attr.len;
			attr->val_ptr = cur_attr.val_ptr;
			return 0;
		}
	} while (iter);

	return -EIO;
}
EXPORT_SYMBOL(extract_attr_from_ios);

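/* Illustrative use: after an IO that queued a get_attr_list via
 * ios->in_attr, fetch the object's logical length (g_attr_logical_length
 * is defined at the bottom of this file):
 *
 *	struct osd_attr attr = g_attr_logical_length;
 *	u64 obj_len;
 *
 *	if (!extract_attr_from_ios(ios, &attr))
 *		obj_len = get_unaligned_be64(attr.val_ptr);
 */
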
static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp,
			     struct osd_attr *attr)
{
	int last_comp = cur_comp + ios->layout->mirrors_p1;

	for (; cur_comp < last_comp; ++cur_comp) {
		struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
		struct osd_request *or;

		or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
		if (unlikely(!or)) {
			ORE_ERR("%s: osd_start_request failed\n", __func__);
			return -ENOMEM;
		}
		per_dev->or = or;

		osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
		osd_req_add_set_attr_list(or, attr, 1);
	}

	return 0;
}

struct _trunc_info {
	struct ore_striping_info si;
	u64 prev_group_obj_off;
	u64 next_group_obj_off;

	unsigned first_group_dev;
	unsigned next_group_dev;
};

static void _calc_trunk_info(struct ore_layout *layout, u64 file_offset,
			     struct _trunc_info *ti)
{
	unsigned stripe_unit = layout->stripe_unit;

	ore_calc_stripe_info(layout, file_offset, 0, &ti->si);

	ti->prev_group_obj_off = ti->si.M * stripe_unit;
	ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0;

	ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width);
	ti->next_group_dev = ti->first_group_dev + layout->group_width;
}

1099
1100int ore_truncate(struct ore_layout *layout, struct ore_components *oc,
1101 u64 size)
1102{
1103 struct ore_io_state *ios;
1104 struct exofs_trunc_attr {
1105 struct osd_attr attr;
1106 __be64 newsize;
1107 } *size_attrs;
1108 struct _trunc_info ti;
1109 int i, ret;
1110
1111 ret = ore_get_io_state(layout, oc, &ios);
1112 if (unlikely(ret))
1113 return ret;
1114
1115 _calc_trunk_info(ios->layout, size, &ti);
1116
1117 size_attrs = kcalloc(ios->oc->numdevs, sizeof(*size_attrs),
1118 GFP_KERNEL);
1119 if (unlikely(!size_attrs)) {
1120 ret = -ENOMEM;
1121 goto out;
1122 }
1123
1124 ios->numdevs = ios->oc->numdevs;
1125
1126 for (i = 0; i < ios->numdevs; ++i) {
1127 struct exofs_trunc_attr *size_attr = &size_attrs[i];
1128 u64 obj_size;
1129
1130 if (i < ti.first_group_dev)
1131 obj_size = ti.prev_group_obj_off;
1132 else if (i >= ti.nex_group_dev)
1133 obj_size = ti.next_group_obj_off;
1134 else if (i < ti.si.dev)
1135 obj_size = ti.si.obj_offset +
1136 ios->layout->stripe_unit - ti.si.unit_off;
1137 else if (i == ti.si.dev)
1138 obj_size = ti.si.obj_offset;
1139 else
1140 obj_size = ti.si.obj_offset - ti.si.unit_off;
1141
1142 size_attr->newsize = cpu_to_be64(obj_size);
1143 size_attr->attr = g_attr_logical_length;
1144 size_attr->attr.val_ptr = &size_attr->newsize;
1145
1146 ORE_DBGMSG2("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
1147 _LLU(oc->comps->obj.id), _LLU(obj_size), i);
1148 ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
1149 &size_attr->attr);
1150 if (unlikely(ret))
1151 goto out;
1152 }
1153 ret = ore_io_execute(ios);
1154
1155out:
1156 kfree(size_attrs);
1157 ore_put_io_state(ios);
1158 return ret;
1159}
1160EXPORT_SYMBOL(ore_truncate);
1161
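/* Illustrative call: set each component object's logical length to its
 * share of the new file size, per the striping math above:
 *
 *	ret = ore_truncate(&layout, &oc, newsize);
 */
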
const struct osd_attr g_attr_logical_length = ATTR_DEF(
	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
EXPORT_SYMBOL(g_attr_logical_length);