/*
 * Copyright (C) 2005, 2006
 * Avishay Traeger (avishay@gmail.com)
 * Copyright (C) 2008, 2009
 * Boaz Harrosh <bharrosh@panasas.com>
 *
 * This file is part of exofs.
 *
 * exofs is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation.  Since it is based on ext2, and the only
 * valid version of GPL for the Linux kernel is version 2, the only valid
 * version of GPL for exofs is version 2.
 *
 * exofs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with exofs; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include <linux/slab.h>
#include <linux/module.h>
#include <asm/div64.h>
#include <linux/lcm.h>

#include "ore_raid.h"

MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
MODULE_LICENSE("GPL");

/* ore_verify_layout does a couple of things:
 * 1. Given a minimum number of needed parameters fixes up the rest of the
 *    members to be operational for the ore. The needed parameters are:
 *	layout->stripe_unit
 *	layout->group_width
 *	layout->group_depth (Only if group_count > 1)
 *	layout->mirrors_p1
 *	layout->raid_algorithm
 * 2. Check to see if the current ore code actually supports these parameters,
 *    for example stripe_unit must be a multiple of the system PAGE_SIZE,
 *    and so on.
 */
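
/* Largest number of pages that fits in a bio allocated with a single-page
 * kmalloc: the struct bio header plus its inline bio_vec array must fit in
 * PAGE_SIZE. For example, with 4K pages and 16-byte bio_vec entries this
 * comes to roughly 250 pages per bio; the exact value depends on
 * sizeof(struct bio) in the running kernel.
 */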
enum { BIO_MAX_PAGES_KMALLOC =
	(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),};

int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
{
	u64 stripe_length;

	switch (layout->raid_algorithm) {
	case PNFS_OSD_RAID_0:
		layout->parity = 0;
		break;
	case PNFS_OSD_RAID_5:
		layout->parity = 1;
		break;
	case PNFS_OSD_RAID_PQ:
	case PNFS_OSD_RAID_4:
	default:
		ORE_ERR("Only RAID_0/5 for now\n");
		return -EINVAL;
	}
	if (0 != (layout->stripe_unit & ~PAGE_MASK)) {
		ORE_ERR("Stripe Unit(0x%llx)"
			" must be a multiple of PAGE_SIZE(0x%lx)\n",
			_LLU(layout->stripe_unit), PAGE_SIZE);
		return -EINVAL;
	}
	if (layout->group_width) {
		if (!layout->group_depth) {
			ORE_ERR("group_depth == 0 && group_width != 0\n");
			return -EINVAL;
		}
		if (total_comps < (layout->group_width * layout->mirrors_p1)) {
			ORE_ERR("Data Map wrong, "
				"numdevs=%d < group_width=%d * mirrors=%d\n",
				total_comps, layout->group_width,
				layout->mirrors_p1);
			return -EINVAL;
		}
		layout->group_count = total_comps / layout->mirrors_p1 /
						layout->group_width;
	} else {
		if (layout->group_depth) {
			printk(KERN_NOTICE "Warning: group_depth ignored "
				"group_width == 0 && group_depth == %lld\n",
				_LLU(layout->group_depth));
		}
		layout->group_width = total_comps / layout->mirrors_p1;
		layout->group_depth = -1;
		layout->group_count = 1;
	}

	stripe_length = (u64)layout->group_width * layout->stripe_unit;
	if (stripe_length >= (1ULL << 32)) {
		ORE_ERR("Stripe_length(0x%llx) >= 32bit is not supported\n",
			_LLU(stripe_length));
		return -EINVAL;
	}

	layout->max_io_length =
		(BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) *
							layout->group_width;
	if (layout->parity) {
		unsigned stripe_length =
				(layout->group_width - layout->parity) *
				layout->stripe_unit;

		layout->max_io_length /= stripe_length;
		layout->max_io_length *= stripe_length;
	}
	return 0;
}
EXPORT_SYMBOL(ore_verify_layout);
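
/* Example (illustrative values, not from any particular deployment): with
 * total_comps = 8, mirrors_p1 = 2, stripe_unit = PAGE_SIZE and
 * group_width = 0, ore_verify_layout() fixes up the layout to
 * group_width = 4, group_depth = -1, group_count = 1. With
 * raid_algorithm = PNFS_OSD_RAID_5 it also sets parity = 1 and rounds
 * max_io_length down to a whole number of data stripes, i.e. a multiple of
 * (group_width - parity) * stripe_unit, so a single IO never ends
 * mid-stripe.
 */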

static u8 *_ios_cred(struct ore_io_state *ios, unsigned index)
{
	return ios->oc->comps[index & ios->oc->single_comp].cred;
}

static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index)
{
	return &ios->oc->comps[index & ios->oc->single_comp].obj;
}

static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index)
{
	ORE_DBGMSG2("oc->first_dev=%d oc->numdevs=%d i=%d oc->ods=%p\n",
		    ios->oc->first_dev, ios->oc->numdevs, index,
		    ios->oc->ods);

	return ore_comp_dev(ios->oc, index);
}

int _ore_get_io_state(struct ore_layout *layout,
		      struct ore_components *oc, unsigned numdevs,
		      unsigned sgs_per_dev, unsigned num_par_pages,
		      struct ore_io_state **pios)
{
	struct ore_io_state *ios;
	struct page **pages;
	struct osd_sg_entry *sgilist;
	struct __alloc_all_io_state {
		struct ore_io_state ios;
		struct ore_per_dev_state per_dev[numdevs];
		union {
			struct osd_sg_entry sglist[sgs_per_dev * numdevs];
			struct page *pages[num_par_pages];
		};
	} *_aios;

	if (likely(sizeof(*_aios) <= PAGE_SIZE)) {
		_aios = kzalloc(sizeof(*_aios), GFP_KERNEL);
		if (unlikely(!_aios)) {
			ORE_DBGMSG("Failed kzalloc bytes=%zd\n",
				   sizeof(*_aios));
			*pios = NULL;
			return -ENOMEM;
		}
		pages = num_par_pages ? _aios->pages : NULL;
		sgilist = sgs_per_dev ? _aios->sglist : NULL;
		ios = &_aios->ios;
	} else {
		struct __alloc_small_io_state {
			struct ore_io_state ios;
			struct ore_per_dev_state per_dev[numdevs];
		} *_aio_small;
		union __extra_part {
			struct osd_sg_entry sglist[sgs_per_dev * numdevs];
			struct page *pages[num_par_pages];
		} *extra_part;

		_aio_small = kzalloc(sizeof(*_aio_small), GFP_KERNEL);
		if (unlikely(!_aio_small)) {
			ORE_DBGMSG("Failed alloc first part bytes=%zd\n",
				   sizeof(*_aio_small));
			*pios = NULL;
			return -ENOMEM;
		}
		extra_part = kzalloc(sizeof(*extra_part), GFP_KERNEL);
		if (unlikely(!extra_part)) {
			ORE_DBGMSG("Failed alloc second part bytes=%zd\n",
				   sizeof(*extra_part));
			kfree(_aio_small);
			*pios = NULL;
			return -ENOMEM;
		}

		pages = num_par_pages ? extra_part->pages : NULL;
		sgilist = sgs_per_dev ? extra_part->sglist : NULL;
		/* In this case the per_dev[0].sglist holds the pointer to
		 * be freed
		 */
		ios = &_aio_small->ios;
		ios->extra_part_alloc = true;
	}

	if (pages) {
		ios->parity_pages = pages;
		ios->max_par_pages = num_par_pages;
	}
	if (sgilist) {
		unsigned d;

		for (d = 0; d < numdevs; ++d) {
			ios->per_dev[d].sglist = sgilist;
			sgilist += sgs_per_dev;
		}
		ios->sgs_per_dev = sgs_per_dev;
	}

	ios->layout = layout;
	ios->oc = oc;
	*pios = ios;
	return 0;
}

/* Allocate an io_state for only a single group of mirrors for an IO portion
 * defined by @offset and @length.
 *
 * The io is allocated optimally for all the needed devices and for the
 * memory needed by the sg-lists (on reads) or the parity pages (on writes)
 * of a RAID IO. The bios themselves are allocated later, when pages are
 * added to the IO.
 */
int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
		     bool is_reading, u64 offset, u64 length,
		     struct ore_io_state **pios)
{
	struct ore_io_state *ios;
	unsigned numdevs = layout->group_width * layout->mirrors_p1;
	unsigned sgs_per_dev = 0, max_par_pages = 0;
	int ret;

	if (layout->parity && length) {
		unsigned data_devs = layout->group_width - layout->parity;
		unsigned stripe_size = layout->stripe_unit * data_devs;
		unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
		u32 remainder;
		u64 num_stripes;
		u64 num_raid_units;

		num_stripes = div_u64_rem(length, stripe_size, &remainder);
		if (remainder)
			++num_stripes;

		num_raid_units = num_stripes * layout->parity;

		if (is_reading) {
			/* For reads add per_dev sglist array */
			/* TODO: Raid 6 we need twice more. Would be very
			 * clever if we'd do it in recovery.
			 */

			/* first/last seg is split */
			num_raid_units += layout->group_width;
			sgs_per_dev = div_u64(num_raid_units, data_devs) + 2;
		} else {
			/* For Writes add parity pages array. */
			max_par_pages = num_raid_units * pages_in_unit *
						sizeof(struct page *);
		}
	}

	ret = _ore_get_io_state(layout, oc, numdevs, sgs_per_dev, max_par_pages,
				pios);
	if (unlikely(ret))
		return ret;

	ios = *pios;
	ios->reading = is_reading;
	ios->offset = offset;

	if (length) {
		ore_calc_stripe_info(layout, offset, length, &ios->si);
		ios->length = ios->si.length;
		ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
		if (layout->parity)
			_ore_post_alloc_raid_stuff(ios);
	}

	return 0;
}
EXPORT_SYMBOL(ore_get_rw_state);

/* Allocate an io_state for all the devices in the comps array */
int ore_get_io_state(struct ore_layout *layout, struct ore_components *oc,
		     struct ore_io_state **pios)
{
	return _ore_get_io_state(layout, oc, oc->numdevs, 0, 0, pios);
}
EXPORT_SYMBOL(ore_get_io_state);
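
/* Typical call pattern (sketch, mirroring how a library like exofs drives
 * this API):
 *
 *	struct ore_io_state *ios;
 *
 *	ret = ore_get_rw_state(layout, oc, true, offset, length, &ios);
 *	if (ret)
 *		return ret;
 *	ios->pages = pages;	(pre-allocated by the caller)
 *	ret = ore_read(ios);	(or ore_write() for writes)
 *	...
 *	ore_put_io_state(ios);
 *
 * ore_io_execute() runs synchronously when ios->done is NULL; otherwise
 * ios->done(ios, ios->private) is called once all devices complete, and the
 * caller should then use ore_check_io() to collect per-device errors.
 */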

void ore_put_io_state(struct ore_io_state *ios)
{
	if (ios) {
		unsigned i;

		for (i = 0; i < ios->numdevs; i++) {
			struct ore_per_dev_state *per_dev = &ios->per_dev[i];

			if (per_dev->or)
				osd_end_request(per_dev->or);
			if (per_dev->bio)
				bio_put(per_dev->bio);
		}

		_ore_free_raid_stuff(ios);
		kfree(ios);
	}
}
EXPORT_SYMBOL(ore_put_io_state);

static void _sync_done(struct ore_io_state *ios, void *p)
{
	struct completion *waiting = p;

	complete(waiting);
}

static void _last_io(struct kref *kref)
{
	struct ore_io_state *ios = container_of(
		kref, struct ore_io_state, kref);

	ios->done(ios, ios->private);
}

static void _done_io(struct osd_request *or, void *p)
{
	struct ore_io_state *ios = p;

	kref_put(&ios->kref, _last_io);
}

int ore_io_execute(struct ore_io_state *ios)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	bool sync = (ios->done == NULL);
	int i, ret;

	if (sync) {
		ios->done = _sync_done;
		ios->private = &wait;
	}

	for (i = 0; i < ios->numdevs; i++) {
		struct osd_request *or = ios->per_dev[i].or;
		if (unlikely(!or))
			continue;

		ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL);
		if (unlikely(ret)) {
			ORE_DBGMSG("Failed to osd_finalize_request() => %d\n",
				   ret);
			return ret;
		}
	}

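	/* Each submitted request takes a reference; the initial reference
	 * from kref_init() is dropped right after the submission loop, so
	 * ios->done fires exactly once, when the last request completes
	 * (or immediately if nothing was submitted).
	 */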
	kref_init(&ios->kref);

	for (i = 0; i < ios->numdevs; i++) {
		struct osd_request *or = ios->per_dev[i].or;
		if (unlikely(!or))
			continue;

		kref_get(&ios->kref);
		osd_execute_request_async(or, _done_io, ios);
	}

	kref_put(&ios->kref, _last_io);
	ret = 0;

	if (sync) {
		wait_for_completion(&wait);
		ret = ore_check_io(ios, NULL);
	}
	return ret;
}

static void _clear_bio(struct bio *bio)
{
	struct bio_vec *bv;
	unsigned i;

	bio_for_each_segment_all(bv, bio, i) {
		unsigned this_count = bv->bv_len;

		if (likely(PAGE_SIZE == this_count))
			clear_highpage(bv->bv_page);
		else
			zero_user(bv->bv_page, bv->bv_offset, this_count);
	}
}

int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
{
	enum osd_err_priority accumulated_osd_err = 0;
	int accumulated_lin_err = 0;
	int i;

	for (i = 0; i < ios->numdevs; i++) {
		struct osd_sense_info osi;
		struct ore_per_dev_state *per_dev = &ios->per_dev[i];
		struct osd_request *or = per_dev->or;
		int ret;

		if (unlikely(!or))
			continue;

		ret = osd_req_decode_sense(or, &osi);
		if (likely(!ret))
			continue;

		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
			/* start read offset passed end of file */
			_clear_bio(per_dev->bio);
			ORE_DBGMSG("start read offset passed end of file "
				   "offset=0x%llx, length=0x%llx\n",
				   _LLU(per_dev->offset),
				   _LLU(per_dev->length));

			continue;	/* we recovered */
		}

		if (on_dev_error) {
			u64 residual = ios->reading ?
					or->in.residual : or->out.residual;
			u64 offset = (ios->offset + ios->length) - residual;
			unsigned dev = per_dev->dev - ios->oc->first_dev;
			struct ore_dev *od = ios->oc->ods[dev];

			on_dev_error(ios, od, dev, osi.osd_err_pri,
				     offset, residual);
		}
		if (osi.osd_err_pri >= accumulated_osd_err) {
			accumulated_osd_err = osi.osd_err_pri;
			accumulated_lin_err = ret;
		}
	}

	return accumulated_lin_err;
}
EXPORT_SYMBOL(ore_check_io);

/*
 * L - logical offset into the file
 *
 * D - number of Data devices
 *	D = group_width - parity
 *
 * U - The number of bytes in a stripe within a group
 *	U = stripe_unit * D
 *
 * T - The number of bytes striped within a group of component objects
 *     (before advancing to the next group)
 *	T = U * group_depth
 *
 * S - The number of bytes striped across all component objects
 *     before the pattern repeats
 *	S = T * group_count
 *
 * M - The "major" (i.e., across all components) cycle number
 *	M = L / S
 *
 * G - Counts the groups from the beginning of the major cycle
 *	G = (L - M * S) / T	[or (L % S) / T]
 *
 * H - The byte offset within the group
 *	H = (L - M * S) % T	[or (L % S) % T]
 *
 * N - The "minor" (i.e., across all components within a group) stripe number
 *	N = H / U
 *
 * C - The component index corresponding to L
 *	C = (H - N * U) / stripe_unit + G * group_width
 *	[or (L % U) / stripe_unit + G * group_width]
 *
 * O - The component offset corresponding to L
 *	O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit
 *
 * LCMdP - Parity cycle: Lowest Common Multiple of group_width, parity,
 *	   divided by parity
 *	LCMdP = lcm(group_width, parity) / parity
 *
 * R - The parity Rotation stripe
 *     (Note parity cycle always starts at a group's boundary)
 *	R = N % LCMdP
 *
 * I - The first parity device index
 *	I = (group_width + group_width - R * parity - parity) % group_width
 *
 * Craid - The component index Rotated
 *	Craid = (group_width + C - R * parity) % group_width
 *	(We add group_width to avoid negative numbers in the modulo math)
 */
void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
			  u64 length, struct ore_striping_info *si)
{
	u32 stripe_unit = layout->stripe_unit;
	u32 group_width = layout->group_width;
	u64 group_depth = layout->group_depth;
	u32 parity = layout->parity;

	u32 D = group_width - parity;
	u32 U = D * stripe_unit;
	u64 T = U * group_depth;
	u64 S = T * layout->group_count;
	u64 M = div64_u64(file_offset, S);

	/*
	G = (L - M * S) / T
	H = (L - M * S) % T
	*/
	u64 LmodS = file_offset - M * S;
	u32 G = div64_u64(LmodS, T);
	u64 H = LmodS - G * T;

	u32 N = div_u64(H, U);

	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
	u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width;

	div_u64_rem(file_offset, stripe_unit, &si->unit_off);

	si->obj_offset = si->unit_off + (N * stripe_unit) +
				  (M * group_depth * stripe_unit);

	if (parity) {
		u32 LCMdP = lcm(group_width, parity) / parity;
		/* R     = N % LCMdP; */
		u32 RxP = (N % LCMdP) * parity;
		u32 first_dev = C - C % group_width;

		si->par_dev = (group_width + group_width - parity - RxP) %
			      group_width + first_dev;
		si->dev = (group_width + C - RxP) % group_width + first_dev;
		si->bytes_in_stripe = U;
		si->first_stripe_start = M * S + G * T + N * U;
	} else {
		/* Make the math correct, see _prepare_for_striping */
		si->par_dev = group_width;
		si->dev = C;
	}

	si->dev *= layout->mirrors_p1;
	si->par_dev *= layout->mirrors_p1;
	si->offset = file_offset;
	si->length = T - H;
	if (si->length > length)
		si->length = length;
	si->M = M;
}
EXPORT_SYMBOL(ore_calc_stripe_info);
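
/* Worked example of the mapping above (illustrative numbers only): take
 * stripe_unit = 64K, group_width = 4, parity = 1, group_depth = 16,
 * group_count = 2, mirrors_p1 = 1. Then
 *	D = 3, U = 192K, T = 3M, S = 6M
 * For file_offset L = 0x720000:
 *	M = L / S = 1,  L % S = 0x120000,  G = 0,  H = 0x120000
 *	N = H / U = 6,  C = 0,  unit_off = 0
 *	obj_offset = 0 + 6 * 64K + 1 * 16 * 64K = 0x160000
 * Parity rotation: LCMdP = lcm(4, 1) / 1 = 4, RxP = (6 % 4) * 1 = 2, so
 *	si->dev = (4 + 0 - 2) % 4 = 2,  si->par_dev = (4 + 4 - 1 - 2) % 4 = 1
 * i.e. the unit lives on component 2 and its stripe's parity on component 1.
 */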

int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
			 unsigned pgbase, struct page **pages,
			 struct ore_per_dev_state *per_dev, int cur_len)
{
	unsigned pg = *cur_pg;
	struct request_queue *q =
			osd_request_queue(_ios_od(ios, per_dev->dev));
	unsigned len = cur_len;
	int ret;

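	/* First call for this device: size the bio for this device's share
	 * of the IO. nr_pages is scaled by group_width / (group_width -
	 * parity) to account for the parity units that ride along, and one
	 * extra stripe's worth of pages covers partial first/last stripes.
	 */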
	if (per_dev->bio == NULL) {
		unsigned pages_in_stripe = ios->layout->group_width *
					(ios->layout->stripe_unit / PAGE_SIZE);
		unsigned nr_pages = ios->nr_pages * ios->layout->group_width /
					(ios->layout->group_width -
					 ios->layout->parity);
		unsigned bio_size = (nr_pages + pages_in_stripe) /
					ios->layout->group_width;

		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
		if (unlikely(!per_dev->bio)) {
			ORE_DBGMSG("Failed to allocate BIO size=%u\n",
				   bio_size);
			ret = -ENOMEM;
			goto out;
		}
	}

	while (cur_len > 0) {
		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
		unsigned added_len;

		cur_len -= pglen;

		added_len = bio_add_pc_page(q, per_dev->bio, pages[pg],
					    pglen, pgbase);
		if (unlikely(pglen != added_len)) {
			ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u\n",
				   per_dev->bio->bi_vcnt);
			ret = -ENOMEM;
			goto out;
		}
		_add_stripe_page(ios->sp2d, &ios->si, pages[pg]);

		pgbase = 0;
		++pg;
	}
	BUG_ON(cur_len);

	per_dev->length += len;
	*cur_pg = pg;
	ret = 0;
out:	/* we fail the complete unit on an error, e.g. don't advance
	 * per_dev->length and cur_pg. This means that we might have a bigger
	 * bio than the CDB requested length (per_dev->length); that's fine,
	 * only the opposite is fatal.
	 */
	return ret;
}

static int _prepare_for_striping(struct ore_io_state *ios)
{
	struct ore_striping_info *si = &ios->si;
	unsigned stripe_unit = ios->layout->stripe_unit;
	unsigned mirrors_p1 = ios->layout->mirrors_p1;
	unsigned group_width = ios->layout->group_width;
	unsigned devs_in_group = group_width * mirrors_p1;
	unsigned dev = si->dev;
	unsigned first_dev = dev - (dev % devs_in_group);
	unsigned dev_order;
	unsigned cur_pg = ios->pages_consumed;
	u64 length = ios->length;
	int ret = 0;

	if (!ios->pages) {
		ios->numdevs = ios->layout->mirrors_p1;
		return 0;
	}

	BUG_ON(length > si->length);

	dev_order = _dev_order(devs_in_group, mirrors_p1, si->par_dev, dev);
	si->cur_comp = dev_order;
	si->cur_pg = si->unit_off / PAGE_SIZE;

	while (length) {
		unsigned comp = dev - first_dev;
		struct ore_per_dev_state *per_dev = &ios->per_dev[comp];
		unsigned cur_len, page_off = 0;

		if (!per_dev->length) {
			per_dev->dev = dev;
			if (dev == si->dev) {
				WARN_ON(dev == si->par_dev);
				per_dev->offset = si->obj_offset;
				cur_len = stripe_unit - si->unit_off;
				page_off = si->unit_off & ~PAGE_MASK;
				BUG_ON(page_off && (page_off != ios->pgbase));
			} else {
				if (si->cur_comp > dev_order)
					per_dev->offset =
						si->obj_offset - si->unit_off;
				else
					per_dev->offset =
						si->obj_offset + stripe_unit -
								   si->unit_off;
				cur_len = stripe_unit;
			}
		} else {
			cur_len = stripe_unit;
		}
		if (cur_len >= length)
			cur_len = length;

		ret = _ore_add_stripe_unit(ios, &cur_pg, page_off, ios->pages,
					   per_dev, cur_len);
		if (unlikely(ret))
			goto out;

		dev += mirrors_p1;
		dev = (dev % devs_in_group) + first_dev;

		length -= cur_len;

		si->cur_comp = (si->cur_comp + 1) % group_width;
		if (unlikely((dev == si->par_dev) || (!length && ios->sp2d))) {
			if (!length && ios->sp2d) {
				/* If we are writing and this is the very last
				 * stripe, then operate on the parity dev.
				 */
				dev = si->par_dev;
			}
			if (ios->sp2d)
				/* In writes cur_len just means if it's the
				 * last one. See _ore_add_parity_unit.
				 */
				cur_len = length;
			per_dev = &ios->per_dev[dev - first_dev];
			if (!per_dev->length) {
				/* Only/always the parity unit of the first
				 * stripe will be empty. So this is a chance to
				 * initialize the per_dev info.
				 */
				per_dev->dev = dev;
				per_dev->offset = si->obj_offset - si->unit_off;
			}

			ret = _ore_add_parity_unit(ios, si, per_dev, cur_len);
			if (unlikely(ret))
				goto out;

			/* Rotate next par_dev backwards with wrapping */
			si->par_dev = (devs_in_group + si->par_dev -
				       ios->layout->parity * mirrors_p1) %
				      devs_in_group + first_dev;
			/* Next stripe, start fresh */
			si->cur_comp = 0;
			si->cur_pg = 0;
		}
	}
out:
	ios->numdevs = devs_in_group;
	ios->pages_consumed = cur_pg;
	return ret;
}

int ore_create(struct ore_io_state *ios)
{
	int i, ret;

	for (i = 0; i < ios->oc->numdevs; i++) {
		struct osd_request *or;

		or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
		if (unlikely(!or)) {
			ORE_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
			goto out;
		}
		ios->per_dev[i].or = or;
		ios->numdevs++;

		osd_req_create_object(or, _ios_obj(ios, i));
	}
	ret = ore_io_execute(ios);

out:
	return ret;
}
EXPORT_SYMBOL(ore_create);

int ore_remove(struct ore_io_state *ios)
{
	int i, ret;

	for (i = 0; i < ios->oc->numdevs; i++) {
		struct osd_request *or;

		or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
		if (unlikely(!or)) {
			ORE_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
			goto out;
		}
		ios->per_dev[i].or = or;
		ios->numdevs++;

		osd_req_remove_object(or, _ios_obj(ios, i));
	}
	ret = ore_io_execute(ios);

out:
	return ret;
}
EXPORT_SYMBOL(ore_remove);

static int _write_mirror(struct ore_io_state *ios, int cur_comp)
{
	struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp];
	unsigned dev = ios->per_dev[cur_comp].dev;
	unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
	int ret = 0;

	if (ios->pages && !master_dev->length)
		return 0;

	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
		struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
		struct osd_request *or;

		or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
		if (unlikely(!or)) {
			ORE_ERR("%s: osd_start_request failed\n", __func__);
			ret = -ENOMEM;
			goto out;
		}
		per_dev->or = or;

		if (ios->pages) {
			struct bio *bio;

			if (per_dev != master_dev) {
				bio = bio_clone_kmalloc(master_dev->bio,
							GFP_KERNEL);
				if (unlikely(!bio)) {
					ORE_DBGMSG(
					      "Failed to allocate BIO size=%u\n",
					      master_dev->bio->bi_max_vecs);
					ret = -ENOMEM;
					goto out;
				}

				bio->bi_bdev = NULL;
				bio->bi_next = NULL;
				per_dev->offset = master_dev->offset;
				per_dev->length = master_dev->length;
				per_dev->bio = bio;
				per_dev->dev = dev;
			} else {
				bio = master_dev->bio;
				/* FIXME: bio_set_dir() */
				bio->bi_rw |= REQ_WRITE;
			}

			osd_req_write(or, _ios_obj(ios, cur_comp),
				      per_dev->offset, bio, per_dev->length);
			ORE_DBGMSG("write(0x%llx) offset=0x%llx "
				   "length=0x%llx dev=%d\n",
				   _LLU(_ios_obj(ios, cur_comp)->id),
				   _LLU(per_dev->offset),
				   _LLU(per_dev->length), dev);
		} else if (ios->kern_buff) {
			per_dev->offset = ios->si.obj_offset;
			per_dev->dev = ios->si.dev + dev;

			/* no cross device without page array */
			BUG_ON((ios->layout->group_width > 1) &&
			       (ios->si.unit_off + ios->length >
				ios->layout->stripe_unit));

			ret = osd_req_write_kern(or, _ios_obj(ios, cur_comp),
						 per_dev->offset,
						 ios->kern_buff, ios->length);
			if (unlikely(ret))
				goto out;
			ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
				    "length=0x%llx dev=%d\n",
				    _LLU(_ios_obj(ios, cur_comp)->id),
				    _LLU(per_dev->offset),
				    _LLU(ios->length), per_dev->dev);
		} else {
			osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
			ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
				    _LLU(_ios_obj(ios, cur_comp)->id),
				    ios->out_attr_len, dev);
		}

		if (ios->out_attr)
			osd_req_add_set_attr_list(or, ios->out_attr,
						  ios->out_attr_len);

		if (ios->in_attr)
			osd_req_add_get_attr_list(or, ios->in_attr,
						  ios->in_attr_len);
	}

out:
	return ret;
}

int ore_write(struct ore_io_state *ios)
{
	int i;
	int ret;

	if (unlikely(ios->sp2d && !ios->r4w)) {
		/* A library is attempting a RAID-write without providing
		 * a pages lock interface.
		 */
		WARN_ON_ONCE(1);
		return -ENOTSUPP;
	}

	ret = _prepare_for_striping(ios);
	if (unlikely(ret))
		return ret;

	for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
		ret = _write_mirror(ios, i);
		if (unlikely(ret))
			return ret;
	}

	ret = ore_io_execute(ios);
	return ret;
}
EXPORT_SYMBOL(ore_write);

int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp)
{
	struct osd_request *or;
	struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
	struct osd_obj_id *obj = _ios_obj(ios, cur_comp);
	unsigned first_dev = (unsigned)obj->id;

	if (ios->pages && !per_dev->length)
		return 0;

	first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
	or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
	if (unlikely(!or)) {
		ORE_ERR("%s: osd_start_request failed\n", __func__);
		return -ENOMEM;
	}
	per_dev->or = or;

	if (ios->pages) {
		if (per_dev->cur_sg) {
			/* finalize the last sg_entry */
			_ore_add_sg_seg(per_dev, 0, false);
			if (unlikely(!per_dev->cur_sg))
				return 0;	/* Skip parity only device */

			osd_req_read_sg(or, obj, per_dev->bio,
					per_dev->sglist, per_dev->cur_sg);
		} else {
			/* The sglist is empty, read the complete range */
			osd_req_read(or, obj, per_dev->offset,
				     per_dev->bio, per_dev->length);
		}

		ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
			   " dev=%d sg_len=%d\n", _LLU(obj->id),
			   _LLU(per_dev->offset), _LLU(per_dev->length),
			   first_dev, per_dev->cur_sg);
	} else {
		BUG_ON(ios->kern_buff);

		osd_req_get_attributes(or, obj);
		ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
			    _LLU(obj->id),
			    ios->in_attr_len, first_dev);
	}
	if (ios->out_attr)
		osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);

	if (ios->in_attr)
		osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);

	return 0;
}

int ore_read(struct ore_io_state *ios)
{
	int i;
	int ret;

	ret = _prepare_for_striping(ios);
	if (unlikely(ret))
		return ret;

	for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
		ret = _ore_read_mirror(ios, i);
		if (unlikely(ret))
			return ret;
	}

	ret = ore_io_execute(ios);
	return ret;
}
EXPORT_SYMBOL(ore_read);

int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr)
{
	struct osd_attr cur_attr = {.attr_page = 0};
	void *iter = NULL;
	int nelem;

	do {
		nelem = 1;
		osd_req_decode_get_attr_list(ios->per_dev[0].or,
					     &cur_attr, &nelem, &iter);
		if ((cur_attr.attr_page == attr->attr_page) &&
		    (cur_attr.attr_id == attr->attr_id)) {
			attr->len = cur_attr.len;
			attr->val_ptr = cur_attr.val_ptr;
			return 0;
		}
	} while (iter);

	return -EIO;
}
EXPORT_SYMBOL(extract_attr_from_ios);

static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp,
			     struct osd_attr *attr)
{
	int last_comp = cur_comp + ios->layout->mirrors_p1;

	for (; cur_comp < last_comp; ++cur_comp) {
		struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
		struct osd_request *or;

		or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
		if (unlikely(!or)) {
			ORE_ERR("%s: osd_start_request failed\n", __func__);
			return -ENOMEM;
		}
		per_dev->or = or;

		osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
		osd_req_add_set_attr_list(or, attr, 1);
	}

	return 0;
}

struct _trunc_info {
	struct ore_striping_info si;
	u64 prev_group_obj_off;
	u64 next_group_obj_off;

	unsigned first_group_dev;
	unsigned nex_group_dev;
};

static void _calc_trunk_info(struct ore_layout *layout, u64 file_offset,
			     struct _trunc_info *ti)
{
	unsigned stripe_unit = layout->stripe_unit;

	ore_calc_stripe_info(layout, file_offset, 0, &ti->si);

	ti->prev_group_obj_off = ti->si.M * stripe_unit;
	ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0;

	ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width);
	ti->nex_group_dev = ti->first_group_dev + layout->group_width;
}

int ore_truncate(struct ore_layout *layout, struct ore_components *oc,
		 u64 size)
{
	struct ore_io_state *ios;
	struct exofs_trunc_attr {
		struct osd_attr attr;
		__be64 newsize;
	} *size_attrs;
	struct _trunc_info ti;
	int i, ret;

	ret = ore_get_io_state(layout, oc, &ios);
	if (unlikely(ret))
		return ret;

	_calc_trunk_info(ios->layout, size, &ti);

	size_attrs = kcalloc(ios->oc->numdevs, sizeof(*size_attrs),
			     GFP_KERNEL);
	if (unlikely(!size_attrs)) {
		ret = -ENOMEM;
		goto out;
	}

	ios->numdevs = ios->oc->numdevs;

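	/* Set each component object's logical length so that, within the
	 * stripe containing @size, devices before ti.si.dev keep a full
	 * unit, ti.si.dev is cut at ti.si.obj_offset, and later devices are
	 * cut at the start of that stripe.
	 */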
	for (i = 0; i < ios->numdevs; ++i) {
		struct exofs_trunc_attr *size_attr = &size_attrs[i];
		u64 obj_size;

		if (i < ti.first_group_dev)
			obj_size = ti.prev_group_obj_off;
		else if (i >= ti.nex_group_dev)
			obj_size = ti.next_group_obj_off;
		else if (i < ti.si.dev)
			obj_size = ti.si.obj_offset +
				      ios->layout->stripe_unit - ti.si.unit_off;
		else if (i == ti.si.dev)
			obj_size = ti.si.obj_offset;
		else
			obj_size = ti.si.obj_offset - ti.si.unit_off;

		size_attr->newsize = cpu_to_be64(obj_size);
		size_attr->attr = g_attr_logical_length;
		size_attr->attr.val_ptr = &size_attr->newsize;

		ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
			   _LLU(oc->comps->obj.id), _LLU(obj_size), i);
		ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
					&size_attr->attr);
		if (unlikely(ret))
			goto out;
	}
	ret = ore_io_execute(ios);

out:
	kfree(size_attrs);
	ore_put_io_state(ios);
	return ret;
}
EXPORT_SYMBOL(ore_truncate);

const struct osd_attr g_attr_logical_length = ATTR_DEF(
	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
EXPORT_SYMBOL(g_attr_logical_length);