linux/fs/orangefs/dir.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright 2017 Omnibond Systems, L.L.C.
   4 */
   5
   6#include "protocol.h"
   7#include "orangefs-kernel.h"
   8#include "orangefs-bufmap.h"
   9
  10struct orangefs_dir_part {
  11        struct orangefs_dir_part *next;
  12        size_t len;
  13};
  14
  15struct orangefs_dir {
  16        __u64 token;
  17        struct orangefs_dir_part *part;
  18        loff_t end;
  19        int error;
  20};
  21
  22#define PART_SHIFT (24)
  23#define PART_SIZE (1<<24)
  24#define PART_MASK (~(PART_SIZE - 1))
  25
  26/*
  27 * There can be up to 512 directory entries.  Each entry is encoded as
  28 * follows:
  29 * 4 bytes: string size (n)
  30 * n bytes: string
  31 * 1 byte: trailing zero
  32 * padding to 8 bytes
  33 * 16 bytes: khandle
  34 * padding to 8 bytes
  35 *
  36 * The trailer_buf starts with a struct orangefs_readdir_response_s
  37 * which must be skipped to get to the directory data.
  38 *
  39 * The data which is received from the userspace daemon is termed a
  40 * part and is stored in a linked list in case more than one part is
  41 * needed for a large directory.
  42 *
  43 * The position pointer (ctx->pos) encodes the part and offset at which
  44 * to begin reading.  Bits above PART_SHIFT encode the part and bits
  45 * below PART_SHIFT encode the offset.  Parts are stored in a linked
  46 * list which grows as data is received from the server.  The overhead
  47 * associated with managing the list is presumed to be small compared to
  48 * the overhead of communicating with the server.
  49 *
  50 * As data is received from the server, it is placed at the end of the
  51 * part list.  Data is parsed from the current position as it is needed.
  52 * When data is determined to be corrupt, it is either because the
  53 * userspace component has sent back corrupt data or because the file
  54 * pointer has been moved to an invalid location.  Since the two cannot
  55 * be differentiated, return EIO.
  56 *
  57 * Part zero is synthesized to contain `.' and `..'.  Part one is the
  58 * first part of the part list.
  59 */
  60
  61static int do_readdir(struct orangefs_inode_s *oi,
  62    struct orangefs_dir *od, struct dentry *dentry,
  63    struct orangefs_kernel_op_s *op)
  64{
  65        struct orangefs_readdir_response_s *resp;
  66        int bufi, r;
  67
  68        /*
  69         * Despite the badly named field, readdir does not use shared
  70         * memory.  However, there are a limited number of readdir
  71         * slots, which must be allocated here.  This flag simply tells
  72         * the op scheduler to return the op here for retry.
  73         */
  74        op->uses_shared_memory = 1;
  75        op->upcall.req.readdir.refn = oi->refn;
  76        op->upcall.req.readdir.token = od->token;
  77        op->upcall.req.readdir.max_dirent_count =
  78            ORANGEFS_MAX_DIRENT_COUNT_READDIR;
  79
  80again:
  81        bufi = orangefs_readdir_index_get();
  82        if (bufi < 0) {
  83                od->error = bufi;
  84                return bufi;
  85        }
  86
  87        op->upcall.req.readdir.buf_index = bufi;
  88
  89        r = service_operation(op, "orangefs_readdir",
  90            get_interruptible_flag(dentry->d_inode));
  91
  92        orangefs_readdir_index_put(bufi);
  93
  94        if (op_state_purged(op)) {
  95                if (r == -EAGAIN) {
  96                        vfree(op->downcall.trailer_buf);
  97                        goto again;
  98                } else if (r == -EIO) {
  99                        vfree(op->downcall.trailer_buf);
 100                        od->error = r;
 101                        return r;
 102                }
 103        }
 104
 105        if (r < 0) {
 106                vfree(op->downcall.trailer_buf);
 107                od->error = r;
 108                return r;
 109        } else if (op->downcall.status) {
 110                vfree(op->downcall.trailer_buf);
 111                od->error = op->downcall.status;
 112                return op->downcall.status;
 113        }
 114
 115        /*
 116         * The maximum size is size per entry times the 512 entries plus
 117         * the header.  This is well under the limit.
 118         */
 119        if (op->downcall.trailer_size > PART_SIZE) {
 120                vfree(op->downcall.trailer_buf);
 121                od->error = -EIO;
 122                return -EIO;
 123        }
 124
 125        resp = (struct orangefs_readdir_response_s *)
 126            op->downcall.trailer_buf;
 127        od->token = resp->token;
 128        return 0;
 129}
 130
 131static int parse_readdir(struct orangefs_dir *od,
 132    struct orangefs_kernel_op_s *op)
 133{
 134        struct orangefs_dir_part *part, *new;
 135        size_t count;
 136
 137        count = 1;
 138        part = od->part;
 139        while (part) {
 140                count++;
 141                if (part->next)
 142                        part = part->next;
 143                else
 144                        break;
 145        }
 146
 147        new = (void *)op->downcall.trailer_buf;
 148        new->next = NULL;
 149        new->len = op->downcall.trailer_size -
 150            sizeof(struct orangefs_readdir_response_s);
 151        if (!od->part)
 152                od->part = new;
 153        else
 154                part->next = new;
 155        count++;
 156        od->end = count << PART_SHIFT;
 157
 158        return 0;
 159}
 160
 161static int orangefs_dir_more(struct orangefs_inode_s *oi,
 162    struct orangefs_dir *od, struct dentry *dentry)
 163{
 164        struct orangefs_kernel_op_s *op;
 165        int r;
 166
 167        op = op_alloc(ORANGEFS_VFS_OP_READDIR);
 168        if (!op) {
 169                od->error = -ENOMEM;
 170                return -ENOMEM;
 171        }
 172        r = do_readdir(oi, od, dentry, op);
 173        if (r) {
 174                od->error = r;
 175                goto out;
 176        }
 177        r = parse_readdir(od, op);
 178        if (r) {
 179                od->error = r;
 180                goto out;
 181        }
 182
 183        od->error = 0;
 184out:
 185        op_release(op);
 186        return od->error;
 187}
 188
 189static int fill_from_part(struct orangefs_dir_part *part,
 190    struct dir_context *ctx)
 191{
 192        const int offset = sizeof(struct orangefs_readdir_response_s);
 193        struct orangefs_khandle *khandle;
 194        __u32 *len, padlen;
 195        loff_t i;
 196        char *s;
 197        i = ctx->pos & ~PART_MASK;
 198
 199        /* The file offset from userspace is too large. */
 200        if (i > part->len)
 201                return 1;
 202
 203        /*
 204         * If the seek pointer is positioned just before an entry it
 205         * should find the next entry.
 206         */
 207        if (i % 8)
 208                i = i + (8 - i%8)%8;
 209
 210        while (i < part->len) {
 211                if (part->len < i + sizeof *len)
 212                        break;
 213                len = (void *)part + offset + i;
 214                /*
 215                 * len is the size of the string itself.  padlen is the
 216                 * total size of the encoded string.
 217                 */
 218                padlen = (sizeof *len + *len + 1) +
 219                    (8 - (sizeof *len + *len + 1)%8)%8;
 220                if (part->len < i + padlen + sizeof *khandle)
 221                        goto next;
 222                s = (void *)part + offset + i + sizeof *len;
 223                if (s[*len] != 0)
 224                        goto next;
 225                khandle = (void *)part + offset + i + padlen;
 226                if (!dir_emit(ctx, s, *len,
 227                    orangefs_khandle_to_ino(khandle),
 228                    DT_UNKNOWN))
 229                        return 0;
 230                i += padlen + sizeof *khandle;
 231                i = i + (8 - i%8)%8;
 232                BUG_ON(i > part->len);
 233                ctx->pos = (ctx->pos & PART_MASK) | i;
 234                continue;
 235next:
 236                i += 8;
 237        }
 238        return 1;
 239}
 240
 241static int orangefs_dir_fill(struct orangefs_inode_s *oi,
 242    struct orangefs_dir *od, struct dentry *dentry,
 243    struct dir_context *ctx)
 244{
 245        struct orangefs_dir_part *part;
 246        size_t count;
 247
 248        count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
 249
 250        part = od->part;
 251        while (part->next && count) {
 252                count--;
 253                part = part->next;
 254        }
 255        /* This means the userspace file offset is invalid. */
 256        if (count) {
 257                od->error = -EIO;
 258                return -EIO;
 259        }
 260
 261        while (part && part->len) {
 262                int r;
 263                r = fill_from_part(part, ctx);
 264                if (r < 0) {
 265                        od->error = r;
 266                        return r;
 267                } else if (r == 0) {
 268                        /* Userspace buffer is full. */
 269                        break;
 270                } else {
 271                        /*
 272                         * The part ran out of data.  Move to the next
 273                         * part. */
 274                        ctx->pos = (ctx->pos & PART_MASK) +
 275                            (1 << PART_SHIFT);
 276                        part = part->next;
 277                }
 278        }
 279        return 0;
 280}
 281
 282static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
 283    int whence)
 284{
 285        struct orangefs_dir *od = file->private_data;
 286        /*
 287         * Delete the stored data so userspace sees new directory
 288         * entries.
 289         */
 290        if (!whence && offset < od->end) {
 291                struct orangefs_dir_part *part = od->part;
 292                while (part) {
 293                        struct orangefs_dir_part *next = part->next;
 294                        vfree(part);
 295                        part = next;
 296                }
 297                od->token = ORANGEFS_ITERATE_START;
 298                od->part = NULL;
 299                od->end = 1 << PART_SHIFT;
 300        }
 301        return default_llseek(file, offset, whence);
 302}
 303
 304static int orangefs_dir_iterate(struct file *file,
 305    struct dir_context *ctx)
 306{
 307        struct orangefs_inode_s *oi;
 308        struct orangefs_dir *od;
 309        struct dentry *dentry;
 310        int r;
 311
 312        dentry = file->f_path.dentry;
 313        oi = ORANGEFS_I(dentry->d_inode);
 314        od = file->private_data;
 315
 316        if (od->error)
 317                return od->error;
 318
 319        if (ctx->pos == 0) {
 320                if (!dir_emit_dot(file, ctx))
 321                        return 0;
 322                ctx->pos++;
 323        }
 324        if (ctx->pos == 1) {
 325                if (!dir_emit_dotdot(file, ctx))
 326                        return 0;
 327                ctx->pos = 1 << PART_SHIFT;
 328        }
 329
 330        /*
 331         * The seek position is in the first synthesized part but is not
 332         * valid.
 333         */
 334        if ((ctx->pos & PART_MASK) == 0)
 335                return -EIO;
 336
 337        r = 0;
 338
 339        /*
 340         * Must read more if the user has sought past what has been read
 341         * so far.  Stop a user who has sought past the end.
 342         */
 343        while (od->token != ORANGEFS_ITERATE_END &&
 344            ctx->pos > od->end) {
 345                r = orangefs_dir_more(oi, od, dentry);
 346                if (r)
 347                        return r;
 348        }
 349        if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
 350                return -EIO;
 351
 352        /* Then try to fill if there's any left in the buffer. */
 353        if (ctx->pos < od->end) {
 354                r = orangefs_dir_fill(oi, od, dentry, ctx);
 355                if (r)
 356                        return r;
 357        }
 358
 359        /* Finally get some more and try to fill. */
 360        if (od->token != ORANGEFS_ITERATE_END) {
 361                r = orangefs_dir_more(oi, od, dentry);
 362                if (r)
 363                        return r;
 364                r = orangefs_dir_fill(oi, od, dentry, ctx);
 365        }
 366
 367        return r;
 368}
 369
 370static int orangefs_dir_open(struct inode *inode, struct file *file)
 371{
 372        struct orangefs_dir *od;
 373        file->private_data = kmalloc(sizeof(struct orangefs_dir),
 374            GFP_KERNEL);
 375        if (!file->private_data)
 376                return -ENOMEM;
 377        od = file->private_data;
 378        od->token = ORANGEFS_ITERATE_START;
 379        od->part = NULL;
 380        od->end = 1 << PART_SHIFT;
 381        od->error = 0;
 382        return 0;
 383}
 384
 385static int orangefs_dir_release(struct inode *inode, struct file *file)
 386{
 387        struct orangefs_dir *od = file->private_data;
 388        struct orangefs_dir_part *part = od->part;
 389        while (part) {
 390                struct orangefs_dir_part *next = part->next;
 391                vfree(part);
 392                part = next;
 393        }
 394        kfree(od);
 395        return 0;
 396}
 397
 398const struct file_operations orangefs_dir_operations = {
 399        .llseek = orangefs_dir_llseek,
 400        .read = generic_read_dir,
 401        .iterate = orangefs_dir_iterate,
 402        .open = orangefs_dir_open,
 403        .release = orangefs_dir_release
 404};
 405