linux/fs/orangefs/orangefs-utils.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * (C) 2001 Clemson University and The University of Chicago
   4 *
   5 * See COPYING in top-level directory.
   6 */
   7#include <linux/kernel.h>
   8#include "protocol.h"
   9#include "orangefs-kernel.h"
  10#include "orangefs-dev-proto.h"
  11#include "orangefs-bufmap.h"
  12
  13__s32 fsid_of_op(struct orangefs_kernel_op_s *op)
  14{
  15        __s32 fsid = ORANGEFS_FS_ID_NULL;
  16
  17        if (op) {
  18                switch (op->upcall.type) {
  19                case ORANGEFS_VFS_OP_FILE_IO:
  20                        fsid = op->upcall.req.io.refn.fs_id;
  21                        break;
  22                case ORANGEFS_VFS_OP_LOOKUP:
  23                        fsid = op->upcall.req.lookup.parent_refn.fs_id;
  24                        break;
  25                case ORANGEFS_VFS_OP_CREATE:
  26                        fsid = op->upcall.req.create.parent_refn.fs_id;
  27                        break;
  28                case ORANGEFS_VFS_OP_GETATTR:
  29                        fsid = op->upcall.req.getattr.refn.fs_id;
  30                        break;
  31                case ORANGEFS_VFS_OP_REMOVE:
  32                        fsid = op->upcall.req.remove.parent_refn.fs_id;
  33                        break;
  34                case ORANGEFS_VFS_OP_MKDIR:
  35                        fsid = op->upcall.req.mkdir.parent_refn.fs_id;
  36                        break;
  37                case ORANGEFS_VFS_OP_READDIR:
  38                        fsid = op->upcall.req.readdir.refn.fs_id;
  39                        break;
  40                case ORANGEFS_VFS_OP_SETATTR:
  41                        fsid = op->upcall.req.setattr.refn.fs_id;
  42                        break;
  43                case ORANGEFS_VFS_OP_SYMLINK:
  44                        fsid = op->upcall.req.sym.parent_refn.fs_id;
  45                        break;
  46                case ORANGEFS_VFS_OP_RENAME:
  47                        fsid = op->upcall.req.rename.old_parent_refn.fs_id;
  48                        break;
  49                case ORANGEFS_VFS_OP_STATFS:
  50                        fsid = op->upcall.req.statfs.fs_id;
  51                        break;
  52                case ORANGEFS_VFS_OP_TRUNCATE:
  53                        fsid = op->upcall.req.truncate.refn.fs_id;
  54                        break;
  55                case ORANGEFS_VFS_OP_RA_FLUSH:
  56                        fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
  57                        break;
  58                case ORANGEFS_VFS_OP_FS_UMOUNT:
  59                        fsid = op->upcall.req.fs_umount.fs_id;
  60                        break;
  61                case ORANGEFS_VFS_OP_GETXATTR:
  62                        fsid = op->upcall.req.getxattr.refn.fs_id;
  63                        break;
  64                case ORANGEFS_VFS_OP_SETXATTR:
  65                        fsid = op->upcall.req.setxattr.refn.fs_id;
  66                        break;
  67                case ORANGEFS_VFS_OP_LISTXATTR:
  68                        fsid = op->upcall.req.listxattr.refn.fs_id;
  69                        break;
  70                case ORANGEFS_VFS_OP_REMOVEXATTR:
  71                        fsid = op->upcall.req.removexattr.refn.fs_id;
  72                        break;
  73                case ORANGEFS_VFS_OP_FSYNC:
  74                        fsid = op->upcall.req.fsync.refn.fs_id;
  75                        break;
  76                default:
  77                        break;
  78                }
  79        }
  80        return fsid;
  81}
  82
  83static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
  84{
  85        int flags = 0;
  86        if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
  87                flags |= S_IMMUTABLE;
  88        else
  89                flags &= ~S_IMMUTABLE;
  90        if (attrs->flags & ORANGEFS_APPEND_FL)
  91                flags |= S_APPEND;
  92        else
  93                flags &= ~S_APPEND;
  94        if (attrs->flags & ORANGEFS_NOATIME_FL)
  95                flags |= S_NOATIME;
  96        else
  97                flags &= ~S_NOATIME;
  98        return flags;
  99}
 100
 101static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
 102{
 103        int perm_mode = 0;
 104
 105        if (attrs->perms & ORANGEFS_O_EXECUTE)
 106                perm_mode |= S_IXOTH;
 107        if (attrs->perms & ORANGEFS_O_WRITE)
 108                perm_mode |= S_IWOTH;
 109        if (attrs->perms & ORANGEFS_O_READ)
 110                perm_mode |= S_IROTH;
 111
 112        if (attrs->perms & ORANGEFS_G_EXECUTE)
 113                perm_mode |= S_IXGRP;
 114        if (attrs->perms & ORANGEFS_G_WRITE)
 115                perm_mode |= S_IWGRP;
 116        if (attrs->perms & ORANGEFS_G_READ)
 117                perm_mode |= S_IRGRP;
 118
 119        if (attrs->perms & ORANGEFS_U_EXECUTE)
 120                perm_mode |= S_IXUSR;
 121        if (attrs->perms & ORANGEFS_U_WRITE)
 122                perm_mode |= S_IWUSR;
 123        if (attrs->perms & ORANGEFS_U_READ)
 124                perm_mode |= S_IRUSR;
 125
 126        if (attrs->perms & ORANGEFS_G_SGID)
 127                perm_mode |= S_ISGID;
 128        if (attrs->perms & ORANGEFS_U_SUID)
 129                perm_mode |= S_ISUID;
 130
 131        return perm_mode;
 132}
 133
 134/*
 135 * NOTE: in kernel land, we never use the sys_attr->link_target for
 136 * anything, so don't bother copying it into the sys_attr object here.
 137 */
 138static inline int copy_attributes_from_inode(struct inode *inode,
 139                                             struct ORANGEFS_sys_attr_s *attrs,
 140                                             struct iattr *iattr)
 141{
 142        umode_t tmp_mode;
 143
 144        if (!iattr || !inode || !attrs) {
 145                gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
 146                           "in copy_attributes_from_inode!\n",
 147                           iattr,
 148                           inode,
 149                           attrs);
 150                return -EINVAL;
 151        }
 152        /*
 153         * We need to be careful to only copy the attributes out of the
 154         * iattr object that we know are valid.
 155         */
 156        attrs->mask = 0;
 157        if (iattr->ia_valid & ATTR_UID) {
 158                attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid);
 159                attrs->mask |= ORANGEFS_ATTR_SYS_UID;
 160                gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
 161        }
 162        if (iattr->ia_valid & ATTR_GID) {
 163                attrs->group = from_kgid(&init_user_ns, iattr->ia_gid);
 164                attrs->mask |= ORANGEFS_ATTR_SYS_GID;
 165                gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
 166        }
 167
 168        if (iattr->ia_valid & ATTR_ATIME) {
 169                attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
 170                if (iattr->ia_valid & ATTR_ATIME_SET) {
 171                        attrs->atime = (time64_t)iattr->ia_atime.tv_sec;
 172                        attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
 173                }
 174        }
 175        if (iattr->ia_valid & ATTR_MTIME) {
 176                attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
 177                if (iattr->ia_valid & ATTR_MTIME_SET) {
 178                        attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec;
 179                        attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
 180                }
 181        }
 182        if (iattr->ia_valid & ATTR_CTIME)
 183                attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
 184
 185        /*
 186         * ORANGEFS cannot set size with a setattr operation. Probably not
 187         * likely to be requested through the VFS, but just in case, don't
 188         * worry about ATTR_SIZE
 189         */
 190
 191        if (iattr->ia_valid & ATTR_MODE) {
 192                tmp_mode = iattr->ia_mode;
 193                if (tmp_mode & (S_ISVTX)) {
 194                        if (is_root_handle(inode)) {
 195                                /*
 196                                 * allow sticky bit to be set on root (since
 197                                 * it shows up that way by default anyhow),
 198                                 * but don't show it to the server
 199                                 */
 200                                tmp_mode -= S_ISVTX;
 201                        } else {
 202                                gossip_debug(GOSSIP_UTILS_DEBUG,
 203                                        "%s: setting sticky bit not supported.\n",
 204                                        __func__);
 205                                return -EINVAL;
 206                        }
 207                }
 208
 209                if (tmp_mode & (S_ISUID)) {
 210                        gossip_debug(GOSSIP_UTILS_DEBUG,
 211                                "%s: setting setuid bit not supported.\n",
 212                                __func__);
 213                        return -EINVAL;
 214                }
 215
 216                attrs->perms = ORANGEFS_util_translate_mode(tmp_mode);
 217                attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
 218        }
 219
 220        return 0;
 221}
 222
 223static int orangefs_inode_type(enum orangefs_ds_type objtype)
 224{
 225        if (objtype == ORANGEFS_TYPE_METAFILE)
 226                return S_IFREG;
 227        else if (objtype == ORANGEFS_TYPE_DIRECTORY)
 228                return S_IFDIR;
 229        else if (objtype == ORANGEFS_TYPE_SYMLINK)
 230                return S_IFLNK;
 231        else
 232                return -1;
 233}
 234
 235static void orangefs_make_bad_inode(struct inode *inode)
 236{
 237        if (is_root_handle(inode)) {
 238                /*
 239                 * if this occurs, the pvfs2-client-core was killed but we
 240                 * can't afford to lose the inode operations and such
 241                 * associated with the root handle in any case.
 242                 */
 243                gossip_debug(GOSSIP_UTILS_DEBUG,
 244                             "*** NOT making bad root inode %pU\n",
 245                             get_khandle_from_ino(inode));
 246        } else {
 247                gossip_debug(GOSSIP_UTILS_DEBUG,
 248                             "*** making bad inode %pU\n",
 249                             get_khandle_from_ino(inode));
 250                make_bad_inode(inode);
 251        }
 252}
 253
 254static int orangefs_inode_is_stale(struct inode *inode,
 255    struct ORANGEFS_sys_attr_s *attrs, char *link_target)
 256{
 257        struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
 258        int type = orangefs_inode_type(attrs->objtype);
 259        /*
 260         * If the inode type or symlink target have changed then this
 261         * inode is stale.
 262         */
 263        if (type == -1 || !(inode->i_mode & type)) {
 264                orangefs_make_bad_inode(inode);
 265                return 1;
 266        }
 267        if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
 268            link_target, ORANGEFS_NAME_MAX)) {
 269                orangefs_make_bad_inode(inode);
 270                return 1;
 271        }
 272        return 0;
 273}
 274
 275int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
 276    u32 request_mask)
 277{
 278        struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
 279        struct orangefs_kernel_op_s *new_op;
 280        loff_t inode_size;
 281        int ret, type;
 282
 283        gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
 284            get_khandle_from_ino(inode));
 285
 286        if (!new && !bypass) {
 287                /*
 288                 * Must have all the attributes in the mask and be within cache
 289                 * time.
 290                 */
 291                if ((request_mask & orangefs_inode->getattr_mask) ==
 292                    request_mask &&
 293                    time_before(jiffies, orangefs_inode->getattr_time))
 294                        return 0;
 295        }
 296
 297        new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
 298        if (!new_op)
 299                return -ENOMEM;
 300        new_op->upcall.req.getattr.refn = orangefs_inode->refn;
 301        /*
 302         * Size is the hardest attribute to get.  The incremental cost of any
 303         * other attribute is essentially zero.
 304         */
 305        if (request_mask & STATX_SIZE || new)
 306                new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
 307        else
 308                new_op->upcall.req.getattr.mask =
 309                    ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
 310
 311        ret = service_operation(new_op, __func__,
 312            get_interruptible_flag(inode));
 313        if (ret != 0)
 314                goto out;
 315
 316        if (!new) {
 317                ret = orangefs_inode_is_stale(inode,
 318                    &new_op->downcall.resp.getattr.attributes,
 319                    new_op->downcall.resp.getattr.link_target);
 320                if (ret) {
 321                        ret = -ESTALE;
 322                        goto out;
 323                }
 324        }
 325
 326        type = orangefs_inode_type(new_op->
 327            downcall.resp.getattr.attributes.objtype);
 328        switch (type) {
 329        case S_IFREG:
 330                inode->i_flags = orangefs_inode_flags(&new_op->
 331                    downcall.resp.getattr.attributes);
 332                if (request_mask & STATX_SIZE || new) {
 333                        inode_size = (loff_t)new_op->
 334                            downcall.resp.getattr.attributes.size;
 335                        inode->i_size = inode_size;
 336                        inode->i_blkbits = ffs(new_op->downcall.resp.getattr.
 337                            attributes.blksize);
 338                        spin_lock(&inode->i_lock);
 339                        inode->i_bytes = inode_size;
 340                        inode->i_blocks =
 341                            (inode_size + 512 - inode_size % 512)/512;
 342                        spin_unlock(&inode->i_lock);
 343                }
 344                break;
 345        case S_IFDIR:
 346                if (request_mask & STATX_SIZE || new) {
 347                        inode->i_size = PAGE_SIZE;
 348                        spin_lock(&inode->i_lock);
 349                        inode_set_bytes(inode, inode->i_size);
 350                        spin_unlock(&inode->i_lock);
 351                }
 352                set_nlink(inode, 1);
 353                break;
 354        case S_IFLNK:
 355                if (new) {
 356                        inode->i_size = (loff_t)strlen(new_op->
 357                            downcall.resp.getattr.link_target);
 358                        ret = strscpy(orangefs_inode->link_target,
 359                            new_op->downcall.resp.getattr.link_target,
 360                            ORANGEFS_NAME_MAX);
 361                        if (ret == -E2BIG) {
 362                                ret = -EIO;
 363                                goto out;
 364                        }
 365                        inode->i_link = orangefs_inode->link_target;
 366                }
 367                break;
 368        /* i.e. -1 */
 369        default:
 370                /* XXX: ESTALE?  This is what is done if it is not new. */
 371                orangefs_make_bad_inode(inode);
 372                ret = -ESTALE;
 373                goto out;
 374        }
 375
 376        inode->i_uid = make_kuid(&init_user_ns, new_op->
 377            downcall.resp.getattr.attributes.owner);
 378        inode->i_gid = make_kgid(&init_user_ns, new_op->
 379            downcall.resp.getattr.attributes.group);
 380        inode->i_atime.tv_sec = (time64_t)new_op->
 381            downcall.resp.getattr.attributes.atime;
 382        inode->i_mtime.tv_sec = (time64_t)new_op->
 383            downcall.resp.getattr.attributes.mtime;
 384        inode->i_ctime.tv_sec = (time64_t)new_op->
 385            downcall.resp.getattr.attributes.ctime;
 386        inode->i_atime.tv_nsec = 0;
 387        inode->i_mtime.tv_nsec = 0;
 388        inode->i_ctime.tv_nsec = 0;
 389
 390        /* special case: mark the root inode as sticky */
 391        inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
 392            orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
 393
 394        orangefs_inode->getattr_time = jiffies +
 395            orangefs_getattr_timeout_msecs*HZ/1000;
 396        if (request_mask & STATX_SIZE || new)
 397                orangefs_inode->getattr_mask = STATX_BASIC_STATS;
 398        else
 399                orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE;
 400        ret = 0;
 401out:
 402        op_release(new_op);
 403        return ret;
 404}
 405
 406int orangefs_inode_check_changed(struct inode *inode)
 407{
 408        struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
 409        struct orangefs_kernel_op_s *new_op;
 410        int ret;
 411
 412        gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
 413            get_khandle_from_ino(inode));
 414
 415        new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
 416        if (!new_op)
 417                return -ENOMEM;
 418        new_op->upcall.req.getattr.refn = orangefs_inode->refn;
 419        new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
 420            ORANGEFS_ATTR_SYS_LNK_TARGET;
 421
 422        ret = service_operation(new_op, __func__,
 423            get_interruptible_flag(inode));
 424        if (ret != 0)
 425                goto out;
 426
 427        ret = orangefs_inode_is_stale(inode,
 428            &new_op->downcall.resp.getattr.attributes,
 429            new_op->downcall.resp.getattr.link_target);
 430out:
 431        op_release(new_op);
 432        return ret;
 433}
 434
 435/*
 436 * issues a orangefs setattr request to make sure the new attribute values
 437 * take effect if successful.  returns 0 on success; -errno otherwise
 438 */
 439int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
 440{
 441        struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
 442        struct orangefs_kernel_op_s *new_op;
 443        int ret;
 444
 445        new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
 446        if (!new_op)
 447                return -ENOMEM;
 448
 449        new_op->upcall.req.setattr.refn = orangefs_inode->refn;
 450        ret = copy_attributes_from_inode(inode,
 451                       &new_op->upcall.req.setattr.attributes,
 452                       iattr);
 453        if (ret >= 0) {
 454                ret = service_operation(new_op, __func__,
 455                                get_interruptible_flag(inode));
 456
 457                gossip_debug(GOSSIP_UTILS_DEBUG,
 458                             "orangefs_inode_setattr: returning %d\n",
 459                             ret);
 460        }
 461
 462        op_release(new_op);
 463
 464        if (ret == 0)
 465                orangefs_inode->getattr_time = jiffies - 1;
 466
 467        return ret;
 468}
 469
 470/*
 471 * The following is a very dirty hack that is now a permanent part of the
 472 * ORANGEFS protocol. See protocol.h for more error definitions.
 473 */
 474
 475/* The order matches include/orangefs-types.h in the OrangeFS source. */
 476static int PINT_errno_mapping[] = {
 477        0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
 478        EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
 479        EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
 480        ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
 481        EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
 482        EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
 483        ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
 484        EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
 485        ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
 486        EACCES, ECONNRESET, ERANGE
 487};
 488
 489int orangefs_normalize_to_errno(__s32 error_code)
 490{
 491        __u32 i;
 492
 493        /* Success */
 494        if (error_code == 0) {
 495                return 0;
 496        /*
 497         * This shouldn't ever happen. If it does it should be fixed on the
 498         * server.
 499         */
 500        } else if (error_code > 0) {
 501                gossip_err("orangefs: error status received.\n");
 502                gossip_err("orangefs: assuming error code is inverted.\n");
 503                error_code = -error_code;
 504        }
 505
 506        /*
 507         * XXX: This is very bad since error codes from ORANGEFS may not be
 508         * suitable for return into userspace.
 509         */
 510
 511        /*
 512         * Convert ORANGEFS error values into errno values suitable for return
 513         * from the kernel.
 514         */
 515        if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
 516                if (((-error_code) &
 517                    (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
 518                    ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
 519                        /*
 520                         * cancellation error codes generally correspond to
 521                         * a timeout from the client's perspective
 522                         */
 523                        error_code = -ETIMEDOUT;
 524                } else {
 525                        /* assume a default error code */
 526                        gossip_err("%s: bad error code :%d:.\n",
 527                                __func__,
 528                                error_code);
 529                        error_code = -EINVAL;
 530                }
 531
 532        /* Convert ORANGEFS encoded errno values into regular errno values. */
 533        } else if ((-error_code) & ORANGEFS_ERROR_BIT) {
 534                i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
 535                if (i < ARRAY_SIZE(PINT_errno_mapping))
 536                        error_code = -PINT_errno_mapping[i];
 537                else
 538                        error_code = -EINVAL;
 539
 540        /*
 541         * Only ORANGEFS protocol error codes should ever come here. Otherwise
 542         * there is a bug somewhere.
 543         */
 544        } else {
 545                gossip_err("%s: unknown error code.\n", __func__);
 546                error_code = -EINVAL;
 547        }
 548        return error_code;
 549}
 550
 551#define NUM_MODES 11
 552__s32 ORANGEFS_util_translate_mode(int mode)
 553{
 554        int ret = 0;
 555        int i = 0;
 556        static int modes[NUM_MODES] = {
 557                S_IXOTH, S_IWOTH, S_IROTH,
 558                S_IXGRP, S_IWGRP, S_IRGRP,
 559                S_IXUSR, S_IWUSR, S_IRUSR,
 560                S_ISGID, S_ISUID
 561        };
 562        static int orangefs_modes[NUM_MODES] = {
 563                ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
 564                ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
 565                ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
 566                ORANGEFS_G_SGID, ORANGEFS_U_SUID
 567        };
 568
 569        for (i = 0; i < NUM_MODES; i++)
 570                if (mode & modes[i])
 571                        ret |= orangefs_modes[i];
 572
 573        return ret;
 574}
 575#undef NUM_MODES
 576