linux/drivers/staging/lustre/lnet/lnet/lib-socket.c
<<
>>
Prefs
   1/*
   2 * GPL HEADER START
   3 *
   4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 only,
   8 * as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 * General Public License version 2 for more details (a copy is included
  14 * in the LICENSE file that accompanied this code).
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * version 2 along with this program; If not, see
  18 * http://www.gnu.org/licenses/gpl-2.0.html
  19 *
  20 * GPL HEADER END
  21 */
  22/*
  23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24 * Use is subject to license terms.
  25 *
  26 * Copyright (c) 2012, 2015, Intel Corporation.
  27 */
  28/*
  29 * This file is part of Lustre, http://www.lustre.org/
  30 * Lustre is a trademark of Seagate, Inc.
  31 */
  32#define DEBUG_SUBSYSTEM S_LNET
  33
  34#include <linux/if.h>
  35#include <linux/in.h>
  36#include <linux/net.h>
  37#include <linux/file.h>
  38#include <linux/pagemap.h>
  39/* For sys_open & sys_close */
  40#include <linux/syscalls.h>
  41#include <net/sock.h>
  42
  43#include "../../include/linux/libcfs/libcfs.h"
  44#include "../../include/linux/lnet/lib-lnet.h"
  45
  46static int
  47kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg)
  48{
  49        mm_segment_t oldfs = get_fs();
  50        int err;
  51
  52        set_fs(KERNEL_DS);
  53        err = filp->f_op->unlocked_ioctl(filp, cmd, arg);
  54        set_fs(oldfs);
  55
  56        return err;
  57}
  58
  59static int
  60lnet_sock_ioctl(int cmd, unsigned long arg)
  61{
  62        struct file *sock_filp;
  63        struct socket *sock;
  64        int rc;
  65
  66        rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
  67        if (rc) {
  68                CERROR("Can't create socket: %d\n", rc);
  69                return rc;
  70        }
  71
  72        sock_filp = sock_alloc_file(sock, 0, NULL);
  73        if (IS_ERR(sock_filp)) {
  74                sock_release(sock);
  75                rc = PTR_ERR(sock_filp);
  76                goto out;
  77        }
  78
  79        rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
  80
  81        fput(sock_filp);
  82out:
  83        return rc;
  84}
  85
  86int
  87lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
  88{
  89        struct ifreq ifr;
  90        int nob;
  91        int rc;
  92        __u32 val;
  93
  94        nob = strnlen(name, IFNAMSIZ);
  95        if (nob == IFNAMSIZ) {
  96                CERROR("Interface name %s too long\n", name);
  97                return -EINVAL;
  98        }
  99
 100        CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ);
 101
 102        if (strlen(name) > sizeof(ifr.ifr_name) - 1)
 103                return -E2BIG;
 104        strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
 105
 106        rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
 107        if (rc) {
 108                CERROR("Can't get flags for interface %s\n", name);
 109                return rc;
 110        }
 111
 112        if (!(ifr.ifr_flags & IFF_UP)) {
 113                CDEBUG(D_NET, "Interface %s down\n", name);
 114                *up = 0;
 115                *ip = *mask = 0;
 116                return 0;
 117        }
 118        *up = 1;
 119
 120        if (strlen(name) > sizeof(ifr.ifr_name) - 1)
 121                return -E2BIG;
 122        strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
 123
 124        ifr.ifr_addr.sa_family = AF_INET;
 125        rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
 126        if (rc) {
 127                CERROR("Can't get IP address for interface %s\n", name);
 128                return rc;
 129        }
 130
 131        val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
 132        *ip = ntohl(val);
 133
 134        if (strlen(name) > sizeof(ifr.ifr_name) - 1)
 135                return -E2BIG;
 136        strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
 137
 138        ifr.ifr_addr.sa_family = AF_INET;
 139        rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
 140        if (rc) {
 141                CERROR("Can't get netmask for interface %s\n", name);
 142                return rc;
 143        }
 144
 145        val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
 146        *mask = ntohl(val);
 147
 148        return 0;
 149}
 150EXPORT_SYMBOL(lnet_ipif_query);
 151
 152int
 153lnet_ipif_enumerate(char ***namesp)
 154{
 155        /* Allocate and fill in 'names', returning # interfaces/error */
 156        char **names;
 157        int toobig;
 158        int nalloc;
 159        int nfound;
 160        struct ifreq *ifr;
 161        struct ifconf ifc;
 162        int rc;
 163        int nob;
 164        int i;
 165
 166        nalloc = 16;    /* first guess at max interfaces */
 167        toobig = 0;
 168        for (;;) {
 169                if (nalloc * sizeof(*ifr) > PAGE_SIZE) {
 170                        toobig = 1;
 171                        nalloc = PAGE_SIZE / sizeof(*ifr);
 172                        CWARN("Too many interfaces: only enumerating first %d\n",
 173                              nalloc);
 174                }
 175
 176                LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
 177                if (!ifr) {
 178                        CERROR("ENOMEM enumerating up to %d interfaces\n",
 179                               nalloc);
 180                        rc = -ENOMEM;
 181                        goto out0;
 182                }
 183
 184                ifc.ifc_buf = (char *)ifr;
 185                ifc.ifc_len = nalloc * sizeof(*ifr);
 186
 187                rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
 188                if (rc < 0) {
 189                        CERROR("Error %d enumerating interfaces\n", rc);
 190                        goto out1;
 191                }
 192
 193                LASSERT(!rc);
 194
 195                nfound = ifc.ifc_len / sizeof(*ifr);
 196                LASSERT(nfound <= nalloc);
 197
 198                if (nfound < nalloc || toobig)
 199                        break;
 200
 201                LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
 202                nalloc *= 2;
 203        }
 204
 205        if (!nfound)
 206                goto out1;
 207
 208        LIBCFS_ALLOC(names, nfound * sizeof(*names));
 209        if (!names) {
 210                rc = -ENOMEM;
 211                goto out1;
 212        }
 213
 214        for (i = 0; i < nfound; i++) {
 215                nob = strnlen(ifr[i].ifr_name, IFNAMSIZ);
 216                if (nob == IFNAMSIZ) {
 217                        /* no space for terminating NULL */
 218                        CERROR("interface name %.*s too long (%d max)\n",
 219                               nob, ifr[i].ifr_name, IFNAMSIZ);
 220                        rc = -ENAMETOOLONG;
 221                        goto out2;
 222                }
 223
 224                LIBCFS_ALLOC(names[i], IFNAMSIZ);
 225                if (!names[i]) {
 226                        rc = -ENOMEM;
 227                        goto out2;
 228                }
 229
 230                memcpy(names[i], ifr[i].ifr_name, nob);
 231                names[i][nob] = 0;
 232        }
 233
 234        *namesp = names;
 235        rc = nfound;
 236
 237out2:
 238        if (rc < 0)
 239                lnet_ipif_free_enumeration(names, nfound);
 240out1:
 241        LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
 242out0:
 243        return rc;
 244}
 245EXPORT_SYMBOL(lnet_ipif_enumerate);
 246
 247void
 248lnet_ipif_free_enumeration(char **names, int n)
 249{
 250        int i;
 251
 252        LASSERT(n > 0);
 253
 254        for (i = 0; i < n && names[i]; i++)
 255                LIBCFS_FREE(names[i], IFNAMSIZ);
 256
 257        LIBCFS_FREE(names, n * sizeof(*names));
 258}
 259EXPORT_SYMBOL(lnet_ipif_free_enumeration);
 260
 261int
 262lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
 263{
 264        int rc;
 265        long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
 266        unsigned long then;
 267        struct timeval tv;
 268        struct kvec  iov = { .iov_base = buffer, .iov_len  = nob };
 269        struct msghdr msg = {NULL,};
 270
 271        LASSERT(nob > 0);
 272        /*
 273         * Caller may pass a zero timeout if she thinks the socket buffer is
 274         * empty enough to take the whole message immediately
 275         */
 276        iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob);
 277        for (;;) {
 278                msg.msg_flags = !timeout ? MSG_DONTWAIT : 0;
 279                if (timeout) {
 280                        /* Set send timeout to remaining time */
 281                        jiffies_to_timeval(jiffies_left, &tv);
 282                        rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
 283                                               (char *)&tv, sizeof(tv));
 284                        if (rc) {
 285                                CERROR("Can't set socket send timeout %ld.%06d: %d\n",
 286                                       (long)tv.tv_sec, (int)tv.tv_usec, rc);
 287                                return rc;
 288                        }
 289                }
 290
 291                then = jiffies;
 292                rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
 293                jiffies_left -= jiffies - then;
 294
 295                if (rc < 0)
 296                        return rc;
 297
 298                if (!rc) {
 299                        CERROR("Unexpected zero rc\n");
 300                        return -ECONNABORTED;
 301                }
 302
 303                if (!msg_data_left(&msg))
 304                        break;
 305
 306                if (jiffies_left <= 0)
 307                        return -EAGAIN;
 308        }
 309        return 0;
 310}
 311EXPORT_SYMBOL(lnet_sock_write);
 312
 313int
 314lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
 315{
 316        int rc;
 317        long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
 318        unsigned long then;
 319        struct timeval tv;
 320
 321        LASSERT(nob > 0);
 322        LASSERT(jiffies_left > 0);
 323
 324        for (;;) {
 325                struct kvec  iov = {
 326                        .iov_base = buffer,
 327                        .iov_len  = nob
 328                };
 329                struct msghdr msg = {
 330                        .msg_flags = 0
 331                };
 332
 333                /* Set receive timeout to remaining time */
 334                jiffies_to_timeval(jiffies_left, &tv);
 335                rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
 336                                       (char *)&tv, sizeof(tv));
 337                if (rc) {
 338                        CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
 339                               (long)tv.tv_sec, (int)tv.tv_usec, rc);
 340                        return rc;
 341                }
 342
 343                then = jiffies;
 344                rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0);
 345                jiffies_left -= jiffies - then;
 346
 347                if (rc < 0)
 348                        return rc;
 349
 350                if (!rc)
 351                        return -ECONNRESET;
 352
 353                buffer = ((char *)buffer) + rc;
 354                nob -= rc;
 355
 356                if (!nob)
 357                        return 0;
 358
 359                if (jiffies_left <= 0)
 360                        return -ETIMEDOUT;
 361        }
 362}
 363EXPORT_SYMBOL(lnet_sock_read);
 364
 365static int
 366lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip,
 367                 int local_port)
 368{
 369        struct sockaddr_in locaddr;
 370        struct socket *sock;
 371        int rc;
 372        int option;
 373
 374        /* All errors are fatal except bind failure if the port is in use */
 375        *fatal = 1;
 376
 377        rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
 378        *sockp = sock;
 379        if (rc) {
 380                CERROR("Can't create socket: %d\n", rc);
 381                return rc;
 382        }
 383
 384        option = 1;
 385        rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
 386                               (char *)&option, sizeof(option));
 387        if (rc) {
 388                CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
 389                goto failed;
 390        }
 391
 392        if (local_ip || local_port) {
 393                memset(&locaddr, 0, sizeof(locaddr));
 394                locaddr.sin_family = AF_INET;
 395                locaddr.sin_port = htons(local_port);
 396                locaddr.sin_addr.s_addr = !local_ip ?
 397                                          INADDR_ANY : htonl(local_ip);
 398
 399                rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
 400                                 sizeof(locaddr));
 401                if (rc == -EADDRINUSE) {
 402                        CDEBUG(D_NET, "Port %d already in use\n", local_port);
 403                        *fatal = 0;
 404                        goto failed;
 405                }
 406                if (rc) {
 407                        CERROR("Error trying to bind to port %d: %d\n",
 408                               local_port, rc);
 409                        goto failed;
 410                }
 411        }
 412        return 0;
 413
 414failed:
 415        sock_release(sock);
 416        return rc;
 417}
 418
 419int
 420lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
 421{
 422        int option;
 423        int rc;
 424
 425        if (txbufsize) {
 426                option = txbufsize;
 427                rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
 428                                       (char *)&option, sizeof(option));
 429                if (rc) {
 430                        CERROR("Can't set send buffer %d: %d\n",
 431                               option, rc);
 432                        return rc;
 433                }
 434        }
 435
 436        if (rxbufsize) {
 437                option = rxbufsize;
 438                rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
 439                                       (char *)&option, sizeof(option));
 440                if (rc) {
 441                        CERROR("Can't set receive buffer %d: %d\n",
 442                               option, rc);
 443                        return rc;
 444                }
 445        }
 446        return 0;
 447}
 448EXPORT_SYMBOL(lnet_sock_setbuf);
 449
 450int
 451lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
 452{
 453        struct sockaddr_in sin;
 454        int len = sizeof(sin);
 455        int rc;
 456
 457        if (remote)
 458                rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
 459        else
 460                rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
 461        if (rc) {
 462                CERROR("Error %d getting sock %s IP/port\n",
 463                       rc, remote ? "peer" : "local");
 464                return rc;
 465        }
 466
 467        if (ip)
 468                *ip = ntohl(sin.sin_addr.s_addr);
 469
 470        if (port)
 471                *port = ntohs(sin.sin_port);
 472
 473        return 0;
 474}
 475EXPORT_SYMBOL(lnet_sock_getaddr);
 476
 477int
 478lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
 479{
 480        if (txbufsize)
 481                *txbufsize = sock->sk->sk_sndbuf;
 482
 483        if (rxbufsize)
 484                *rxbufsize = sock->sk->sk_rcvbuf;
 485
 486        return 0;
 487}
 488EXPORT_SYMBOL(lnet_sock_getbuf);
 489
 490int
 491lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
 492                 int backlog)
 493{
 494        int fatal;
 495        int rc;
 496
 497        rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
 498        if (rc) {
 499                if (!fatal)
 500                        CERROR("Can't create socket: port %d already in use\n",
 501                               local_port);
 502                return rc;
 503        }
 504
 505        rc = kernel_listen(*sockp, backlog);
 506        if (!rc)
 507                return 0;
 508
 509        CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
 510        sock_release(*sockp);
 511        return rc;
 512}
 513
 514int
 515lnet_sock_accept(struct socket **newsockp, struct socket *sock)
 516{
 517        wait_queue_t wait;
 518        struct socket *newsock;
 519        int rc;
 520
 521        /*
 522         * XXX this should add a ref to sock->ops->owner, if
 523         * TCP could be a module
 524         */
 525        rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
 526        if (rc) {
 527                CERROR("Can't allocate socket\n");
 528                return rc;
 529        }
 530
 531        newsock->ops = sock->ops;
 532
 533        rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
 534        if (rc == -EAGAIN) {
 535                /* Nothing ready, so wait for activity */
 536                init_waitqueue_entry(&wait, current);
 537                add_wait_queue(sk_sleep(sock->sk), &wait);
 538                set_current_state(TASK_INTERRUPTIBLE);
 539                schedule();
 540                remove_wait_queue(sk_sleep(sock->sk), &wait);
 541                rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
 542        }
 543
 544        if (rc)
 545                goto failed;
 546
 547        *newsockp = newsock;
 548        return 0;
 549
 550failed:
 551        sock_release(newsock);
 552        return rc;
 553}
 554
 555int
 556lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip,
 557                  int local_port, __u32 peer_ip, int peer_port)
 558{
 559        struct sockaddr_in srvaddr;
 560        int rc;
 561
 562        rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
 563        if (rc)
 564                return rc;
 565
 566        memset(&srvaddr, 0, sizeof(srvaddr));
 567        srvaddr.sin_family = AF_INET;
 568        srvaddr.sin_port = htons(peer_port);
 569        srvaddr.sin_addr.s_addr = htonl(peer_ip);
 570
 571        rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
 572                            sizeof(srvaddr), 0);
 573        if (!rc)
 574                return 0;
 575
 576        /*
 577         * EADDRNOTAVAIL probably means we're already connected to the same
 578         * peer/port on the same local port on a differently typed
 579         * connection.  Let our caller retry with a different local
 580         * port...
 581         */
 582        *fatal = !(rc == -EADDRNOTAVAIL);
 583
 584        CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
 585                     "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
 586                     &local_ip, local_port, &peer_ip, peer_port);
 587
 588        sock_release(*sockp);
 589        return rc;
 590}
 591