qemu/net/filter-rewriter.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
   3 * Copyright (c) 2016 FUJITSU LIMITED
   4 * Copyright (c) 2016 Intel Corporation
   5 *
   6 * Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
   7 *
   8 * This work is licensed under the terms of the GNU GPL, version 2 or
   9 * later.  See the COPYING file in the top-level directory.
  10 */
  11
  12#include "qemu/osdep.h"
  13#include "trace.h"
  14#include "net/colo.h"
  15#include "net/filter.h"
  16#include "net/net.h"
  17#include "qemu-common.h"
  18#include "qapi/error.h"
  19#include "qapi/qmp/qerror.h"
  20#include "qemu/error-report.h"
  21#include "qapi-visit.h"
  22#include "qom/object.h"
  23#include "qemu/main-loop.h"
  24#include "qemu/iov.h"
  25#include "net/checksum.h"
  26
  27#define FILTER_COLO_REWRITER(obj) \
  28    OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
  29
  30#define TYPE_FILTER_REWRITER "filter-rewriter"
  31
  32typedef struct RewriterState {
  33    NetFilterState parent_obj;
  34    NetQueue *incoming_queue;
  35    /* hashtable to save connection */
  36    GHashTable *connection_track_table;
  37    bool vnet_hdr;
  38} RewriterState;
  39
  40static void filter_rewriter_flush(NetFilterState *nf)
  41{
  42    RewriterState *s = FILTER_COLO_REWRITER(nf);
  43
  44    if (!qemu_net_queue_flush(s->incoming_queue)) {
  45        /* Unable to empty the queue, purge remaining packets */
  46        qemu_net_queue_purge(s->incoming_queue, nf->netdev);
  47    }
  48}
  49
  50/*
  51 * Return 1 on success, if return 0 means the pkt
  52 * is not TCP packet
  53 */
  54static int is_tcp_packet(Packet *pkt)
  55{
  56    if (!parse_packet_early(pkt) &&
  57        pkt->ip->ip_p == IPPROTO_TCP) {
  58        return 1;
  59    } else {
  60        return 0;
  61    }
  62}
  63
  64/* handle tcp packet from primary guest */
  65static int handle_primary_tcp_pkt(NetFilterState *nf,
  66                                  Connection *conn,
  67                                  Packet *pkt)
  68{
  69    struct tcphdr *tcp_pkt;
  70
  71    tcp_pkt = (struct tcphdr *)pkt->transport_header;
  72    if (trace_event_get_state_backends(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
  73        trace_colo_filter_rewriter_pkt_info(__func__,
  74                    inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst),
  75                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
  76                    tcp_pkt->th_flags);
  77        trace_colo_filter_rewriter_conn_offset(conn->offset);
  78    }
  79
  80    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
  81        /*
  82         * we use this flag update offset func
  83         * run once in independent tcp connection
  84         */
  85        conn->syn_flag = 1;
  86    }
  87
  88    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
  89        if (conn->syn_flag) {
  90            /*
  91             * offset = secondary_seq - primary seq
  92             * ack packet sent by guest from primary node,
  93             * so we use th_ack - 1 get primary_seq
  94             */
  95            conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
  96            conn->syn_flag = 0;
  97        }
  98        if (conn->offset) {
  99            /* handle packets to the secondary from the primary */
 100            tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset);
 101
 102            net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
 103                                   pkt->size - pkt->vnet_hdr_len);
 104        }
 105    }
 106
 107    return 0;
 108}
 109
 110/* handle tcp packet from secondary guest */
 111static int handle_secondary_tcp_pkt(NetFilterState *nf,
 112                                    Connection *conn,
 113                                    Packet *pkt)
 114{
 115    struct tcphdr *tcp_pkt;
 116
 117    tcp_pkt = (struct tcphdr *)pkt->transport_header;
 118
 119    if (trace_event_get_state_backends(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
 120        trace_colo_filter_rewriter_pkt_info(__func__,
 121                    inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst),
 122                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
 123                    tcp_pkt->th_flags);
 124        trace_colo_filter_rewriter_conn_offset(conn->offset);
 125    }
 126
 127    if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
 128        /*
 129         * save offset = secondary_seq and then
 130         * in handle_primary_tcp_pkt make offset
 131         * = secondary_seq - primary_seq
 132         */
 133        conn->offset = ntohl(tcp_pkt->th_seq);
 134    }
 135
 136    if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
 137        /* Only need to adjust seq while offset is Non-zero */
 138        if (conn->offset) {
 139            /* handle packets to the primary from the secondary*/
 140            tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset);
 141
 142            net_checksum_calculate((uint8_t *)pkt->data + pkt->vnet_hdr_len,
 143                                   pkt->size - pkt->vnet_hdr_len);
 144        }
 145    }
 146
 147    return 0;
 148}
 149
 150static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
 151                                         NetClientState *sender,
 152                                         unsigned flags,
 153                                         const struct iovec *iov,
 154                                         int iovcnt,
 155                                         NetPacketSent *sent_cb)
 156{
 157    RewriterState *s = FILTER_COLO_REWRITER(nf);
 158    Connection *conn;
 159    ConnectionKey key;
 160    Packet *pkt;
 161    ssize_t size = iov_size(iov, iovcnt);
 162    ssize_t vnet_hdr_len = 0;
 163    char *buf = g_malloc0(size);
 164
 165    iov_to_buf(iov, iovcnt, 0, buf, size);
 166
 167    if (s->vnet_hdr) {
 168        vnet_hdr_len = nf->netdev->vnet_hdr_len;
 169    }
 170
 171    pkt = packet_new(buf, size, vnet_hdr_len);
 172    g_free(buf);
 173
 174    /*
 175     * if we get tcp packet
 176     * we will rewrite it to make secondary guest's
 177     * connection established successfully
 178     */
 179    if (pkt && is_tcp_packet(pkt)) {
 180
 181        fill_connection_key(pkt, &key);
 182
 183        if (sender == nf->netdev) {
 184            /*
 185             * We need make tcp TX and RX packet
 186             * into one connection.
 187             */
 188            reverse_connection_key(&key);
 189        }
 190        conn = connection_get(s->connection_track_table,
 191                              &key,
 192                              NULL);
 193
 194        if (sender == nf->netdev) {
 195            /* NET_FILTER_DIRECTION_TX */
 196            if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
 197                qemu_net_queue_send(s->incoming_queue, sender, 0,
 198                (const uint8_t *)pkt->data, pkt->size, NULL);
 199                packet_destroy(pkt, NULL);
 200                pkt = NULL;
 201                /*
 202                 * We block the packet here,after rewrite pkt
 203                 * and will send it
 204                 */
 205                return 1;
 206            }
 207        } else {
 208            /* NET_FILTER_DIRECTION_RX */
 209            if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
 210                qemu_net_queue_send(s->incoming_queue, sender, 0,
 211                (const uint8_t *)pkt->data, pkt->size, NULL);
 212                packet_destroy(pkt, NULL);
 213                pkt = NULL;
 214                /*
 215                 * We block the packet here,after rewrite pkt
 216                 * and will send it
 217                 */
 218                return 1;
 219            }
 220        }
 221    }
 222
 223    packet_destroy(pkt, NULL);
 224    pkt = NULL;
 225    return 0;
 226}
 227
 228static void colo_rewriter_cleanup(NetFilterState *nf)
 229{
 230    RewriterState *s = FILTER_COLO_REWRITER(nf);
 231
 232    /* flush packets */
 233    if (s->incoming_queue) {
 234        filter_rewriter_flush(nf);
 235        g_free(s->incoming_queue);
 236    }
 237}
 238
 239static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
 240{
 241    RewriterState *s = FILTER_COLO_REWRITER(nf);
 242
 243    s->connection_track_table = g_hash_table_new_full(connection_key_hash,
 244                                                      connection_key_equal,
 245                                                      g_free,
 246                                                      connection_destroy);
 247    s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
 248}
 249
 250static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp)
 251{
 252    RewriterState *s = FILTER_COLO_REWRITER(obj);
 253
 254    return s->vnet_hdr;
 255}
 256
 257static void filter_rewriter_set_vnet_hdr(Object *obj,
 258                                         bool value,
 259                                         Error **errp)
 260{
 261    RewriterState *s = FILTER_COLO_REWRITER(obj);
 262
 263    s->vnet_hdr = value;
 264}
 265
 266static void filter_rewriter_init(Object *obj)
 267{
 268    RewriterState *s = FILTER_COLO_REWRITER(obj);
 269
 270    s->vnet_hdr = false;
 271    object_property_add_bool(obj, "vnet_hdr_support",
 272                             filter_rewriter_get_vnet_hdr,
 273                             filter_rewriter_set_vnet_hdr, NULL);
 274}
 275
 276static void colo_rewriter_class_init(ObjectClass *oc, void *data)
 277{
 278    NetFilterClass *nfc = NETFILTER_CLASS(oc);
 279
 280    nfc->setup = colo_rewriter_setup;
 281    nfc->cleanup = colo_rewriter_cleanup;
 282    nfc->receive_iov = colo_rewriter_receive_iov;
 283}
 284
 285static const TypeInfo colo_rewriter_info = {
 286    .name = TYPE_FILTER_REWRITER,
 287    .parent = TYPE_NETFILTER,
 288    .class_init = colo_rewriter_class_init,
 289    .instance_init = filter_rewriter_init,
 290    .instance_size = sizeof(RewriterState),
 291};
 292
 293static void register_types(void)
 294{
 295    type_register_static(&colo_rewriter_info);
 296}
 297
 298type_init(register_types);
 299