linux/net/netfilter/nft_set_pipapo.h
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2
   3#ifndef _NFT_SET_PIPAPO_H
   4
   5#include <linux/log2.h>
   6#include <net/ipv6.h>                   /* For the maximum length of a field */
   7
   8/* Count of concatenated fields depends on count of 32-bit nftables registers */
   9#define NFT_PIPAPO_MAX_FIELDS           NFT_REG32_COUNT
  10
  11/* Restrict usage to multiple fields, make sure rbtree is used otherwise */
  12#define NFT_PIPAPO_MIN_FIELDS           2
  13
  14/* Largest supported field size */
  15#define NFT_PIPAPO_MAX_BYTES            (sizeof(struct in6_addr))
  16#define NFT_PIPAPO_MAX_BITS             (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE)
  17
  18/* Bits to be grouped together in table buckets depending on set size */
  19#define NFT_PIPAPO_GROUP_BITS_INIT      NFT_PIPAPO_GROUP_BITS_SMALL_SET
  20#define NFT_PIPAPO_GROUP_BITS_SMALL_SET 8
  21#define NFT_PIPAPO_GROUP_BITS_LARGE_SET 4
  22#define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4                                \
  23        BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) ||          \
  24                     (NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4))
  25#define NFT_PIPAPO_GROUPS_PER_BYTE(f)   (BITS_PER_BYTE / (f)->bb)
  26
  27/* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the
  28 * small group width, and switch to the big group width if the table gets
  29 * smaller than NFT_PIPAPO_LT_SIZE_LOW.
  30 *
  31 * Picking 2MiB as threshold (for a single table) avoids as much as possible
  32 * crossing page boundaries on most architectures (x86-64 and MIPS huge pages,
  33 * ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which
  34 * keeps performance nice in case kvmalloc() gives us non-contiguous areas.
  35 */
  36#define NFT_PIPAPO_LT_SIZE_THRESHOLD    (1 << 21)
  37#define NFT_PIPAPO_LT_SIZE_HYSTERESIS   (1 << 16)
  38#define NFT_PIPAPO_LT_SIZE_HIGH         NFT_PIPAPO_LT_SIZE_THRESHOLD
  39#define NFT_PIPAPO_LT_SIZE_LOW          NFT_PIPAPO_LT_SIZE_THRESHOLD -  \
  40                                        NFT_PIPAPO_LT_SIZE_HYSTERESIS
  41
  42/* Fields are padded to 32 bits in input registers */
  43#define NFT_PIPAPO_GROUPS_PADDED_SIZE(f)                                \
  44        (round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32)))
  45#define NFT_PIPAPO_GROUPS_PADDING(f)                                    \
  46        (NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups /               \
  47                                            NFT_PIPAPO_GROUPS_PER_BYTE(f))
  48
  49/* Number of buckets given by 2 ^ n, with n bucket bits */
  50#define NFT_PIPAPO_BUCKETS(bb)          (1 << (bb))
  51
  52/* Each n-bit range maps to up to n * 2 rules */
  53#define NFT_PIPAPO_MAP_NBITS            (const_ilog2(NFT_PIPAPO_MAX_BITS * 2))
  54
  55/* Use the rest of mapping table buckets for rule indices, but it makes no sense
  56 * to exceed 32 bits
  57 */
  58#if BITS_PER_LONG == 64
  59#define NFT_PIPAPO_MAP_TOBITS           32
  60#else
  61#define NFT_PIPAPO_MAP_TOBITS           (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS)
  62#endif
  63
  64/* ...which gives us the highest allowed index for a rule */
  65#define NFT_PIPAPO_RULE0_MAX            ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \
  66                                        - (1UL << NFT_PIPAPO_MAP_NBITS))
  67
  68/* Definitions for vectorised implementations */
  69#ifdef NFT_PIPAPO_ALIGN
  70#define NFT_PIPAPO_ALIGN_HEADROOM                                       \
  71        (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN)
  72#define NFT_PIPAPO_LT_ALIGN(lt)         (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN))
  73#define NFT_PIPAPO_LT_ASSIGN(field, x)                                  \
  74        do {                                                            \
  75                (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x);           \
  76                (field)->lt = (x);                                      \
  77        } while (0)
  78#else
  79#define NFT_PIPAPO_ALIGN_HEADROOM       0
  80#define NFT_PIPAPO_LT_ALIGN(lt)         (lt)
  81#define NFT_PIPAPO_LT_ASSIGN(field, x)  ((field)->lt = (x))
  82#endif /* NFT_PIPAPO_ALIGN */
  83
  84#define nft_pipapo_for_each_field(field, index, match)          \
  85        for ((field) = (match)->f, (index) = 0;                 \
  86             (index) < (match)->field_count;                    \
  87             (index)++, (field)++)
  88
  89/**
  90 * union nft_pipapo_map_bucket - Bucket of mapping table
  91 * @to:         First rule number (in next field) this rule maps to
  92 * @n:          Number of rules (in next field) this rule maps to
  93 * @e:          If there's no next field, pointer to element this rule maps to
  94 */
  95union nft_pipapo_map_bucket {
  96        struct {
  97#if BITS_PER_LONG == 64
  98                static_assert(NFT_PIPAPO_MAP_TOBITS <= 32);
  99                u32 to;
 100
 101                static_assert(NFT_PIPAPO_MAP_NBITS <= 32);
 102                u32 n;
 103#else
 104                unsigned long to:NFT_PIPAPO_MAP_TOBITS;
 105                unsigned long  n:NFT_PIPAPO_MAP_NBITS;
 106#endif
 107        };
 108        struct nft_pipapo_elem *e;
 109};
 110
 111/**
 112 * struct nft_pipapo_field - Lookup, mapping tables and related data for a field
 113 * @groups:     Amount of bit groups
 114 * @rules:      Number of inserted rules
 115 * @bsize:      Size of each bucket in lookup table, in longs
 116 * @bb:         Number of bits grouped together in lookup table buckets
 117 * @lt:         Lookup table: 'groups' rows of buckets
 118 * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes
 119 * @mt:         Mapping table: one bucket per rule
 120 */
 121struct nft_pipapo_field {
 122        int groups;
 123        unsigned long rules;
 124        size_t bsize;
 125        int bb;
 126#ifdef NFT_PIPAPO_ALIGN
 127        unsigned long *lt_aligned;
 128#endif
 129        unsigned long *lt;
 130        union nft_pipapo_map_bucket *mt;
 131};
 132
 133/**
 134 * struct nft_pipapo_match - Data used for lookup and matching
 135 * @field_count         Amount of fields in set
 136 * @scratch:            Preallocated per-CPU maps for partial matching results
 137 * @scratch_aligned:    Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
 138 * @bsize_max:          Maximum lookup table bucket size of all fields, in longs
 139 * @rcu                 Matching data is swapped on commits
 140 * @f:                  Fields, with lookup and mapping tables
 141 */
 142struct nft_pipapo_match {
 143        int field_count;
 144#ifdef NFT_PIPAPO_ALIGN
 145        unsigned long * __percpu *scratch_aligned;
 146#endif
 147        unsigned long * __percpu *scratch;
 148        size_t bsize_max;
 149        struct rcu_head rcu;
 150        struct nft_pipapo_field f[];
 151};
 152
 153/**
 154 * struct nft_pipapo - Representation of a set
 155 * @match:      Currently in-use matching data
 156 * @clone:      Copy where pending insertions and deletions are kept
 157 * @width:      Total bytes to be matched for one packet, including padding
 158 * @dirty:      Working copy has pending insertions or deletions
 159 * @last_gc:    Timestamp of last garbage collection run, jiffies
 160 */
 161struct nft_pipapo {
 162        struct nft_pipapo_match __rcu *match;
 163        struct nft_pipapo_match *clone;
 164        int width;
 165        bool dirty;
 166        unsigned long last_gc;
 167};
 168
 169struct nft_pipapo_elem;
 170
 171/**
 172 * struct nft_pipapo_elem - API-facing representation of single set element
 173 * @ext:        nftables API extensions
 174 */
 175struct nft_pipapo_elem {
 176        struct nft_set_ext ext;
 177};
 178
 179int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
 180                  union nft_pipapo_map_bucket *mt, bool match_only);
 181
 182/**
 183 * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
 184 * @f:          Field including lookup table
 185 * @dst:        Area to store result
 186 * @data:       Input data selecting table buckets
 187 */
 188static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f,
 189                                                 unsigned long *dst,
 190                                                 const u8 *data)
 191{
 192        unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
 193        int group;
 194
 195        for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) {
 196                u8 v;
 197
 198                v = *data >> 4;
 199                __bitmap_and(dst, dst, lt + v * f->bsize,
 200                             f->bsize * BITS_PER_LONG);
 201                lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
 202
 203                v = *data & 0x0f;
 204                __bitmap_and(dst, dst, lt + v * f->bsize,
 205                             f->bsize * BITS_PER_LONG);
 206                lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
 207        }
 208}
 209
 210/**
 211 * pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets
 212 * @f:          Field including lookup table
 213 * @dst:        Area to store result
 214 * @data:       Input data selecting table buckets
 215 */
 216static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f,
 217                                                 unsigned long *dst,
 218                                                 const u8 *data)
 219{
 220        unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
 221        int group;
 222
 223        for (group = 0; group < f->groups; group++, data++) {
 224                __bitmap_and(dst, dst, lt + *data * f->bsize,
 225                             f->bsize * BITS_PER_LONG);
 226                lt += f->bsize * NFT_PIPAPO_BUCKETS(8);
 227        }
 228}
 229
 230/**
 231 * pipapo_estimate_size() - Estimate worst-case for set size
 232 * @desc:       Set description, element count and field description used here
 233 *
 234 * The size for this set type can vary dramatically, as it depends on the number
 235 * of rules (composing netmasks) the entries expand to. We compute the worst
 236 * case here.
 237 *
 238 * In general, for a non-ranged entry or a single composing netmask, we need
 239 * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that
 240 * is, each input bit needs four bits of matching data), plus a bucket in the
 241 * mapping table for each field.
 242 *
 243 * Return: worst-case set size in bytes, 0 on any overflow
 244 */
 245static u64 pipapo_estimate_size(const struct nft_set_desc *desc)
 246{
 247        unsigned long entry_size;
 248        u64 size;
 249        int i;
 250
 251        for (i = 0, entry_size = 0; i < desc->field_count; i++) {
 252                unsigned long rules;
 253
 254                if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES)
 255                        return 0;
 256
 257                /* Worst-case ranges for each concatenated field: each n-bit
 258                 * field can expand to up to n * 2 rules in each bucket, and
 259                 * each rule also needs a mapping bucket.
 260                 */
 261                rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2;
 262                entry_size += rules *
 263                              NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) /
 264                              BITS_PER_BYTE;
 265                entry_size += rules * sizeof(union nft_pipapo_map_bucket);
 266        }
 267
 268        /* Rules in lookup and mapping tables are needed for each entry */
 269        size = desc->size * entry_size;
 270        if (size && div_u64(size, desc->size) != entry_size)
 271                return 0;
 272
 273        size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2;
 274
 275        size += sizeof(struct nft_pipapo_field) * desc->field_count;
 276
 277        return size;
 278}
 279
 280#endif /* _NFT_SET_PIPAPO_H */
 281