linux/samples/bpf/do_hbm_test.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 Facebook
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.

Usage() {
  echo "Script for testing HBM (Host Bandwidth Manager) framework."
  echo "It creates a cgroup to use for testing and loads a BPF program to limit"
  echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
  echo "loads. The output is the goodput in Mbps (unless -D was used)."
  echo ""
  echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
  echo "             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]"
  echo "             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]"
  echo "             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
  echo "             [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]"
  echo "             [-s=<server>|--server=<server>] [-S|--stats]"
  echo "             [-t=<time>|--time=<time>] [-w] [cubic|dctcp]"
  echo "  Where:"
  echo "    out               egress (default)"
  echo "    -b or --bpf       BPF program filename to load and attach."
  echo "                      Default is hbm_out_kern.o for egress."
  echo "    -c or --cc        TCP congestion control (cubic or dctcp)"
  echo "    --debug           print BPF trace buffer"
  echo "    -d or --delay     add a delay in ms using netem"
  echo "    -D                In addition to the goodput in Mbps, it also outputs"
  echo "                      other detailed information. This information is"
  echo "                      test dependent (i.e. iperf3 or netperf)."
  echo "    -E                enable ECN (not required for dctcp)"
  echo "    --edt             use fq's Earliest Departure Time (requires fq)"
  echo "    -f or --flows     number of concurrent flows (default=1)"
  echo "    -h                Help"
  echo "    -i or --id        cgroup id (an integer, default is 1)"
  echo "    -l                do not limit flows using loopback"
  echo "    -N                use netperf instead of iperf3"
  echo "    --no_cn           Do not return CN notifications"
  echo "    -p or --port      iperf3 port (default is 5201)"
  echo "    -P                use an iperf3 instance for each flow"
  echo "    -q                use the specified qdisc"
  echo "    -r or --rate      rate in Mbps (default is 1Gbps)"
  echo "    -R                Use TCP_RR for netperf. 1st flow has req"
  echo "                      size of 10KB, rest of 1MB. Reply in all"
  echo "                      cases is 1 byte."
  echo "                      More detailed output for each flow can be found"
  echo "                      in the files netperf.<cg>.<flow>, where <cg> is the"
  echo "                      cgroup id as specified with the -i flag, and <flow>"
  echo "                      is the flow id, starting at 1 and increasing by 1"
  echo "                      per flow (as specified by -f)."
  echo "    -s or --server    hostname of netperf server. Used to create netperf"
  echo "                      test traffic between two hosts (default is within host)."
  echo "                      netserver must be running on the host."
  echo "    -S or --stats     update hbm stats (default is no)."
  echo "    -t or --time      duration of iperf3 in seconds (default=5)"
  echo "    -w                Work conserving flag. cgroup can increase its"
  echo "                      bandwidth beyond the rate limit specified"
  echo "                      while there is available bandwidth. Current"
  echo "                      implementation assumes there is only one NIC"
  echo "                      (eth0), but can be extended to support multiple"
  echo "                      NICs."
  echo "    cubic or dctcp    specify which TCP CC to use"
  echo " "
  exit
}
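
# Example invocations (illustrative; values are arbitrary):
#   ./do_hbm_test.sh out -r=500 -t=10 dctcp   # limit egress to 500 Mbps for 10s using dctcp
#   ./do_hbm_test.sh -N -f=4 --stats cubic    # 4 netperf flows with hbm stats, using cubic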

#set -x

debug_flag=0
args="$@"
name="$0"
netem=0
cc=x
dir="-o"
dir_name="out"
dur=5
flows=1
id=1
prog=""
port=5201
rate=1000
multi_iperf=0
flow_cnt=1
use_netperf=0
rr=0
ecn=0
details=0
server=""
qdisc=""
flags=""
do_stats=0

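# hbm pins its BPF program under $BPFFS/hbm* (removed during cleanup at the
# end of this script), so bpffs must be mounted before hbm is started.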
BPFFS=/sys/fs/bpf
function config_bpffs () {
	if mount | grep -q $BPFFS ; then
		echo "bpffs already mounted"
	else
		echo "bpffs not mounted. Mounting..."
		mount -t bpf none $BPFFS
	fi
}

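# Start the hbm user-space loader in the background with the accumulated
# flags; its output goes to hbm.out. Prints the PID of the background job
# so the caller can capture it with command substitution.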
function start_hbm () {
  rm -f hbm.out
  echo "./hbm $dir -n $id -r $rate -t $dur $flags $prog" > hbm.out
  echo " " >> hbm.out
  ./hbm $dir -n $id -r $rate -t $dur $flags $prog >> hbm.out 2>&1  &
  echo $!
}

processArgs () {
  for i in $args ; do
    case $i in
    #in)         # support for upcoming ingress rate limiting
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${i#*=}"
      ;;
    -c=*|--cc=*)
      cc="${i#*=}"
      ;;
    --no_cn)
      flags="$flags --no_cn"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${i#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    --edt)
      flags="$flags --edt"
      qdisc="fq"
      ;;
    -f=*|--flows=*)
      flows="${i#*=}"
      ;;
    -i=*|--id=*)
      id="${i#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${i#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${i#*=}"
      ;;
    -r=*|--rate=*)
      rate="${i#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${i#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${i#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg:$i"
      Usage
      ;;
    esac
  done
}

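# Main flow: parse arguments, ensure bpffs is mounted, start hbm in the
# background, and wait briefly for it to finish its setup.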
processArgs
config_bpffs

if [ $debug_flag -eq 1 ] ; then
  rm -f hbm_out.log
fi

hbm_pid=$(start_hbm)
sleep 0.1

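# Join the cgroup set up by hbm so that all traffic generated below is
# subject to the attached BPF program's bandwidth limit.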
host=`hostname`
cg_base_dir=/sys/fs/cgroup/unified
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

echo $$ >> $cg_dir/cgroup.procs

ulimit -l unlimited

rm -f ss.out
rm -f hbm.[0-9]*.$dir_name
if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

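# Save the current congestion control so it can be restored after the test.
# Only needed for iperf3; netperf selects the CC per-connection via -K below.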
if [ $use_netperf -eq 0 ] ; then
  cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
  if [ "$cc" != "x" ] ; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
  fi
fi

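# Optional qdisc setup: -d/--delay adds a netem delay on loopback (and
# overrides -q); -q or --edt replaces the root qdisc on eth0.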
if [ "$netem" -ne "0" ] ; then
  if [ "$qdisc" != "" ] ; then
    echo "WARNING: Ignoring -q option because -d option was used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay ${netem}ms > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
  tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
fi

n=0
m=$((dur * 5))
hn="::1"
if [ $use_netperf -ne 0 ] ; then
  if [ "$server" != "" ] ; then
    hn=$server
  fi
fi

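# Background ping to sample latency while the test runs: one probe every
# 200ms, m = dur * 5 probes in total, i.e. for the duration of the test.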
( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &

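# Three load-generation modes: netperf (-N), a single multi-flow iperf3
# (default), or one iperf3 instance per flow (-P).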
if [ $use_netperf -ne 0 ] ; then
  begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
                   awk '{ print $1 }'`
  if [ "$begNetserverPid" == "" ] ; then
    if [ "$server" == "" ] ; then
      ( ./netserver > /dev/null 2>&1) &
      sleep 0.1
    fi
  fi
  flow_cnt=1
  if [ "$server" == "" ] ; then
    np_server=$host
  else
    np_server=$server
  fi
  if [ "$cc" == "x" ] ; then
    np_cc=""
  else
    np_cc="-K $cc,$cc"
  fi
  replySize=1
  while [ $flow_cnt -le $flows ] ; do
    if [ $rr -ne 0 ] ; then
      reqSize=1M
      if [ $flow_cnt -eq 1 ] ; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ] ; then
        replySize=$reqSize
        reqSize=1
      fi
      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR  -- -r $reqSize,$replySize $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
    else
      if [ "$dir" == "-i" ] ; then
        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
      else
        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
      fi
    fi
    flow_cnt=$((flow_cnt+1))
  done

  # sleep for duration of test (plus some buffer)
  n=$((dur+2))
  sleep $n

  # force graceful termination of netperf
  pids=`pgrep netperf`
  for p in $pids ; do
    kill -SIGALRM $p
  done

  flow_cnt=1
  rate=0
  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
  fi
  while [ $flow_cnt -le $flows ] ; do
    if [ "$dir" == "-i" ] ; then
      r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
    else
      r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
    fi
    echo "rate for flow $flow_cnt: $r"
    rate=$((rate+r))
    if [ $details -ne 0 ] ; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat netperf.$id.$flow_cnt
    fi
    flow_cnt=$((flow_cnt+1))
  done
  if [ $details -ne 0 ] ; then
    echo ""
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
elif [ $multi_iperf -eq 0 ] ; then
  (iperf3 -s -p $port -1 > /dev/null 2>&1) &
  sleep 0.1
  iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
  rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
  rate=`echo $rates | grep -o "[0-9]*$"`

  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
else
  flow_cnt=1
  while [ $flow_cnt -le $flows ] ; do
    (iperf3 -s -p $port -1 > /dev/null 2>&1) &
    ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
    port=$((port+1))
    flow_cnt=$((flow_cnt+1))
  done
  n=$((dur+1))
  sleep $n
  flow_cnt=1
  rate=0
  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
  fi

  while [ $flow_cnt -le $flows ] ; do
    r=`cat iperf3.$id.$flow_cnt`
#    echo "rate for flow $flow_cnt: $r"
    if [ $details -ne 0 ] ; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    rate=$((rate+r))
    flow_cnt=$((flow_cnt+1))
  done
  if [ $details -ne 0 ] ; then
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
fi

if [ $use_netperf -eq 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
fi
if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=0
fi
if [ "$netem" -ne "0" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
fi
if [ "$qdisc" != "" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
fi
sleep 2

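# Kill hbm only if the PID we recorded still belongs to a running hbm
# process (it normally exits on its own after -t seconds).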
hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
if [ "$hbmPid" == "$hbm_pid" ] ; then
  kill $hbm_pid
fi

sleep 1

# Detach any pinned BPF programs that may have lingered
rm -rf $BPFFS/hbm*

if [ $use_netperf -ne 0 ] ; then
  if [ "$server" == "" ] ; then
    if [ "$begNetserverPid" == "" ] ; then
      netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
      if [ "$netserverPid" != "" ] ; then
        kill $netserverPid
      fi
    fi
  fi
fi
exit