adds ovs-dpdkctl tool
- This change introduces a new tool for configuring host interfaces for use with ovs and dpdk. - The ovs-dpdkctl tool will be executed via systemd when a system first boots to bind interfaces to dpdk compatible drivers. - The ovs-dpdkctl tool will be injected into the ovsdb container to allow external configuration of ovs bridges and ports for use with dpdk. Change-Id: Ie8f32d097f0a6816c2ddd03ade926c00837da322
This commit is contained in:
parent
4b5b14948b
commit
c29244a380
390
tools/ovs-dpdkctl.sh
Executable file
390
tools/ovs-dpdkctl.sh
Executable file
@ -0,0 +1,390 @@
|
|||||||
|
#!/bin/bash

# Capture the command that restores the current xtrace setting; it is
# replayed as the very last statement of the script.
_XTRACE_OVS_DPDK_CTL=$(set +o | grep xtrace)
# Command tracing is opt-in through OVS_DPDK_CTL_DEBUG=True.
if [ "${OVS_DPDK_CTL_DEBUG}" = "True" ]; then
    set -o xtrace
fi

# Resolved path of this script.
FULL_PATH=$(realpath "${BASH_SOURCE[0]}")
# Configuration file location; a non-empty CONFIG_FILE from the environment wins.
: "${CONFIG_FILE:=/etc/default/ovs-dpdk.conf}"
# Location of the systemd unit file for this tool.
SERVICE_FILE="/etc/systemd/system/ovs-dpdkctl.service"
|
||||||
|
|
||||||
|
# Read from the configuration file through crudini.
# Globals:   CONFIG_FILE (read)
# Arguments: forwarded verbatim to "crudini --get" after the file name
#            (typically: section, parameter). gen_config also calls this
#            with no arguments and treats the output as section names.
# Outputs:   whatever crudini prints on stdout.
# Returns:   crudini's exit status.
function get_value {
    # Quoted so that a path or argument containing spaces or glob
    # characters reaches crudini as a single word.
    crudini --get "$CONFIG_FILE" "$@"
}
|
||||||
|
|
||||||
|
# Write one parameter to the configuration file through crudini.
# Globals:   CONFIG_FILE (read)
# Arguments: $1 - section, $2 - parameter, $3 - value (may be empty/omitted)
# Returns:   crudini's exit status.
function set_value {
    # Every word is quoted so values and paths with spaces survive intact.
    crudini --set "$CONFIG_FILE" "$1" "$2" "$3"
}
|
||||||
|
|
||||||
|
# Delete from the configuration file through crudini.
# Globals:   CONFIG_FILE (read)
# Arguments: forwarded verbatim to "crudini --del" after the file name
#            (typically: section, parameter).
# Returns:   crudini's exit status.
function del_value {
    crudini --del "$CONFIG_FILE" "$@"
}
|
||||||
|
|
||||||
|
# Report whether a configuration value exists and is non-empty.
# Globals:   CONFIG_FILE (read)
# Arguments: forwarded to "crudini --get" (typically: section, parameter)
# Outputs:   prints "0" when the lookup succeeds with a non-empty value;
#            otherwise prints a non-zero number (crudini's exit status when
#            the lookup fails, "1" when the value is empty).
# Note:      the result is reported on stdout, not via the return status;
#            callers compare the printed text against 0.
function is_set {
    local value status
    # A single lookup is enough; crudini's diagnostics are suppressed
    # because "not present" is an expected answer here.
    value=$(crudini --get "$CONFIG_FILE" "$@" 2> /dev/null)
    status=$?
    if [[ "$status" -eq 0 && -z "$value" ]]; then
        status=1
    fi
    echo "$status"
}
|
||||||
|
|
||||||
|
# Print the contents of the configuration file to stdout.
# Globals:   CONFIG_FILE (read)
# Returns:   cat's exit status.
function show_config {
    # Quoted (and "--"-guarded) so unusual paths are passed as one operand.
    cat -- "$CONFIG_FILE"
}
|
||||||
|
|
||||||
|
# Print the path of the configuration file in use.
# Globals:   CONFIG_FILE (read)
function get_config {
    # printf with a quoted argument prints the path exactly as stored;
    # an unquoted echo would collapse runs of whitespace and expand globs.
    printf '%s\n' "$CONFIG_FILE"
}
|
||||||
|
|
||||||
|
# Remove the configuration file; a missing file is not an error (rm -f).
# Globals:   CONFIG_FILE (read)
function del_config {
    # Quoted so a path with spaces removes that file and nothing else.
    rm -f -- "$CONFIG_FILE"
}
|
||||||
|
|
||||||
|
# Build the "-w <pci address>" flag list for every dpdk nic.
# Uses:      list_dpdk_nics for the nic names and get_value for each
#            nic's "address" entry.
# Outputs:   one line on stdout: the flags separated by single spaces
#            (an empty line when there are no nics).
function generate_pciwhitelist {
    # Locals keep the loop from overwriting same-named variables in callers.
    local nic address
    local -a flags=()
    for nic in $(list_dpdk_nics); do
        address=$(get_value "$nic" address)
        if [[ -z "$address" ]]; then
            # A bare "-w" with no address would corrupt the flag list.
            echo "generate_pciwhitelist: no address for nic '$nic', skipping" >&2
            continue
        fi
        flags+=("-w $address")
    done
    local IFS=' '
    printf '%s\n' "${flags[*]}"
}
|
||||||
|
|
||||||
|
# Derive port mappings from the configured bridge mappings.
# For each "<physnet>:<bridge>" entry of "ovs bridge_mappings" the port name
# is the bridge name with its first "br-" removed, giving "<port>:<bridge>".
# Uses:      get_value
# Outputs:   one line on stdout: the derived pairs joined by commas
#            (an empty line when there are no bridge mappings).
function gen_port_mappings {
    # Everything is local so no helper variables leak into the caller.
    local raw_mappings entry bridge
    local -a entries=() port_pairs=()
    raw_mappings=$(get_value ovs bridge_mappings)
    # Split on commas and blanks without exposing the value to globbing.
    IFS=$', \t' read -r -a entries <<< "$raw_mappings"
    for entry in "${entries[@]}"; do
        [[ -n "$entry" ]] || continue
        bridge=${entry##*:}
        port_pairs+=("${bridge/br-/}:${bridge}")
    done
    local IFS=,
    printf '%s\n' "${port_pairs[*]}"
}
|
||||||
|
|
||||||
|
# Regenerate the configuration file from scratch.
# Steps:
#   1. delete and recreate CONFIG_FILE;
#   2. write the [ovs] section from the same-named environment variables,
#      falling back to the defaults below;
#   3. append one section per network interface whose sysfs link contains
#      "pci", with its address and an empty driver;
#   4. fill in the driver currently reported for every such section;
#   5. set the dpdk driver on the nics named in the port mappings;
#   6. store the pci whitelist (wrapped in single quotes).
# Globals:   CONFIG_FILE (read); bridge_mappings, port_mappings,
#            cidr_mappings, ovs_coremask, pmd_coremask, ovs_mem_channels,
#            ovs_socket_mem, dpdk_interface_driver, hugepage_mountpoint,
#            pci_whitelist (read, all optional)
function gen_config {
    local nic address dev target count
    local -a parts
    local dpdk_driver=${dpdk_interface_driver:-"uio_pci_generic"}

    del_config
    touch "$CONFIG_FILE"
    set_value ovs bridge_mappings "${bridge_mappings:-}"
    set_value ovs port_mappings "${port_mappings:-$(gen_port_mappings)}"
    set_value ovs cidr_mappings "${cidr_mappings:-}"
    set_value ovs ovs_coremask "${ovs_coremask:-0x1}"
    set_value ovs pmd_coremask "${pmd_coremask:-0x2}"
    set_value ovs ovs_mem_channels "${ovs_mem_channels:-4}"
    set_value ovs ovs_socket_mem "${ovs_socket_mem:-512}"
    set_value ovs dpdk_interface_driver "$dpdk_driver"
    set_value ovs hugepage_mountpoint "${hugepage_mountpoint:-/dev/hugepages}"

    # Resolve each interface symlink directly instead of parsing "ls -al".
    # The link target ends in ".../<pci address>/net/<name>", so the address
    # is the third path component from the end.
    for dev in /sys/class/net/*; do
        target=$(readlink "$dev") || continue
        [[ "$target" == *pci* ]] || continue
        IFS=/ read -r -a parts <<< "$target"
        count=${#parts[@]}
        (( count >= 3 )) || continue
        printf '\n[%s]\naddress = %s\ndriver =\n' \
            "${parts[count - 1]}" "${parts[count - 3]}" >> "$CONFIG_FILE"
    done

    # Every section except [ovs] is a nic; -x keeps nics whose name merely
    # contains "ovs" from being dropped.
    for nic in $(get_value | grep -vx ovs); do
        address=$(get_value "$nic" address)
        set_value "$nic" driver "$(get_driver_by_address "$address")"
    done
    for nic in $(list_dpdk_nics); do
        set_value "$nic" driver "$dpdk_driver"
    done
    set_value ovs pci_whitelist "'${pci_whitelist:-$(generate_pciwhitelist)}'"
}
|
||||||
|
|
||||||
|
# Bind a PCI device to a driver through sysfs.
# Arguments: $1 - PCI address of the device, $2 - name of the target driver
# Returns:   status of the final write to the driver's "bind" file.
function bind_nic {
    local address=$1 driver=$2
    # driver_override must be in place before the bind request: generic
    # drivers such as uio_pci_generic and vfio-pci carry no device ID table
    # and only accept a device that has been explicitly overridden to them.
    echo "$driver" > "/sys/bus/pci/devices/$address/driver_override"
    echo "$address" > "/sys/bus/pci/drivers/$driver/bind"
}
|
||||||
|
|
||||||
|
# Detach a PCI device from its driver and clear any driver override.
# Arguments: $1 - PCI address of the device
#            $2 - name of the driver the device is currently bound to;
#                 may be empty when the device has no driver
# Returns:   status of the write that clears driver_override.
function unbind_nic {
    local address=$1 driver=$2
    # With no current driver there is nothing to unbind; skipping avoids
    # a write to the bogus path /sys/bus/pci/drivers//unbind.
    if [[ -n "$driver" ]]; then
        echo "$address" > "/sys/bus/pci/drivers/$driver/unbind"
    fi
    # Writing an empty line removes the override.
    echo > "/sys/bus/pci/devices/$address/driver_override"
}
|
||||||
|
|
||||||
|
# List the nic names found in "ovs port_mappings", one per line.
# The value is a comma separated list of "<nic>:<bridge>" pairs; the part
# before the first colon of every pair is printed.
# Globals:   CONFIG_FILE (read)
# Outputs:   nic names on stdout; nothing when the value is empty or the
#            lookup fails.
function list_dpdk_nics {
    local raw_mappings pair
    local -a pairs=()
    raw_mappings=$(crudini --get "$CONFIG_FILE" ovs port_mappings) || return 0
    # The pairs share one comma separated line, so they must be split
    # before the nic field is taken; cutting the raw line on ":" would
    # yield only the first nic.
    IFS=$', \t' read -r -a pairs <<< "$raw_mappings"
    for pair in "${pairs[@]}"; do
        [[ -n "$pair" ]] || continue
        printf '%s\n' "${pair%%:*}"
    done
}
|
||||||
|
|
||||||
|
# Bind every dpdk nic to the driver recorded for it in the configuration.
# For each nic from list_dpdk_nics whose current driver differs from its
# configured "driver": remember the current driver as "old_driver", unbind
# the device, then bind it to the configured driver.
# Uses:      list_dpdk_nics, get_value, set_value, get_driver_by_address,
#            unbind_nic, bind_nic
function bind_nics {
    # Locals so the loop does not clobber same-named variables elsewhere.
    local nic device_address current_driver target_driver
    for nic in $(list_dpdk_nics); do
        device_address=$(get_value "$nic" address)
        current_driver=$(get_driver_by_address "$device_address")
        target_driver=$(get_value "$nic" driver)
        if [[ "$current_driver" != "$target_driver" ]]; then
            # old_driver is what unbind_nics later restores.
            set_value "$nic" old_driver "$current_driver"
            unbind_nic "$device_address" "$current_driver"
            bind_nic "$device_address" "$target_driver"
        fi
    done
}
|
||||||
|
|
||||||
|
# Restore every dpdk nic to the driver it had before bind_nics ran.
# Only nics with a non-empty "old_driver" entry are touched. When the
# current driver differs from old_driver the device is unbound, bound to
# old_driver, and the old_driver entry is removed.
# Uses:      list_dpdk_nics, is_set, get_value, del_value,
#            get_driver_by_address, unbind_nic, bind_nic
function unbind_nics {
    # Locals so the loop does not clobber same-named variables elsewhere.
    local nic device_address current_driver target_driver
    for nic in $(list_dpdk_nics); do
        # is_set reports through stdout: "0" means present and non-empty.
        [[ "$(is_set "$nic" old_driver)" == 0 ]] || continue
        device_address=$(get_value "$nic" address)
        current_driver=$(get_driver_by_address "$device_address")
        target_driver=$(get_value "$nic" old_driver)
        if [[ "$current_driver" != "$target_driver" ]]; then
            unbind_nic "$device_address" "$current_driver"
            bind_nic "$device_address" "$target_driver"
            del_value "$nic" old_driver
        fi
    done
}
|
||||||
|
|
||||||
|
|
||||||
|
# Print the PCI address behind a network interface name.
# Arguments: $1 - interface name as found under /sys/class/net
# Outputs:   the third-from-last path component of the interface's sysfs
#            link target, when that target contains "pci"; otherwise
#            nothing.
# Returns:   0 in all cases.
# NOTE(review): assumes the link target ends in "<pci address>/net/<name>";
# devices with an extra level (e.g. virtio) would yield that level instead.
function get_address_by_name {
    local target count
    local -a parts
    # readlink reads the symlink directly; no need to parse "ls -al" output.
    target=$(readlink "/sys/class/net/$1") || return 0
    [[ "$target" == *pci* ]] || return 0
    IFS=/ read -r -a parts <<< "$target"
    count=${#parts[@]}
    (( count >= 3 )) || return 0
    printf '%s\n' "${parts[count - 3]}"
}
|
||||||
|
|
||||||
|
# Print the name of the driver a PCI device is currently bound to.
# Arguments: $1 - PCI address of the device
# Outputs:   the last path component of the device's "driver" sysfs link;
#            nothing when the link cannot be read (e.g. no driver bound).
# Returns:   0 in all cases.
function get_driver_by_address {
    local link
    # readlink reads the symlink directly; no need to parse "ls -al" output.
    link=$(readlink "/sys/bus/pci/devices/$1/driver") || return 0
    printf '%s\n' "${link##*/}"
}
|
||||||
|
|
||||||
|
# Look up the bridge a port is mapped to in "ovs port_mappings".
# Arguments: $1 - port (nic) name
# Outputs:   the bridge name of the first matching "<port>:<bridge>" pair.
# Returns:   0 when a pair matched, 1 otherwise.
function get_port_bridge {
    local raw_mappings pair rest
    local -a pairs=()
    # The key is section "ovs", parameter "port_mappings".
    raw_mappings=$(get_value ovs port_mappings) || return 1
    # Pairs are comma separated on one line; split them before matching.
    IFS=$', \t' read -r -a pairs <<< "$raw_mappings"
    for pair in "${pairs[@]}"; do
        if [[ "${pair%%:*}" == "$1" ]]; then
            # Second ":"-separated field of the pair.
            rest=${pair#*:}
            printf '%s\n' "${rest%%:*}"
            return 0
        fi
    done
    return 1
}
|
||||||
|
|
||||||
|
# Initialise the ovs database and store the dpdk settings in it.
# Each other_config key on the Open_vSwitch record is filled from the
# matching value in the [ovs] section of the configuration file.
function init_ovs_db {
    local -a dpdk_settings

    ovs-vsctl init

    # Collected in the same order in which they are handed to ovs-vsctl.
    dpdk_settings=(
        "other_config:pmd-cpu-mask=$(get_value ovs pmd_coremask)"
        "other_config:dpdk-init=True"
        "other_config:dpdk-lcore-mask=$(get_value ovs ovs_coremask)"
        "other_config:dpdk-mem-channels=$(get_value ovs ovs_mem_channels)"
        "other_config:dpdk-socket-mem=$(get_value ovs ovs_socket_mem)"
        "other_config:dpdk-hugepage-dir=$(get_value ovs hugepage_mountpoint)"
        "other_config:dpdk-extra= --proc-type primary $(get_value ovs pci_whitelist) "
    )
    ovs-vsctl --no-wait set Open_vSwitch . "${dpdk_settings[@]}"
}
|
||||||
|
|
||||||
|
# Create (if missing) every bridge named in "ovs bridge_mappings" and set
# its datapath_type to netdev.
# The value is a comma separated list of "<physnet>:<bridge>" pairs.
# Uses:      get_value, sudo ovs-vsctl
function init_ovs_bridges {
    # Locals: in particular the global "bridge_mappings", which is also an
    # install-time input, must not be overwritten here.
    local raw_mappings pair rest bridge
    local -a pairs=()
    raw_mappings=$(get_value ovs bridge_mappings)
    IFS=$', \t' read -r -a pairs <<< "$raw_mappings"
    for pair in "${pairs[@]}"; do
        [[ -n "$pair" ]] || continue
        # Second ":"-separated field of the pair.
        rest=${pair#*:}
        bridge=${rest%%:*}
        sudo ovs-vsctl --no-wait -- --may-exist add-br "$bridge" \
            -- set Bridge "$bridge" datapath_type=netdev
    done
}
|
||||||
|
|
||||||
|
# Add one dpdk port to ovs for every nic returned by list_dpdk_nics.
# The nics are processed in ascending order of their "<address>,<nic>"
# pair, and the N-th one is first tried under the name dpdkN; if that
# command fails the port is added under the nic's own name with
# options:dpdk-devargs set to its PCI address.
# Uses:      list_dpdk_nics, get_value, get_port_bridge, ovs-vsctl
function init_ovs_interfaces {
    local nic address pair addr bridge
    local dpdk_port_number=0
    local -a pci_port_pairs=() sorted_pairs=()

    for nic in $(list_dpdk_nics); do
        address=$(get_value "$nic" address)
        pci_port_pairs+=("$address,$nic")
    done
    (( ${#pci_port_pairs[@]} > 0 )) || return 0

    # One pair per line so sort really orders them; the C locale keeps the
    # order byte-wise and therefore reproducible.
    mapfile -t sorted_pairs < <(printf '%s\n' "${pci_port_pairs[@]}" | LC_ALL=C sort)

    for pair in "${sorted_pairs[@]}"; do
        addr=${pair%%,*}
        nic=${pair#*,}
        nic=${nic%%,*}
        if bridge=$(get_port_bridge "$nic") && [[ -n "$bridge" ]]; then
            # ovs 2.6 and older requires dpdkX names, ovs 2.7+ requires dpdk-devargs instead.
            ovs-vsctl --no-wait --may-exist add-port "$bridge" "dpdk${dpdk_port_number}" \
                -- set Interface "dpdk${dpdk_port_number}" type=dpdk ||
                ovs-vsctl --no-wait --may-exist add-port "$bridge" "$nic" \
                    -- set Interface "$nic" type=dpdk options:dpdk-devargs="$addr"
        else
            echo "init_ovs_interfaces: no bridge mapped for nic '$nic', skipping" >&2
        fi
        # The index advances even for a skipped nic so later dpdkN names
        # keep their position in the sorted list.
        dpdk_port_number=$((dpdk_port_number + 1))
    done
}
|
||||||
|
|
||||||
|
# Run the three ovs bootstrap stages in order: database settings first,
# then bridges, then dpdk interfaces.
function init {
    local stage
    for stage in init_ovs_db init_ovs_bridges init_ovs_interfaces; do
        "$stage"
    done
}
|
||||||
|
|
||||||
|
# Write the systemd unit for ovs-dpdkctl, reload systemd and enable it.
# The unit runs "bind_nics" on start and "unbind_nics" on stop, with
# CONFIG_FILE passed through its environment.
# Globals:   SERVICE_FILE (read), CONFIG_FILE (read, expanded into the unit)
# Outputs:   the generated unit is echoed to stdout (tee).
# Returns:   1 when the unit file cannot be written, otherwise the status
#            of "systemctl enable".
function install_service {
    # Wants=network-pre.target accompanies Before=network-pre.target:
    # that target is passive and is only part of the boot transaction when
    # some unit pulls it in, so Before= alone may order against nothing.
    tee "$SERVICE_FILE" << EOF || return 1

[Unit]
Description=configuration service for ovs-dpdk nics.
Before=network-pre.target
Wants=network-pre.target
After=syslog.target

[Service]
# Uncomment to enable debug logging.
# Environment=OVS_DPDK_CTL_DEBUG=True
Environment=CONFIG_FILE=$CONFIG_FILE
Type=oneshot
RemainAfterExit=yes
ExecStart=/bin/ovs-dpdkctl bind_nics
ExecStop=/bin/ovs-dpdkctl unbind_nics

[Install]
WantedBy=multi-user.target

EOF
    systemctl daemon-reload
    systemctl enable ovs-dpdkctl
}
|
||||||
|
|
||||||
|
# Disable the ovs-dpdkctl unit, delete its unit file and make systemd
# re-read its configuration.
function uninstall_service {
    local unit_name="ovs-dpdkctl"
    systemctl disable "$unit_name"
    rm -f "$SERVICE_FILE"
    systemctl daemon-reload
}
|
||||||
|
|
||||||
|
# Install ovs-dpdkctl on this host. Each piece is only created when it is
# not already present: the systemd unit, the /bin/ovs-dpdkctl copy of this
# script, and the configuration file. The service is started afterwards.
function install {
    local installed_tool="/bin/ovs-dpdkctl"

    [[ -e "$SERVICE_FILE" ]] || install_service

    if [[ ! -e "$installed_tool" ]]; then
        cp "$FULL_PATH" "$installed_tool"
        chmod +x "$installed_tool"
    fi

    [[ -e "$CONFIG_FILE" ]] || gen_config

    systemctl start ovs-dpdkctl
}
|
||||||
|
|
||||||
|
# Remove ovs-dpdkctl from this host: stop the service, then delete the
# systemd unit, the /bin/ovs-dpdkctl copy and the configuration file,
# each only if it currently exists.
function uninstall {
    systemctl stop ovs-dpdkctl
    [[ ! -e "$SERVICE_FILE" ]] || uninstall_service
    [[ ! -e /bin/ovs-dpdkctl ]] || rm -f /bin/ovs-dpdkctl
    [[ ! -e "$CONFIG_FILE" ]] || rm -f "$CONFIG_FILE"
}
|
||||||
|
|
||||||
|
|
||||||
|
# Print the help text: purpose of the tool, the available commands and
# the variables that customise "install".
# The delimiter is quoted, so the text is emitted literally with no
# variable or command expansion.
# Three statements are aligned with what the script does: the pmd_coremask
# default (gen_config writes 0x2), the vfio driver name (vfio-pci, as in
# the introduction of this text) and the direction in which "br-" is
# attached to the port name.
function useage {
    cat << "EOF"
ovs-dpdkctl.sh: A tool to configure ovs with dpdk.

- This tool automate the process of binding host insterfacesto a dpdk
compaible driver (uio_pci_generic | vfio-pci) at boot.
- This tool automate bootstraping ovs so that it can use the
dpdk accelerated netdev datapath.

commands:
- install:
- installs ovs-dpdkctl as a systemd service.
- installs ovs-dpdkctl binary.
- generates ovs-dpdkctl configuration file.
- starts ovs-dpdkctl service.
- uninstall:
- stops ovs-dpdkctl service.
- uninstalls ovs-dpdkctl systemd service.
- uninstalls ovs-dpdkctl binary.
- removes ovs-dpdkctl configuration file.
- bind_nics:
- iterates over all dpdk interfaces defined in ovs-dpdkctl config
and binds the interface to the target driver specifed in the config
if current driver does not equal target.
- unbind_nics:
- iterates over all dpdk interfaces defined in ovs-dpdkctl config
and restores the interface to its original non dpdk driver.
- init:
- defines dpdk specific configuration paramater in the ovsdb.
- creates bridges as spcified by ovs bridge_mappings in
ovs-dpdkctl config.
- creates dpdk ports as defined by ovs port_mappings in
ovs-dpdkctl config.
- useage:
- prints this message

options:
- debuging:
- To enable debuging export OVS_DPDK_CTL_DEBUG=True
- install:
- The varibles discribed below can be defined to customise
installation of ovs-dpdkctl.
<variable>=<value> ovs-dpdkctl.sh install
- bridge_mappings:
- A comma seperated list of physnet to bridge mappings.
- Example: bridge_mappings=physnet1:br-ex1,physnet2:br-ex2
- Default: ""
- port_mappings:
- A comma seperated list of port to bridge mappings.
- Example: port_mappings=eth1:br-ex1,eth2:br-ex2
- Default: generated form bridge_mappings assuming bridge names
are constructed by prepending br- to port name.
- cidr_mappings:
- A comma seperated list of bridge to cidr mappings.
- Example: cidr_mappings=br-ex1:192.168.1.1/24,br-ex2:192.168.2.1/24
- Default: ""
- ovs_coremask:
- A hex encoded string container a bitmask of what cpu core
to pin the non dataplane treads of the ovs-vswitchd to.
- Node that only the first core of the bit mask is currently
used by ovs.
- Example: ovs_coremask=0x1
- Default: "0x1"
- pmd_coremask:
- A hex encoded string container a bitmask of what cpu cores
to pin the dataplane pool mode driver treads of the ovs-vswitchd to.
- Each bit set in the bitmask will result in the creating of a pmd.
- For best performance it is recomended to allocate at least 1 pmd per
numa node. On systems with HyperThreading enabled it is recomended to also
allocate the HT sibling core in the pmd_coremask.cores allocated
to ovs with dpdk via the pmd_coremask should be removed from the
nova vcpu_pin_set and isolated from the kernel scheduler.
- Note it is not recommended to isolate cores in the nova vcpu_pin_set
unless the host will be dedicated for vms that request cpu pinning.
- Example: pmd_coremask=0x4
- Default: "0x2"
- ovs_mem_channels:
- The number of memory channels supported by the plathforms.
- Example: ovs_mem_channels=2
- Default: "4"
- ovs_socket_mem:
- A comma seperated list of hugepage memory, specifed in MBs per numa node,
allocated to the ovs-vswitchd to use for the dpdk dataplane.
- For best performance memory should be allocated evenly across all numa node
that will run a pmd.
- Example: ovs_socket_mem=512,512
- Default: "512"
- hugepage_mountpoint:
- The hugepage mountpoint to use when allocating memory for dpdk.
- Example: hugepage_mountpoint=/dev/my_custom_mountpoint
- Default: "/dev/hugepages"
- dpdk_interface_driver:
- The dpdk compatible userspace driver to use when binding host interfaces.
- Example: dpdk_interface_driver=vfio-pci
- Default: "uio_pci_generic"
- pci_whitelist:
- A repeated space seperated list of pci whitelist flags
for allowed ovs-dpdk ports.
- The pci_whitelist allows multiple dpdk primary process to
utilise different pci devices without resulting in a conflict
of ownership.
- Example: pci_whitelist="-w <pci address 1> -w <pci address 2>"
- Default: auto generated form port_mappings.
EOF

}
|
||||||
|
|
||||||
|
# Command dispatch: the first argument names the function to run and the
# remaining arguments are passed to it; with no arguments the help text
# is printed.
if [ $# -ge 1 ]; then
    func=$1
    shift
else
    func="useage"
fi

# Call the function directly instead of through eval: arguments keep their
# word boundaries and nothing given on the command line is re-parsed as
# shell code. Only names that are shell functions in this process are
# accepted.
if declare -F -- "$func" > /dev/null; then
    "$func" "$@"
    _OVS_DPDK_CTL_STATUS=$?
else
    echo "ovs-dpdkctl: unknown command '$func'" >&2
    _OVS_DPDK_CTL_STATUS=1
fi

# Restore the xtrace setting saved at the top of the script (intentionally
# unquoted: the saved text is a complete "set" command).
${_XTRACE_OVS_DPDK_CTL}

# Report the command's own status rather than that of the xtrace restore,
# so callers such as systemd can see failures. "return" covers the case
# where the file is sourced; "exit" covers normal execution.
return "$_OVS_DPDK_CTL_STATUS" 2> /dev/null || exit "$_OVS_DPDK_CTL_STATUS"
|
Loading…
Reference in New Issue
Block a user