From 28e0a704adf9a8de510cf5b7ce135a6695c147f4 Mon Sep 17 00:00:00 2001 From: Nuru Date: Sat, 11 Apr 2020 15:32:15 -0700 Subject: [PATCH 1/3] Skip setting governors if existing governors cannot be read --- runfibtest | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/runfibtest b/runfibtest index 1a31709..cc27baa 100755 --- a/runfibtest +++ b/runfibtest @@ -35,8 +35,8 @@ checkandcompile () { # Save old governors, and assume all governors match savegovernors () { - OLDGOV=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor) - OLDENERGY=$(cat /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference) + OLDGOV=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor 2>/dev/null) + OLDENERGY=$(cat /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference 2>/dev/null) } # Set governors @@ -44,10 +44,17 @@ setgovernors () { SCALINGGOV=$1 ENERGYGOV=$2 + if [[ -n $SCALINGGOV ]] && [[ -z $OLDGOV ]]; then + echo No existing scaling_governor found, not attempting to set it to "${SCALINGGOV}" >&2 + fi + if [[ -n $ENERGYGOV ]] && [[ -z $OLDENERGY ]]; then + echo No existing energy_performance_preference found, not attempting to set it to "${ENERGYGOV}" >&2 + fi + for (( i=0 ; i < NRCPUS ; i++ )) do - sudo sh -c "echo '$SCALINGGOV' > /sys/devices/system/cpu/cpu${i}/cpufreq/scaling_governor" - sudo sh -c "echo '$ENERGYGOV' > /sys/devices/system/cpu/cpu${i}/cpufreq/energy_performance_preference" + [[ -n $SCALINGGOV ]] && [[ -n $OLDGOV ]] && sudo sh -c "echo '$SCALINGGOV' > /sys/devices/system/cpu/cpu${i}/cpufreq/scaling_governor" + [[ -n $ENERGYGOV ]] && [[ -n $OLDENERGY ]] && sudo sh -c "echo '$ENERGYGOV' > /sys/devices/system/cpu/cpu${i}/cpufreq/energy_performance_preference" done } @@ -73,7 +80,7 @@ setgovernors performance performance # Create CGROUP if [ ! -d ${CGROUPDIR}/${CGROUP} ] ; then - sudo mkdir -p ${CGROUPDIR}/${CGROUP} + sudo mkdir -p ${CGROUPDIR}/${CGROUP} || exit $? 
fi # Set the CPU quota From 923f898f35177b04ab3c9b892288b478be3790ed Mon Sep 17 00:00:00 2001 From: Nuru Date: Sun, 26 Apr 2020 20:42:33 -0700 Subject: [PATCH 2/3] make more portable --- README.md | 78 +++++++++++++++++++++++++++++++++++++++++++++++++----- fibtest.c | 2 +- runfibtest | 61 +++++++++++++++++++++++++----------------- 3 files changed, 110 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index ebbdd76..b83227e 100644 --- a/README.md +++ b/README.md @@ -11,18 +11,84 @@ thread is pinned to it's corresponding cpu (thread 0 is on CPU 0, thread 2 on CPU 2 etc...). By default fibtest spawns one fast thread on CPU 0, and a number of slow -threads equal to the number of CPUS minus the number of fast threads +threads equal to the number of CPUS minus the number of fast threads. + +This test can be used to check for the CFS scheduler bug fixed by + +- [`de53fd7ae`](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=de53fd7aedb100f03e5d2231cfce0e4993282425) + sched/fair: Fix low cpu usage with high throttling by removing expiration of cpu-local slices + +For a detailed explanation of the bug being tested and its fix, +see [blog post 1](https://engineering.indeedblog.com/blog/2019/12/unthrottled-fixing-cpu-limits-in-the-cloud/), +[blog post 2](https://engineering.indeedblog.com/blog/2019/12/cpu-throttling-regression-fix/), +and video of [Dave Chiluk's presentation at KubeCon 2019](https://youtu.be/UE7QX98-kO0). + +The important thing to understand is that this bug affects performance in proportion +to the number of cores on the machine. This test will not be effective at demonstrating +the bug on a 4 core machine. At least 32 cores are recommended. ## Running fibtest + +- You need to be running as root or be able to `sudo` without entering a password. +- You need `bash`, `git`, `make`, `gcc`, and `libc-devel` installed. +- You must be running on the host, not inside a Docker container or Kubernetes pod. 
+It is OK if you are running in a VM, such as a cloud server. +- You must be running a Linux family kernel. This will not work on Windows or +macOS/Darwin/Xnu. + +1. Clone this git repository and `cd` into it. Check which branch you are interested in +and possibly `git checkout <branch>`. +2. Run `getconf _NPROCESSORS_ONLN` which reports the number of CPUs available. The test +may not produce accurate or conclusive results with fewer than 8 CPUs. The more CPUs the better. +3. Run `./runfibtest 1`. This will compile the fibtest binary if needed. If you run +into issues, you may need to install missing components using `apt-get install` or `yum install` +or something similar depending on your OS. + +The results of `./runfibtest 1` should be something like this: ``` -$ ./runfibtest 1; ./runfibtest +Iterations Completed(M): 1573 +Throttled for: 51 +CPU Usage (msecs) = 501 ``` -runfibtest optionally takes an arguement which is the total number of threads to spawn. With -no argument it will spawn one fast thread and the number of threads equal to the number of processors. +- "Iterations Completed(M)" is relatively unimportant. It mainly has to do with the +CPU clock speed. +- "Throttled for: 51" is expected. This test runs a single thread for 50 periods at full speed with a +quota of 0.1 CPUs (100m), so it is expected to be throttled for 50 or 51 periods depending on how things +line up. +- "CPU Usage (msecs) = 501" is a baseline number. We would expect it to be 500 because we are running +100m CPU for 5 seconds which adds up to 500ms. -It returns the number of iterations of the fibonacci sequence it was able to accomplish, as well as how -long it was throttled and the corresponding cpu usage that was used. +The above numbers establish a baseline that the test is working correctly and no extraneous issues are +interfering with the test. + +Now we run the real test: +``` +./runfibtest +``` + +This runs a single fast thread, plus 1 slow thread on every core but one. 
This demonstrates the bug, in that +the slow threads each end up stealing 1ms per period from the fast thread, without using up +substantial amounts of CPU time themselves. You should see output like this on an affected machine: +``` +Iterations Completed(M): 150 +Throttled for: 58 +CPU Usage (msecs) = 88 +``` +This test was from an affected kernel on a machine with 64 CPUs. +Note that the number of Iterations completed and the CPU Usage have dropped considerably from the +previous test, completely contrary to expectations. + +The same test, on the same machine, using a patched kernel, produced this result: +``` +Iterations Completed(M): 1209 +Throttled for: 51 +CPU Usage (msecs) = 493 +``` +Note there is a small decline in CPU usage from 501 to 493 ms, which can be attributed to the +extra kernel work keeping track of 64 threads. Still, the number of throttled periods remained 51 +and the decline in CPU usage is less than 2%. The substantial decline in Iterations Completed +is a more complex topic, beyond the scope of this particular bug. ## Code of Conduct This project is governed by the [Contributor Covenant v 1.4.1](CODE_OF_CONDUCT.md) diff --git a/fibtest.c b/fibtest.c index 5229c7a..7e4ab9b 100644 --- a/fibtest.c +++ b/fibtest.c @@ -55,7 +55,7 @@ void usage () "It then prints out the total number of iterations it was able to complete.\n" "\n" "It divides execution into fast and slow threads. Fast threads run as fast\n" - "as possible and slow threads run 100 iterations and the sleep for 10ms.\n" + "as possible and slow threads run 100 iterations and then sleep for 10ms.\n" "\n" "Options\n" "-v, Verbose prints total number of iterations per thread\n" diff --git a/runfibtest b/runfibtest index cc27baa..ed975bd 100755 --- a/runfibtest +++ b/runfibtest @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash # # Copyright (C) 2019 Indeed Inc. 
# @@ -16,31 +16,44 @@ # set -euo pipefail CGROUP=user.slice/fibtest -CGROUPDIR=/sys/fs/cgroup/cpu,cpuacct typeset -i BNRTHROTT ANRTHROTT DNRTHROTT AUSAGE BUSAGE DUSAGE THREADS NRCPUS THREADEDIT THREADEDTIME THREADEDTHROT SINGLEIT SINGLETIME SINGLETHROT NRCPUS=$(getconf _NPROCESSORS_ONLN) THREADS=${1:-NRCPUS} -QUOTA=$(( 10000 )) +QUOTA=$((10000)) + +if [[ -d /sys/fs/cgroup/cpu,cpuacct ]]; then + CGROUPDIR=/sys/fs/cgroup/cpu,cpuacct + CACCTDIR=${CGROUPDIR} +elif [[ -d /sys/fs/cgroup/cpuacct,cpu ]]; then + CGROUPDIR=/sys/fs/cgroup/cpuacct,cpu + CACCTDIR=${CGROUPDIR} +elif [[ -d /sys/fs/cgroup/cpu ]]; then + CGROUPDIR=/sys/fs/cgroup/cpu + CACCTDIR=/sys/fs/cgroup/cpuacct +else + echo Unable to find CGroup directory, giving up >&2 + exit 1 +fi -checkandcompile () { - if [ ! -x fibtest ] ; then +checkandcompile() { + if [ ! -x fibtest ]; then make clean make - if [ ! -x fibtest ] ; then + if [ ! -x fibtest ]; then echo "Unable to compile fibtest\n" - exit 1 + exit 1 fi fi } # Save old governors, and assume all governors match -savegovernors () { +savegovernors() { OLDGOV=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor 2>/dev/null) OLDENERGY=$(cat /sys/devices/system/cpu/cpu0/cpufreq/energy_performance_preference 2>/dev/null) } # Set governors -setgovernors () { +setgovernors() { SCALINGGOV=$1 ENERGYGOV=$2 @@ -51,49 +64,49 @@ setgovernors () { echo No existing energy_performance_preference found, not attempting to set it to "${ENERGYGOV}" >&2 fi - for (( i=0 ; i < NRCPUS ; i++ )) - do + for ((i = 0; i < NRCPUS; i++)); do [[ -n $SCALINGGOV ]] && [[ -n $OLDGOV ]] && sudo sh -c "echo '$SCALINGGOV' > /sys/devices/system/cpu/cpu${i}/cpufreq/scaling_governor" [[ -n $ENERGYGOV ]] && [[ -n $OLDENERGY ]] && sudo sh -c "echo '$ENERGYGOV' > /sys/devices/system/cpu/cpu${i}/cpufreq/energy_performance_preference" done } -runtest () { +runtest() { BNRTHROTT=$(grep nr_throttled ${CGROUPDIR}/${CGROUP}/cpu.stat | awk '{print $2}') - BUSAGE=$(cat 
${CGROUPDIR}/${CGROUP}/cpuacct.usage) + BUSAGE=$(cat ${CACCTDIR}/${CGROUP}/cpuacct.usage) ##### RUN TEST ##### - ITERATIONS=$(./fibtest -t "$THREADS" -s 5 | awk -F': ' '{print $2}' ) + echo + echo "Running test with ${THREADS} thread(s) on machine with ${NRCPUS} CPUs" + ITERATIONS=$(./fibtest -t "$THREADS" -s 5 | awk -F': ' '{print $2}') echo "Iterations Completed(M): $ITERATIONS" ANRTHROTT=$(grep nr_throttled ${CGROUPDIR}/${CGROUP}/cpu.stat | awk '{print $2}') - AUSAGE=$(cat ${CGROUPDIR}/${CGROUP}/cpuacct.usage) - DNRTHROTT=$(( ANRTHROTT - BNRTHROTT )) - DUSAGE=$(( AUSAGE - BUSAGE )) + AUSAGE=$(cat ${CACCTDIR}/${CGROUP}/cpuacct.usage) + DNRTHROTT=$((ANRTHROTT - BNRTHROTT)) + DUSAGE=$((AUSAGE - BUSAGE)) echo "Throttled for: $DNRTHROTT " - echo "CPU Usage (msecs) = $((DUSAGE / 1000000 ))" + echo "CPU Usage (msecs) = $((DUSAGE / 1000000))" } - checkandcompile savegovernors setgovernors performance performance # Create CGROUP -if [ ! -d ${CGROUPDIR}/${CGROUP} ] ; then +if [ ! -d ${CGROUPDIR}/${CGROUP} ]; then sudo mkdir -p ${CGROUPDIR}/${CGROUP} || exit $? 
fi # Set the CPU quota sudo sh -c "echo $QUOTA > ${CGROUPDIR}/${CGROUP}/cpu.cfs_quota_us" sudo sh -c "echo $$ >> ${CGROUPDIR}/${CGROUP}/tasks" +[[ ${CACCTDIR} != ${CGROUPDIR} ]] && sudo sh -c "echo $$ >> ${CACCTDIR}/${CGROUP}/tasks" # ****************** RUN THE TEST *************** -# Uses Number of CPUs -runtest -# +# Uses Number of CPUs +runtest +# THREADEDIT=${ITERATIONS} - # *********************************************** setgovernors "${OLDGOV}" "${OLDENERGY}" From 1f010507d1e5cc285ac57845642a59e563e3e56a Mon Sep 17 00:00:00 2001 From: Nuru Date: Mon, 27 Apr 2020 13:34:36 -0700 Subject: [PATCH 3/3] Update README.md per chiluk Co-Authored-By: chiluk --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b83227e..3a2ada2 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ see [blog post 1](https://engineering.indeedblog.com/blog/2019/12/unthrottled-fi and video of [Dave Chiluk's presentation at KubeCon 2019](https://youtu.be/UE7QX98-kO0). The important thing to understand is that this bug affects performance in proportion -to the number of cores on the machine. This test will not be effective at demonstrating -the bug on a 4 core machine. At least 32 cores are recommended. +to the number of cores on the machine. This test will not be as definitive on a 4 core machine +as on a higher core count machine. ## Running fibtest