From 307b241f0da2aed3bb80ebf4c54e3fc48efb219d Mon Sep 17 00:00:00 2001 From: John Hood Date: Sun, 20 Sep 2015 02:25:39 -0400 Subject: [PATCH] End to end test suite for Mosh sessions. For original messy development history, see https://github.com/cgull/mosh/tree/localhost or https://github.com/cgull/mosh/commit/e7feed48a4a271e2977f714f58acfa04cdaa5cca --- .travis.yml | 4 +- Makefile.am | 2 +- src/tests/.gitignore | 3 + src/tests/Makefile.am | 21 ++- src/tests/README.md | 150 +++++++++++++++++ src/tests/e2e-failure.test | 33 ++++ src/tests/e2e-success.test | 33 ++++ src/tests/e2e-test | 220 +++++++++++++++++++++++++ src/tests/e2e-test-server | 39 +++++ src/tests/emulation-back-tab.test | 50 ++++++ src/tests/hold-stdin | 21 +++ src/tests/print-exitstatus | 21 +++ src/tests/unicode-later-combining.test | 61 +++++++ 13 files changed, 655 insertions(+), 3 deletions(-) create mode 100644 src/tests/README.md create mode 100755 src/tests/e2e-failure.test create mode 100755 src/tests/e2e-success.test create mode 100755 src/tests/e2e-test create mode 100755 src/tests/e2e-test-server create mode 100755 src/tests/emulation-back-tab.test create mode 100755 src/tests/hold-stdin create mode 100755 src/tests/print-exitstatus create mode 100755 src/tests/unicode-later-combining.test diff --git a/.travis.yml b/.travis.yml index c153184..5424790 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,8 @@ addons: - protobuf-compiler - libprotobuf-dev - libutempter-dev + - tmux # test suite + - perl # test suite before_install: - if test "$TRAVIS_OS_NAME" = osx; then brew update; fi @@ -25,7 +27,7 @@ before_install: script: - ./autogen.sh - ./configure --enable-compile-warnings=error --enable-examples - - make distcheck + - AM_TESTS_REDIRECT='9>&2' make distcheck 9>&2 notifications: irc: "chat.freenode.net#mosh" diff --git a/Makefile.am b/Makefile.am index 1922ceb..ac0bb8f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,5 @@ ACLOCAL_AMFLAGS = -I m4 -SUBDIRS = src scripts man conf 
+SUBDIRS = scripts src man conf EXTRA_DIST = autogen.sh ocb-license.html README.md COPYING.iOS BUILT_SOURCES = version.h CLANG_SCAN_BUILD = scan-build diff --git a/src/tests/.gitignore b/src/tests/.gitignore index 0d56ed6..0235e91 100644 --- a/src/tests/.gitignore +++ b/src/tests/.gitignore @@ -1,2 +1,5 @@ /ocb-aes /encrypt-decrypt +/*.d/ +*.log +*.trs diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am index b2bbfc8..012ea4b 100644 --- a/src/tests/Makefile.am +++ b/src/tests/Makefile.am @@ -1,8 +1,22 @@ +EXTRA_DIST = \ + hold-stdin print-exitstatus \ + e2e-test e2e-test-server \ + $(displaytests) + AM_CXXFLAGS = $(WARNING_CXXFLAGS) $(PICKY_CXXFLAGS) $(HARDEN_CFLAGS) $(MISC_CXXFLAGS) AM_LDFLAGS = $(HARDEN_LDFLAGS) +displaytests = \ + e2e-success.test \ + e2e-failure.test \ + emulation-back-tab.test \ + unicode-later-combining.test + check_PROGRAMS = ocb-aes encrypt-decrypt -TESTS = ocb-aes encrypt-decrypt +TESTS = ocb-aes encrypt-decrypt $(displaytests) +XFAIL_TESTS = \ + e2e-failure.test \ + emulation-back-tab.test ocb_aes_SOURCES = ocb-aes.cc test_utils.cc test_utils.h ocb_aes_CPPFLAGS = -I$(srcdir)/../crypto -I$(srcdir)/../util @@ -11,3 +25,8 @@ ocb_aes_LDADD = ../crypto/libmoshcrypto.a ../util/libmoshutil.a $(OPENSSL_LIBS) encrypt_decrypt_SOURCES = encrypt-decrypt.cc test_utils.cc test_utils.h encrypt_decrypt_CPPFLAGS = -I$(srcdir)/../crypto -I$(srcdir)/../util encrypt_decrypt_LDADD = ../crypto/libmoshcrypto.a ../util/libmoshutil.a $(OPENSSL_LIBS) + +clean-local: clean-local-check +.PHONY: clean-local-check +clean-local-check: + -for i in $(displaytests); do rm -rf $$i.d/; done diff --git a/src/tests/README.md b/src/tests/README.md new file mode 100644 index 0000000..1ef4057 --- /dev/null +++ b/src/tests/README.md @@ -0,0 +1,150 @@ +# Mosh Tests + +## ocb-aes + +This is a unit test for the OCB-AES encryption used in mosh, including +Rogaway's OCB implementation and some of mosh's surrounding C++ +support code. 
+ +## encrypt-decrypt + +This is a simple functional test of mosh's implementation of encrypted messages. + +## e2e-test + +This is a test framework for end-to-end testing of mosh. It uses tmux +to invoke mosh in a nicely stable interactive pty, and also uses +tmux's `capture-pane` command to get a dump of the terminal screen +that mosh-client has drawn, neatly getting around Mosh's somewhat +non-deterministic display redraw. + +There are three essential parts to the framework: + +* your test script +* `e2e-test` +* `e2e-test-server` + +The test script has two roles: when invoked without arguments, it is a +wrapper script for the overall test, and when invoked with an +argument, it performs a testing-related action. In wrapper mode, it +invokes e2e-test with action arguments, which are used to invoke the +test script for actions at appropriate points by e2e-test. These +provide a suite of behaviors that you can use to test various mosh +behaviors. + +`e2e-test` is the heart of the framework. It runs actions as +requested, logs their output, compares and/or validates their results, +and generates the final result (exitstatus, mostly) for the Automake +testing framework used by the mosh build. For test execution, it runs +an action in an interactive session, in a tmux `screen`, to exercise +some behavior. The action can optionally be run in a mosh session, or +directly in tmux (doing both and comparing the result is a useful way +to test complex terminal emulation behaviors). The action generally +writes some output to the terminal that can later be verified by +another action. Optionally, a client action can generate tty input or +otherwise exercise mosh in some fashion (this capability is untested, +but it's a useful place to use `expect` or other interactive +simulations). The action is run by `e2e-test-server`, which is a +relatively small wrapper script to capture errors, and capture the +tmux screen.
+ +There are several different categories of actions: + +### Execution + +`baseline` is an action that almost all tests will use. This invokes +the test script inside mosh, where it can generate some output, and +then captures the client-side tmux display with `tmux capture-pane`. + +`direct` is the same as the above, except that mosh is not used-- +`e2e-test-server` and the test script are invoked directly inside +tmux. + +`variant` can be used to provide a slightly different action from +`baseline`. + +### Verification + +`verify` compares captures from the `baseline` and `direct` test +actions, which are expected to be identical. + +`same` compares captures from the `baseline` and `variant` test +actions, which are expected to be identical. + +`different` compares captures from the `baseline` and `variant` test +actions, which are expected to be different. + +`post` is a catchall script hook which allows custom verification +actions to be coded. + +### Client wrapper + +`client` simply injects a wrapper command into the (long) test command +between tmux and mosh. It's expected to interact with its wrapped +command line as `expect` might do. This is not actually tested yet. + + +## Logging and error reporting + +Each execution action is run, and recorded in +`TESTNAME.test.d/ACTION.*`. `ACTION.exitstatus` is the +exitstatus from the server wrapper. `ACTION.tmux.log` is the output +of tmux for the entire test run for that action; `ACTION.capture` is +a capture of the Mosh client screen after the test action is complete, +generated with `tmux capture-pane`. + +In accordance with GNU Automake's test framework, the test should +return these exit status values: + +* 0 test success +* 1 test failure +* 77 test skipped (tmux or ssh is unavailable if needed) +* 99 hard error + +These values are also used internally between the various scripts; +errors are conveyed out to the build test framework.
+ + +## Sample tests + +A few tests have been implemented so far to test the framework itself, +and to provide examples for further development. + +`e2e-success` is a simple test that executes `baseline` and `direct` +with the same stimulus (simply clearing the screen), and expects to +see identical results. + +`e2e-failure` is similar to `e2e-success`, but expects to see different +results from `baseline` and `variant`. Since it uses the same +stimulus for the two execution actions, it fails. A more realistic +test might be to have `variant` execute some escape sequence that is +absent from `baseline`; this would verify that the escape sequence +actually does something. + +`emulation-back-tab` tests an escape sequence that mosh does not +support. It expects the test to produce the output that would be +generated if the escape sequence were implemented. If it gets output +as expected when the escape sequence is *not* implemented, the test +fails. But if the output does not match one of these two cases, the +test returns an error. This is an example of error handling within +the test framework. + +`unicode-later-combining` demonstrates mosh's handling of a Unicode +edge case, a combining character drawn without a printing character in +the same cell. It verifies the output in the `post` action; since +there are a couple of different Unicode renderings that are reasonable +in this case, a regex that covers both is used. It also implements an +unused `variant` action that draws blank-space+combiner in a correct +fashion. + +## Notes + +The shell command `printf` is generally used in place of +`echo` in this framework, because of its more precisely-specified and +portable behavior.
But beware, even `printf` varies between systems-- +GNU printf, for example, implements `\e`, which is a non-POSIX +extension unavailable in BSD implementations + +It's fairly simple to test each of these scripts independently, but +the entire chain is a bit prone to behaving oddly in hard-to-debug +ways. `set -x` is your friend here. diff --git a/src/tests/e2e-failure.test b/src/tests/e2e-failure.test new file mode 100755 index 0000000..952d1c5 --- /dev/null +++ b/src/tests/e2e-failure.test @@ -0,0 +1,33 @@ +#!/bin/sh + +fail() +{ + printf "$@" >&2 # diagnostics belong on stderr + exit 99 +} + + + +PATH=$PATH:.:$srcdir +# Top-level wrapper. +if [ $# -eq 0 ]; then + e2e-test $0 baseline variant different + exit +fi + +# OK, we have arguments, we're one of the test hooks. +if [ $# -ne 1 ]; then + fail "bad arguments %s\n" "$@" +fi + +baseline() +{ + printf "\033[H\033[J" +} + +case $1 in + baseline|variant) + baseline;; + *) + fail "unknown test argument %s\n" $1;; +esac diff --git a/src/tests/e2e-success.test b/src/tests/e2e-success.test new file mode 100755 index 0000000..9fda323 --- /dev/null +++ b/src/tests/e2e-success.test @@ -0,0 +1,33 @@ +#!/bin/sh + +fail() +{ + printf "$@" >&2 # diagnostics belong on stderr + exit 99 +} + + + +PATH=$PATH:.:$srcdir +# Top-level wrapper. +if [ $# -eq 0 ]; then + e2e-test $0 baseline direct variant verify same + exit +fi + +# OK, we have arguments, we're one of the test hooks. +if [ $# -ne 1 ]; then + fail "bad arguments %s\n" "$@" +fi + +baseline() +{ + printf "\033[H\033[J" +} + +case $1 in + baseline|direct|variant) + baseline;; + *) + fail "unknown test argument %s\n" $1;; +esac diff --git a/src/tests/e2e-test b/src/tests/e2e-test new file mode 100755 index 0000000..038a12a --- /dev/null +++ b/src/tests/e2e-test @@ -0,0 +1,220 @@ +#!/bin/sh + +# +# Validate that mosh produces expected output, using screen captures +# in tmux.
+# + +log() +{ + printf "$@" +} + +error() +{ + printf "$@" >&2 +} + +dump_logs() +{ + local dir + local testname + dir=$1 + shift + testname=$(basename $dir .d) + for logfile in $dir/*.tmux.log; do + printf "travis_fold:start:%s-%s\n" $testname $(basename $logfile) + cat $logfile + printf "travis_fold:end:%s-%s\n" $testname $(basename $logfile) + done +} + +test_success() +{ + exit 0 +} +test_failure() +{ + error "$@" + exit 1 +} +test_skipped() +{ + error "$@" + exit 77 +} +test_error() +{ + error "$@" + exit 99 +} +test_exitstatus() +{ + status=$1 + shift + error "$@" + exit $status +} + + +# Tmux check. +tmux_check() +{ + local version version_major version_minor + version=$(tmux -V) + if [ $? != 0 ]; then + error "tmux unavailable\n" + return 1 + fi + version=${version##tmux } + version_major=${version%%.*} + version_minor=${version##*.}; version_minor=${version_minor%%[!0-9]*} # drop letter suffix, e.g. 1.9a -> 9 + # need version 1.8 for capture-pane + if [ $version_major -lt 1 ] || + [ $version_major -eq 1 -a $version_minor -lt 8 ]; then + error "tmux version %s too old\n" "$version" + return 1 + fi + return 0 +} + +ssh_localhost_check() +{ + ssh localhost : + if [ $? -ne 0 ]; then + error "ssh to localhost failed\n" + return 1 + fi + return 0 +} + +# main + +# Set up environment +if [ -z "$srcdir" ]; then + : ${srcdir:=$PWD} +else + srcdir="$(cd $srcdir && pwd)" + if [ $? -ne 0 ]; then + error "can't cd to srcdir: %s\n" "$srcdir" + exit 99 + fi +fi + +if ! tmux_check; then + test_skipped "tmux unavailable\n" +fi + +if [ $# -lt 2 ]; then + test_error "not enough args\n" +fi + +# Get arguments (only one so far) +test_name=$1 +shift +test_args=$@ +# XXX could use AM testsubdir macro instead +test_dir=$(basename ${test_name}).d +test_script="${test_name}" +rm -rf "${test_dir}" +mkdir "${test_dir}" + + +if [ "x$AM_TESTS_REDIRECT" != "x" ]; then + RENDER_REDIRECT=">&9" + exec 2>&9 +fi +trap 'rv=$?; if test $rv -ne 0; then dump_logs '"$test_dir $test_args $RENDER_REDIRECT"'; fi; exit $rv' EXIT + +# Set up tests to run.
+server_tests= +compare_tests= +for i in $test_args; do + case $i in + baseline|direct|variant) + server_tests="$server_tests $i";; + verify|same|different) + compare_tests="$compare_tests $i";; + client) + client=1;; + post) + post=1;; + *) + error "unknown test type argument %s\n" "$i" # format string needs a newline, not a trailing comma + exit 99 + ;; + esac +done + +# Run test(s). +client_wrapper= +if [ -n "$client" ]; then + client_wrapper="${test_script} client" +fi + +for run in $server_tests; do + log "Running server test %s.\n" "$run" + # XXX need to quote special chars in server pathname here somehow + sut="../../scripts/mosh --client=../frontend/mosh-client --server=$PWD/../frontend/mosh-server --local --bind-server=127.0.0.1 127.0.0.1" + testarg=$run + if [ "$run" = "direct" ]; then + sut="" + fi + # Actually execute code under test + # XXX tmux 1.8 requires shell command as a single arg; once we move to 2.0, undo these quotes + # XXX this ignores $TMPDIR, because it results in an overlong pathname on OS X + if ! ${srcdir}/hold-stdin tmux -S "/tmp/.tmux-mosh-test-$$" -C new-session "${srcdir}/print-exitstatus ${client_wrapper} ${sut} \"${srcdir}/e2e-test-server\" \"${PWD}/${test_dir}/${run}\" \"${PWD}/${test_script} ${testarg}\"" > "${test_dir}/${run}.tmux.log"; then + test_error "tmux failure on test %s\n" "$run" + fi + # Check for mosh failures + if ! grep -q "%%% exitstatus: 0 %%%" "${test_dir}/${run}.tmux.log"; then + test_error "mosh-client had non-zero exitstatus\n" + fi + + # Check for server harness failures + if [ ! -s "${test_dir}/${run}.capture" ] \ + || [ !
-s "${test_dir}/${run}.exitstatus" ]; then + test_error "server harness failure on test %s\n" "$run" + fi + read server_rv < "${test_dir}/${run}.exitstatus" + if [ "$server_rv" -ne 0 ]; then + test_error "server harness exited with status %s\n" "$server_rv" + fi + +done + +for compare in $compare_tests; do + log "Running server comparison %s.\n" "$compare" + # Compare captures + if [ "$compare" = verify ]; then + test1="direct" + test2="baseline" + else + test1="baseline" + test2="variant" + fi + if diff -q "${test_dir}/${test1}.capture" "${test_dir}/${test2}.capture"; then + differ=n + else + differ=y + fi + if [ "$compare" = different ]; then + desired=y + badresult=same + else + desired=n + badresult=different + fi + if [ $differ != $desired ]; then + test_failure "Output is %s between tests %s and %s\n" "$badresult" "$test1" "$test2" + fi +done + +# Run a post script (usually a custom validation of results) +if [ -n "$post" ]; then + "${test_script}" post + status=$? + if [ $status -ne 0 ]; then + test_exitstatus $status "Post test failed with exitstatus %d\n" $status + fi +fi diff --git a/src/tests/e2e-test-server b/src/tests/e2e-test-server new file mode 100755 index 0000000..c24a8db --- /dev/null +++ b/src/tests/e2e-test-server @@ -0,0 +1,39 @@ +#!/bin/sh + +# +# Harness script for Mosh tests, server side. Runs test script and +# then captures screen with `tmux capture-pane`. Captures exitstatus +# of both and returns appropriate errors. +# +if [ $# -lt 2 ]; then + printf "not enough args\n" >&2 + exit 99 +fi +testname=$1 +shift +rm -f "$testname.capture" "$testname.exitstatus" # quoted: path is built from the caller's $PWD +trap 'rv=$?; echo $rv > "$testname.exitstatus"; exit $rv' EXIT +# check for tmux +if [ -z "$TMUX_PANE" ]; then + printf "not running under tmux\n" >&2 + exit 99 +fi +# run harnessed command +eval "$@" +testret=$? +# Wait a bit for tmux screen to become up to date. +sleep 1 +# capture screen +if !
tmux capture-pane -et $TMUX_PANE; then + printf "tmux capture-pane failed, erroring test\n" >&2 + exit 99 +fi +if ! tmux save-buffer "$testname.capture"; then + printf "tmux save-buffer failed, erroring test\n" >&2 + exit 99 +fi +# return useful exitstatus from harnessed command +if [ $testret -ne 0 ]; then + exit 1 +fi +exit 0 diff --git a/src/tests/emulation-back-tab.test b/src/tests/emulation-back-tab.test new file mode 100755 index 0000000..f407415 --- /dev/null +++ b/src/tests/emulation-back-tab.test @@ -0,0 +1,50 @@ +#!/bin/sh + +# +# This test is for issue 539 on github. +# + +fail() +{ + printf "$@" >&2 # diagnostics belong on stderr + exit 99 +} + + + +PATH=$PATH:.:$srcdir +# Top-level wrapper. +if [ $# -eq 0 ]; then + e2e-test $0 baseline post + exit +fi + +# OK, we have arguments, we're one of the test hooks. +if [ $# -ne 1 ]; then + fail "bad arguments %s\n" "$@" +fi + +baseline() +{ + printf 'hello, wurld\033[Zo\n' +} + +post() +{ + if grep -q 'hello, world' $(basename $0).d/baseline.capture; then + exit 0 + fi + if grep -q 'hello, wurldo' $(basename $0).d/baseline.capture; then + exit 1 + fi + exit 99 +} + +case $1 in + baseline) + baseline;; + post) + post;; + *) + fail "unknown test argument %s\n" $1;; +esac diff --git a/src/tests/hold-stdin b/src/tests/hold-stdin new file mode 100755 index 0000000..e6132d2 --- /dev/null +++ b/src/tests/hold-stdin @@ -0,0 +1,21 @@ +#!/usr/bin/env perl + +# +# The sole function of this script is to provide a stdin that doesn't +# read data or return EOF to its children. If there's a clean, +# portable, not-Perl way to do this, then that should replace this. +# +use warnings; +use strict; + +my $pid = open(my $fh, "|-", @ARGV) or die "hold-stdin: cannot run @ARGV: $!\n"; +waitpid($pid, 0) == $pid or die; +my $rc; +if ($? == 0) { + $rc = 0; +} elsif ($? >= 256) { + $rc = $? >> 8; +} else { + $rc = ($?
& 127) | 128; +} +exit $rc; diff --git a/src/tests/print-exitstatus b/src/tests/print-exitstatus new file mode 100755 index 0000000..bd39ac1 --- /dev/null +++ b/src/tests/print-exitstatus @@ -0,0 +1,21 @@ +#!/usr/bin/env perl + +# +# Print exitstatus on stderr. +# +use warnings; +use strict; + +my $rc = system(@ARGV); +if ($? == -1) { + die "system failed: $!\n"; # $! holds the OS error; "%!" would print literally +} +if ($? == 0) { + $rc = 0; +} elsif ($? >= 256) { + $rc = $? >> 8; +} else { + $rc = ($? & 127) | 128; +} +print STDERR "%%% exitstatus: ${rc} %%%\n"; +exit $rc; diff --git a/src/tests/unicode-later-combining.test b/src/tests/unicode-later-combining.test new file mode 100755 index 0000000..980b8ed --- /dev/null +++ b/src/tests/unicode-later-combining.test @@ -0,0 +1,61 @@ +#!/bin/sh + +# +# This test is for the first Unicode issue described on the Mosh web +# page, a combining character drawn on a cell after returning the +# cursor to that cell. +# +# XXX This test is fragile because it depends on tmux's unicode rendering. +# The baseline and variant tests produce different (but valid) outputs +# that are visually identical. The variant test is not run or validated. +# + +fail() +{ + printf "$@" >&2 # diagnostics belong on stderr + exit 99 +} + + + +PATH=$PATH:.:$srcdir +# Top-level wrapper. +if [ $# -eq 0 ]; then + e2e-test $0 baseline post + exit +fi + +# OK, we have arguments, we're one of the test hooks. +if [ $# -ne 1 ]; then + fail "bad arguments %s\n" "$@" +fi + +baseline() +{ + printf 'abc\n\314\202\ndef\n' +} + +variant() +{ + printf 'abc\n \314\202\ndef\n' +} + +post() +{ + export LANG=C + if grep -q "$(printf '^\302\240\314\202$')" $(basename $0).d/baseline.capture; then + exit 0 + fi + exit 1 +} + +case $1 in + baseline) + baseline;; + variant) + variant;; + post) + post;; + *) + fail "unknown test argument %s\n" $1;; +esac