From 307b241f0da2aed3bb80ebf4c54e3fc48efb219d Mon Sep 17 00:00:00 2001
From: John Hood <cgull@glup.org>
Date: Sun, 20 Sep 2015 02:25:39 -0400
Subject: [PATCH] End to end test suite for Mosh sessions.

For original messy development history, see
https://github.com/cgull/mosh/tree/localhost
or
https://github.com/cgull/mosh/commit/e7feed48a4a271e2977f714f58acfa04cdaa5cca
---
 .travis.yml                            |   4 +-
 Makefile.am                            |   2 +-
 src/tests/.gitignore                   |   3 +
 src/tests/Makefile.am                  |  21 ++-
 src/tests/README.md                    | 150 +++++++++++++++++
 src/tests/e2e-failure.test             |  33 ++++
 src/tests/e2e-success.test             |  33 ++++
 src/tests/e2e-test                     | 220 +++++++++++++++++++++++++
 src/tests/e2e-test-server              |  39 +++++
 src/tests/emulation-back-tab.test      |  50 ++++++
 src/tests/hold-stdin                   |  21 +++
 src/tests/print-exitstatus             |  21 +++
 src/tests/unicode-later-combining.test |  61 +++++++
 13 files changed, 655 insertions(+), 3 deletions(-)
 create mode 100644 src/tests/README.md
 create mode 100755 src/tests/e2e-failure.test
 create mode 100755 src/tests/e2e-success.test
 create mode 100755 src/tests/e2e-test
 create mode 100755 src/tests/e2e-test-server
 create mode 100755 src/tests/emulation-back-tab.test
 create mode 100755 src/tests/hold-stdin
 create mode 100755 src/tests/print-exitstatus
 create mode 100755 src/tests/unicode-later-combining.test

diff --git a/.travis.yml b/.travis.yml
index c153184..5424790 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,6 +15,8 @@ addons:
     - protobuf-compiler
     - libprotobuf-dev
     - libutempter-dev
+    - tmux			# test suite
+    - perl			# test suite
 
 before_install:
   - if test "$TRAVIS_OS_NAME" = osx; then brew update; fi
@@ -25,7 +27,7 @@ before_install:
 script:
   - ./autogen.sh
   - ./configure --enable-compile-warnings=error --enable-examples
-  - make distcheck
+  - AM_TESTS_REDIRECT='9>&2' make distcheck 9>&2
 
 notifications:
   irc: "chat.freenode.net#mosh"
diff --git a/Makefile.am b/Makefile.am
index 1922ceb..ac0bb8f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,5 +1,5 @@
 ACLOCAL_AMFLAGS = -I m4
-SUBDIRS = src scripts man conf
+SUBDIRS = scripts src man conf
 EXTRA_DIST = autogen.sh ocb-license.html README.md COPYING.iOS
 BUILT_SOURCES = version.h
 CLANG_SCAN_BUILD = scan-build
diff --git a/src/tests/.gitignore b/src/tests/.gitignore
index 0d56ed6..0235e91 100644
--- a/src/tests/.gitignore
+++ b/src/tests/.gitignore
@@ -1,2 +1,5 @@
 /ocb-aes
 /encrypt-decrypt
+/*.d/
+*.log
+*.trs
diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am
index b2bbfc8..012ea4b 100644
--- a/src/tests/Makefile.am
+++ b/src/tests/Makefile.am
@@ -1,8 +1,22 @@
+EXTRA_DIST = \
+	hold-stdin print-exitstatus \
+	e2e-test e2e-test-server \
+	$(displaytests)
+
 AM_CXXFLAGS = $(WARNING_CXXFLAGS) $(PICKY_CXXFLAGS) $(HARDEN_CFLAGS) $(MISC_CXXFLAGS)
 AM_LDFLAGS  = $(HARDEN_LDFLAGS)
 
+displaytests = \
+	e2e-success.test \
+	e2e-failure.test \
+	emulation-back-tab.test \
+	unicode-later-combining.test
+
 check_PROGRAMS = ocb-aes encrypt-decrypt
-TESTS = ocb-aes encrypt-decrypt
+TESTS = ocb-aes encrypt-decrypt $(displaytests)
+XFAIL_TESTS = \
+	e2e-failure.test \
+	emulation-back-tab.test
 
 ocb_aes_SOURCES = ocb-aes.cc test_utils.cc test_utils.h
 ocb_aes_CPPFLAGS = -I$(srcdir)/../crypto -I$(srcdir)/../util
@@ -11,3 +25,8 @@ ocb_aes_LDADD = ../crypto/libmoshcrypto.a ../util/libmoshutil.a $(OPENSSL_LIBS)
 encrypt_decrypt_SOURCES = encrypt-decrypt.cc test_utils.cc test_utils.h
 encrypt_decrypt_CPPFLAGS = -I$(srcdir)/../crypto -I$(srcdir)/../util
 encrypt_decrypt_LDADD = ../crypto/libmoshcrypto.a ../util/libmoshutil.a $(OPENSSL_LIBS)
+
+clean-local: clean-local-check
+.PHONY: clean-local-check
+clean-local-check:
+	-for i in $(displaytests); do rm -rf $$i.d/; done
diff --git a/src/tests/README.md b/src/tests/README.md
new file mode 100644
index 0000000..1ef4057
--- /dev/null
+++ b/src/tests/README.md
@@ -0,0 +1,150 @@
+# Mosh Tests
+
+## ocb-aes
+
+This is a unit test for the OCB-AES encryption used in mosh, including
+Rogaway's OCB implementation and some of mosh's surrounding C++
+support code.
+
+## encrypt-decrypt
+
+This is a simple functional test of mosh's implementation of encrypted messages.
+
+## e2e-test
+
+This is a test framework for end-to-end testing of mosh.  It uses tmux
+to invoke mosh in a nicely stable interactive pty, and also uses
+tmux's `capture-pane` command to get a dump of the terminal screen
+that mosh-client has drawn, neatly getting around Mosh's somewhat
+non-deterministic display redraw.
+
+There are three essential parts to the framework:
+
+* your test script
+* `e2e-test`
+* `e2e-test-server`
+
+The test script has two roles: when invoked without arguments, it is a
+wrapper script for the overall test, and when invoked with an
+argument, it performs a testing-related action.  In wrapper mode, it
+invokes e2e-test with action arguments, which are used to invoke the
+test script for actions at appropriate points by e2e-test.  These
+provide a suite of behaviors that you can use to test various mosh
+behaviors.
+
+`e2e-test` is the heart of the framework.  It runs actions as
+requested, logs their output, compares and/or validates their results,
+and generates the final result (exitstatus, mostly) for the Automake
+testing framework used by the mosh build.  For test execution, it runs
+an action in an interactive session, in a tmux `screen`, to exercise
+some behavior.  The action can optionally be run in a mosh session, or
+directly in tmux (doing both and comparing the result is a useful way
+to test complex terminal emulation behaviors).  The action generally
+writes some output to the terminal that can later be verified by
+another action.  Optionally, a client action can generate tty input or
+otherwise exercise mosh in some fashion (this capability is untested,
+but it's a useful place to use `expect` or other interactive
+simulations).  The action is run by `e2e-test-server`, which is a
+relatively small wrapper script to capture errors, and capture the
+tmux screen.
+
+There are several different categories of actions:
+
+### Execution
+
+`baseline` is an action that almost all tests will use.  This invokes
+the test script inside mosh, where it can generate some output, and
+then captures the client-side tmux display with `tmux capture-pane`.
+
+`direct` is the same as the above, except that mosh is not used--
+`e2e-test-server` and the test script are invoked directly inside
+tmux.
+
+`variant` can be used to provide a slightly different action from
+`baseline`.
+
+### Verification
+
+`verify` compares captures from the `baseline` and `direct` test
+actions, which are expected to be identical.
+
+`same` compares captures from the `baseline` and `variant` test
+actions, which are expected to be identical.
+
+`different` compares captures from the `baseline` and `variant` test
+actions, which are expected to be different.
+
+`post` is a catchall script hook which allows custom verification
+actions to be coded.
+
+### Client wrapper
+
+`client` simply injects a wrapper command into the (long) test command
+between tmux and mosh.  It's expected to interact with its wrapped
+command line as `expect` might do.  This is not actually tested yet.
+
+
+## Logging and error reporting
+
+Each execution action is run, and recorded in
+`<testname>.test.d/<action>.*`. `<action>.exitstatus` is the
+exitstatus from the server wrapper.  `<action>.tmux.log` is the output
+of tmux for the entire test run for that action; `<action>.capture` is
+a capture of the Mosh client screen after the test action is complete,
+generated with `tmux capture-pane`.
+
+In accordance with GNU Automake's test framework, the test should
+return these exit status values:
+
+* 0 test success
+* 1 test failure
+* 77 test skipped (tmux or ssh is unavailable if needed)
+* 99 hard error
+
+These values are also used internally between the various scripts;
+errors are conveyed out to the build test framework.
+
+
+## Sample tests
+
+A few tests have been implemented so far to test the framework itself,
+and to provide examples for further development.
+
+`e2e-success` is a simple test that executes `baseline` and `direct`
+with the same stimulus (simply clearing the screen), and expects to
+see identical results.
+
+`e2e-failure` is similar to `e2e-success`, but expects to see different
+results from `baseline` and `variant`.  Since it uses the same
+stimulus for the two execution actions, it fails.  A more realistic
+test might be to have `variant` execute some escape sequence that is
+absent from `baseline`; this would verify that the escape sequence
+actually does something.
+
+`emulation-back-tab` tests an escape sequence that mosh does not
+support.  It expects the test to produce the output that would be
+generated if the escape sequence were implemented.  If it gets output
+as expected when the escape sequence is *not* implemented, the test
+fails.  But if the output does not match one of these two cases, the
+test returns an error.  This is an example of error handling within
+the test framework.
+
+`unicode-later-combining` demonstrates mosh's handling of a Unicode
+edge case, a combining character drawn without a printing character in
+the same cell.  It verifies the output in the `post` action; since
+there are a couple of different Unicode renderings that are reasonable
+in this case, a regex that covers both is used.  It also implements an
+unused `variant` action that draws blank-space+combiner in a correct
+fashion.
+
+## Notes
+
+The shell command `printf` is generally used in place of
+`echo` in this framework, because of its more precisely-specified and
+portable behavior.  But beware, even `printf` varies between systems--
+GNU printf, for example, implements `\e`, which is a non-POSIX
+extension unavailable in BSD implementations.
+
+It's fairly simple to test each of these scripts independently, but
+the entire chain is a bit prone to behaving oddly in hard-to-debug
+ways.  `set -x` is your friend here.
diff --git a/src/tests/e2e-failure.test b/src/tests/e2e-failure.test
new file mode 100755
index 0000000..952d1c5
--- /dev/null
+++ b/src/tests/e2e-failure.test
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+fail()  # Report a printf-style message on stderr, then exit 99 (hard error).
+{
+    printf "$@" >&2
+    exit 99
+}
+
+# With no arguments this script is the test wrapper and hands control to e2e-test;
+# with exactly one argument it performs that action (here: clear the screen).
+PATH=$PATH:.:$srcdir
+# Top-level wrapper.
+if [ $# -eq 0 ]; then
+    e2e-test $0 baseline variant different
+    exit
+fi
+
+# OK, we have arguments, we're one of the test hooks.
+if [ $# -ne 1 ]; then
+    fail "bad arguments %s\n" "$@"
+fi
+
+baseline()
+{
+    printf "\033[H\033[J"
+}
+
+case $1 in
+    baseline|variant)
+	baseline;;
+    *)
+	fail "unknown test argument %s\n" $1;;
+esac
diff --git a/src/tests/e2e-success.test b/src/tests/e2e-success.test
new file mode 100755
index 0000000..9fda323
--- /dev/null
+++ b/src/tests/e2e-success.test
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+fail()  # Report a printf-style message on stderr, then exit 99 (hard error).
+{
+    printf "$@" >&2
+    exit 99
+}
+
+# With no arguments this script is the test wrapper and hands control to e2e-test;
+# with exactly one argument it performs that action (here: clear the screen).
+PATH=$PATH:.:$srcdir
+# Top-level wrapper.
+if [ $# -eq 0 ]; then
+    e2e-test $0 baseline direct variant verify same
+    exit
+fi
+
+# OK, we have arguments, we're one of the test hooks.
+if [ $# -ne 1 ]; then
+    fail "bad arguments %s\n" "$@"
+fi
+
+baseline()
+{
+    printf "\033[H\033[J"
+}
+
+case $1 in
+    baseline|direct|variant)
+	baseline;;
+    *)
+	fail "unknown test argument %s\n" $1;;
+esac
diff --git a/src/tests/e2e-test b/src/tests/e2e-test
new file mode 100755
index 0000000..038a12a
--- /dev/null
+++ b/src/tests/e2e-test
@@ -0,0 +1,220 @@
+#!/bin/sh
+
+#
+# Validate that mosh produces expected output, using screen captures
+# in tmux.
+#
+
+log()  # Write a printf-style progress message to stdout.
+{
+    printf "$@"
+}
+
+error()  # Write a printf-style diagnostic to stderr.
+{
+    printf "$@" >&2
+}
+
+dump_logs()  # Print every tmux log in directory $1, wrapped in travis_fold markers.
+{
+    local dir testname logfile
+    dir=$1
+    shift
+    testname=$(basename "$dir" .d)
+    for logfile in "$dir"/*.tmux.log; do
+	[ -f "$logfile" ] || continue	# glob matched nothing: no logs to show
+	printf "travis_fold:start:%s-%s\n" "$testname" "$(basename "$logfile")"
+	cat "$logfile"
+	printf "travis_fold:end:%s-%s\n" "$testname" "$(basename "$logfile")"
+    done
+}
+
+test_success()  # Finish the test as passed (exit 0).
+{
+    exit 0
+}
+test_failure()  # Print a printf-style message on stderr and exit 1 (test failed).
+{
+    error "$@"
+    exit 1
+}
+test_skipped()  # Print a printf-style message on stderr and exit 77 (test skipped).
+{
+    error "$@"
+    exit 77
+}
+test_error()  # Print a printf-style message on stderr and exit 99 (hard error).
+{
+    error "$@"
+    exit 99
+}
+test_exitstatus()  # Exit with status $1 after printing the remaining printf-style args on stderr.
+{
+    status=$1
+    shift
+    error "$@"
+    exit $status
+}
+
+
+# Tmux check.
+tmux_check()
+{
+    local version version_major version_minor
+    version=$(tmux -V)
+    if [ $? != 0 ]; then
+	error "tmux unavailable\n"
+	return 1
+    fi
+    version=${version##tmux }
+    version_major=${version%%.*}
+    version_minor=${version##*.}
+    # need version 1.8 for capture-pane
+    if [ $version_major -lt 1 ] ||
+	   [ $version_major -eq 1 -a $version_minor -lt 8 ]; then
+	error "tmux version %s too old\n" "$version"
+	return 1
+    fi
+    return 0
+}
+
+ssh_localhost_check()  # Return 0 if `ssh localhost :` succeeds, else report on stderr and return 1.
+{
+    ssh localhost :
+    if [ $? -ne 0 ]; then
+	error "ssh to localhost failed\n"
+	return 1
+    fi
+    return 0
+}
+
+# main
+
+# Set up environment: default srcdir to the current directory, otherwise make it absolute.
+if [ -z "$srcdir" ]; then
+    : ${srcdir:=$PWD}
+else
+    srcdir="$(cd $srcdir && pwd)"
+    if [ $? -ne 0 ]; then
+	error "can't cd to srcdir: %s\n" "$srcdir"
+	exit 99
+    fi
+fi
+# Skip (exit 77) rather than fail when tmux is missing or too old.
+if ! tmux_check; then
+    test_skipped "tmux unavailable\n"
+fi
+# Need the test script name plus at least one action.
+if [ $# -lt 2 ]; then
+    test_error "not enough args\n"
+fi
+
+# Get arguments: the test script first, then the list of actions to perform.
+test_name=$1
+shift
+test_args=$@
+# XXX could use AM testsubdir macro instead
+test_dir=$(basename ${test_name}).d
+test_script="${test_name}"
+rm -rf "${test_dir}"
+mkdir "${test_dir}"
+
+# If AM_TESTS_REDIRECT is set, send stderr to fd 9; on a non-zero exit the trap dumps the tmux logs there too.
+if [ "x$AM_TESTS_REDIRECT" != "x" ]; then
+    RENDER_REDIRECT=">&9"
+    exec 2>&9
+fi
+trap 'rv=$?; if test $rv -ne 0; then dump_logs '"$test_dir $test_args $RENDER_REDIRECT"'; fi; exit $rv' EXIT
+
+# Sort the requested actions into execution runs, comparisons, and the client/post flags.
+server_tests=
+compare_tests=
+for i in $test_args; do
+    case $i in
+	baseline|direct|variant)
+	    server_tests="$server_tests $i";;
+	verify|same|different)
+	    compare_tests="$compare_tests $i";;
+	client)
+	    client=1;;
+	post)
+	    post=1;;
+	*)
+	    error "unknown test type argument %s\n" "$i"
+	    exit 99
+	    ;;
+    esac
+done
+
+# Run test(s): each execution action gets its own tmux session, under mosh unless it is `direct`.
+client_wrapper=
+if [ -n "$client" ]; then
+    client_wrapper="${test_script} client"
+fi
+
+for run in $server_tests; do
+    log "Running server test %s.\n" "$run"
+	# XXX need to quote special chars in server pathname here somehow
+	sut="../../scripts/mosh --client=../frontend/mosh-client --server=$PWD/../frontend/mosh-server --local --bind-server=127.0.0.1 127.0.0.1"
+	testarg=$run
+	if [ "$run" = "direct" ]; then
+	    sut=""
+	fi
+	# Actually execute code under test
+	# XXX tmux 1.8 requires shell command as a single arg; once we move to 2.0, undo these quotes
+	# XXX this ignores $TMPDIR, because it results in an overlong pathname on OS X
+	if ! ${srcdir}/hold-stdin tmux -S "/tmp/.tmux-mosh-test-$$" -C new-session "${srcdir}/print-exitstatus ${client_wrapper} ${sut} \"${srcdir}/e2e-test-server\" \"${PWD}/${test_dir}/${run}\" \"${PWD}/${test_script} ${testarg}\"" > "${test_dir}/${run}.tmux.log"; then
+	    test_error "tmux failure on test %s\n" "$run"
+	fi
+	# Check for mosh failures: look for the success marker that print-exitstatus emits
+	if ! grep -q "%%% exitstatus: 0 %%%" "${test_dir}/${run}.tmux.log"; then
+	    test_error "mosh-client had non-zero exitstatus\n"
+	fi
+
+	# Check for server harness failures: both output files must exist and be non-empty
+	if [ ! -s "${test_dir}/${run}.capture" ] \
+	      || [ ! -s "${test_dir}/${run}.exitstatus" ]; then
+	    test_error "server harness failure on test %s\n" "$run"
+	fi
+	read server_rv < "${test_dir}/${run}.exitstatus"
+	if [ "$server_rv" -ne 0 ]; then
+	    test_error "server harness exited with status %s\n" "$server_rv"
+	fi
+
+done
+
+for compare in $compare_tests; do
+    log "Running server comparison %s.\n" "$compare"
+    # Choose the pair of captures to compare: verify uses direct vs.
+    # baseline; same and different use baseline vs. variant.
+    case $compare in
+	verify)
+	    test1="direct" test2="baseline";;
+	*)
+	    test1="baseline" test2="variant";;
+    esac
+    # Only "different" wants the two captures to differ.
+    case $compare in
+	different)
+	    desired=y badresult=same;;
+	*)
+	    desired=n badresult=different;;
+    esac
+    # diff -q succeeds only when the two files are identical.
+    differ=y
+    if diff -q "${test_dir}/${test1}.capture" "${test_dir}/${test2}.capture"; then
+	differ=n
+    fi
+    if [ $differ != $desired ]; then
+	test_failure "Output is %s between tests %s and %s\n" "$badresult" "$test1" "$test2"
+    fi
+done
+
+# Run the test script's post action (usually a custom validation of results); a non-zero status becomes our exit status.
+if [ -n "$post" ]; then
+    "${test_script}" post
+    status=$?
+    if [ $status -ne 0 ]; then
+	test_exitstatus $status "Post test failed with exitstatus %d\n" $status
+    fi
+fi
diff --git a/src/tests/e2e-test-server b/src/tests/e2e-test-server
new file mode 100755
index 0000000..c24a8db
--- /dev/null
+++ b/src/tests/e2e-test-server
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+#
+# Harness script for Mosh tests, server side.  Runs test script and
+# then captures screen with `tmux capture-pane`.  Captures exitstatus
+# of both and returns appropriate errors.
+#
+if [ $# -lt 2 ]; then
+    printf "not enough args\n" >&2
+    exit 99
+fi
+testname=$1  # path prefix for the .capture and .exitstatus output files
+shift
+rm -f "$testname.capture" "$testname.exitstatus"
+trap 'rv=$?; echo $rv > "$testname.exitstatus"; exit $rv' EXIT  # always record our exit status
+# check for tmux
+if [ -z "$TMUX_PANE" ]; then
+    printf "not running under tmux\n" >&2
+    exit 99
+fi
+# run harnessed command
+eval "$@"
+testret=$?
+# Wait a bit for tmux screen to become up to date.
+sleep 1
+# capture screen (paths are quoted so a build directory containing spaces works)
+if ! tmux capture-pane -et "$TMUX_PANE"; then
+    printf "tmux capture-pane failed, erroring test\n" >&2
+    exit 99
+fi
+if ! tmux save-buffer "$testname.capture"; then
+    printf "tmux save-buffer failed, erroring test\n" >&2
+    exit 99
+fi
+# return useful exitstatus from harnessed command
+if [ $testret -ne 0 ]; then
+    exit 1
+fi
+exit 0
diff --git a/src/tests/emulation-back-tab.test b/src/tests/emulation-back-tab.test
new file mode 100755
index 0000000..f407415
--- /dev/null
+++ b/src/tests/emulation-back-tab.test
@@ -0,0 +1,50 @@
+#!/bin/sh
+
+#
+# This test is for issue 539 on github.
+#
+
+fail()  # Report a printf-style message on stderr, then exit 99 (hard error).
+{
+    printf "$@" >&2
+    exit 99
+}
+
+# With no arguments this script is the test wrapper and hands control to e2e-test;
+# with exactly one argument it performs that action (baseline or post).
+PATH=$PATH:.:$srcdir
+# Top-level wrapper.
+if [ $# -eq 0 ]; then
+    e2e-test $0 baseline post
+    exit
+fi
+
+# OK, we have arguments, we're one of the test hooks.
+if [ $# -ne 1 ]; then
+    fail "bad arguments %s\n" "$@"
+fi
+
+baseline()  # Print "hello, wurld", then ESC [ Z followed by "o".
+{
+    printf 'hello, wurld\033[Zo\n'
+}
+
+post()  # Exit 0 if the capture shows "hello, world", 1 if it shows "hello, wurldo", else 99.
+{
+    if grep -q 'hello, world' $(basename $0).d/baseline.capture; then
+	exit 0
+    fi
+    if grep -q 'hello, wurldo' $(basename $0).d/baseline.capture; then
+	exit 1
+    fi
+    exit 99
+}
+
+case $1 in
+    baseline)
+	baseline;;
+    post)
+	post;;
+    *)
+	fail "unknown test argument %s\n" $1;;
+esac
diff --git a/src/tests/hold-stdin b/src/tests/hold-stdin
new file mode 100755
index 0000000..e6132d2
--- /dev/null
+++ b/src/tests/hold-stdin
@@ -0,0 +1,21 @@
+#!/usr/bin/env perl
+
+#
+# The sole function of this script is to provide a stdin that doesn't
+# read data or return EOF to its children.  If there's a clean,
+# portable, not-Perl way to do this, then that should replace this.
+#
+use warnings;
+use strict;
+
+my $pid = open(my $fh, "|-", @ARGV) or die "hold-stdin: cannot run @ARGV: $!\n";
+waitpid($pid, 0) == $pid or die "hold-stdin: waitpid failed: $!\n";
+my $rc;  # shell-style exit status derived from the child's wait status in $?
+if ($? == 0) {
+    $rc = 0;
+} elsif ($? >= 256) {
+    $rc = $? >> 8;  # normal exit: the exit code is in the high byte
+} else {
+    $rc = ($? & 127) | 128;  # killed by a signal: report 128 + signal number
+}
+exit $rc;
diff --git a/src/tests/print-exitstatus b/src/tests/print-exitstatus
new file mode 100755
index 0000000..bd39ac1
--- /dev/null
+++ b/src/tests/print-exitstatus
@@ -0,0 +1,21 @@
+#!/usr/bin/env perl
+
+#
+# Print exitstatus on stderr.
+#
+use warnings;
+use strict;
+
+my $rc = system(@ARGV);  # list form: the command is run without a shell
+if ($? == -1) {
+    die "system failed: $!\n";
+}
+if ($? == 0) {
+    $rc = 0;
+} elsif ($? >= 256) {
+    $rc = $? >> 8;  # normal exit: the exit code is in the high byte
+} else {
+    $rc = ($? & 127) | 128;  # killed by a signal: report 128 + signal number
+}
+print STDERR "%%% exitstatus: ${rc} %%%\n";  # marker that e2e-test greps for
+exit $rc;
diff --git a/src/tests/unicode-later-combining.test b/src/tests/unicode-later-combining.test
new file mode 100755
index 0000000..980b8ed
--- /dev/null
+++ b/src/tests/unicode-later-combining.test
@@ -0,0 +1,61 @@
+#!/bin/sh
+
+#
+# This test is for the first Unicode issue described on the Mosh web
+# page, a combining character drawn on a cell after returning the
+# cursor to that cell.
+#
+# XXX This test is fragile because it depends on tmux's unicode rendering.
+# The baseline and variant tests produce different (but valid) outputs
+# that are visually identical.  The variant test is not run or validated.
+#
+
+fail()  # Report a printf-style message on stderr, then exit 99 (hard error).
+{
+    printf "$@" >&2
+    exit 99
+}
+
+# With no arguments this script is the test wrapper and hands control to e2e-test;
+# with exactly one argument it performs that action (baseline, variant or post).
+PATH=$PATH:.:$srcdir
+# Top-level wrapper.
+if [ $# -eq 0 ]; then
+    e2e-test $0 baseline post
+    exit
+fi
+
+# OK, we have arguments, we're one of the test hooks.
+if [ $# -ne 1 ]; then
+    fail "bad arguments %s\n" "$@"
+fi
+
+baseline()  # Middle line is only the bytes \314\202 (UTF-8 for U+0302, a combining circumflex).
+{
+    printf 'abc\n\314\202\ndef\n'
+}
+
+variant()  # Same as baseline, but a space precedes the combining character.
+{
+    printf 'abc\n \314\202\ndef\n'
+}
+
+post()  # Exit 0 if the capture has a line of exactly \302\240\314\202 (U+00A0 then U+0302), else 1.
+{
+    export LANG=C
+    if grep -q "$(printf '^\302\240\314\202$')" $(basename $0).d/baseline.capture; then
+	exit 0
+    fi
+    exit 1
+}
+
+case $1 in
+    baseline)
+	baseline;;
+    variant)
+	variant;;
+    post)
+	post;;
+    *)
+	fail "unknown test argument %s\n" $1;;
+esac