From 812b24b83a07b1d012320106ced734d28c7746df Mon Sep 17 00:00:00 2001
From: Keith Winstein
Date: Tue, 6 Mar 2012 22:13:04 -0500
Subject: [PATCH] Screen out ill-formed UTF-8 representing surrogate code point

---
 src/terminal/parser.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/terminal/parser.cc b/src/terminal/parser.cc
index b00547b..f344a74 100644
--- a/src/terminal/parser.cc
+++ b/src/terminal/parser.cc
@@ -126,6 +126,15 @@ std::list Parser::UTF8Parser::input( char c )
       pwc = (wchar_t) 0xFFFD;
     }
 
+    if ( (pwc >= 0xD800) && (pwc <= 0xDFFF) ) { /* surrogate code point */
+      /*
+        OS X unfortunately allows these sequences without EILSEQ, but
+        they are ill-formed UTF-8 and we shouldn't repeat them to the
+        user's terminal.
+      */
+      pwc = (wchar_t) 0xFFFD;
+    }
+
     std::list vec = parser.input( pwc );
     ret.insert( ret.end(), vec.begin(), vec.end() );
 