Screen out ill-formed UTF-8 representing surrogate code points

This commit is contained in:
Keith Winstein
2012-03-06 22:13:04 -05:00
committed by Anders Kaseorg
parent 875e17e966
commit 812b24b83a
+9
View File
@@ -126,6 +126,15 @@ std::list<Parser::Action *> Parser::UTF8Parser::input( char c )
pwc = (wchar_t) 0xFFFD;
}
if ( (pwc >= 0xD800) && (pwc <= 0xDFFF) ) { /* surrogate code point */
/*
OS X unfortunately allows these sequences without EILSEQ, but
they are ill-formed UTF-8 and we shouldn't repeat them to the
user's terminal.
*/
pwc = (wchar_t) 0xFFFD;
}
std::list<Action *> vec = parser.input( pwc );
ret.insert( ret.end(), vec.begin(), vec.end() );