Screen out ill-formed UTF-8 representing surrogate code points
This commit is contained in:
committed by
Anders Kaseorg
parent
875e17e966
commit
812b24b83a
@@ -126,6 +126,15 @@ std::list<Parser::Action *> Parser::UTF8Parser::input( char c )
|
||||
pwc = (wchar_t) 0xFFFD;
|
||||
}
|
||||
|
||||
if ( (pwc >= 0xD800) && (pwc <= 0xDFFF) ) { /* surrogate code point */
|
||||
/*
|
||||
OS X unfortunately allows these sequences without EILSEQ, but
|
||||
they are ill-formed UTF-8 and we shouldn't repeat them to the
|
||||
user's terminal.
|
||||
*/
|
||||
pwc = (wchar_t) 0xFFFD;
|
||||
}
|
||||
|
||||
std::list<Action *> vec = parser.input( pwc );
|
||||
ret.insert( ret.end(), vec.begin(), vec.end() );
|
||||
|
||||
|
||||
Reference in New Issue
Block a user