Screen out ill-formed UTF-8 representing surrogate code points
This commit is contained in:
committed by
Anders Kaseorg
parent
875e17e966
commit
812b24b83a
@@ -126,6 +126,15 @@ std::list<Parser::Action *> Parser::UTF8Parser::input( char c )
|
|||||||
pwc = (wchar_t) 0xFFFD;
|
pwc = (wchar_t) 0xFFFD;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( (pwc >= 0xD800) && (pwc <= 0xDFFF) ) { /* surrogate code point */
|
||||||
|
/*
|
||||||
|
OS X unfortunately allows these sequences without EILSEQ, but
|
||||||
|
they are ill-formed UTF-8 and we shouldn't repeat them to the
|
||||||
|
user's terminal.
|
||||||
|
*/
|
||||||
|
pwc = (wchar_t) 0xFFFD;
|
||||||
|
}
|
||||||
|
|
||||||
std::list<Action *> vec = parser.input( pwc );
|
std::list<Action *> vec = parser.input( pwc );
|
||||||
ret.insert( ret.end(), vec.begin(), vec.end() );
|
ret.insert( ret.end(), vec.begin(), vec.end() );
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user