Avoid wcwidth(), wcrtomb() and mbrtowc() on ASCII/ISO8859-1 characters.
ASCII <-> UTF-8 is a trivial mapping, so avoid wcrtomb() and mbrtowc() for ASCII characters. Printable ISO-8859-1 characters are all narrow and cheap to test for, so avoid wcwidth() for them. It might be possible to cheaply test other popular Unicode blocks and/or planes as well. These two changes make input processing 2-3x faster on Linux and FreeBSD. The performance improvement in actual usage is more modest but still significant.
This commit is contained in:
@@ -80,10 +80,15 @@ void Parser::UTF8Parser::input( char c, Actions &ret )
|
|||||||
{
|
{
|
||||||
assert( buf_len < BUF_SIZE );
|
assert( buf_len < BUF_SIZE );
|
||||||
|
|
||||||
|
/* 1-byte UTF-8 character, aka ASCII? Cheat. */
|
||||||
|
if ( buf_len == 0 && static_cast<unsigned char>(c) <= 0x7f ) {
|
||||||
|
parser.input( static_cast<wchar_t>(c), ret );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
buf[ buf_len++ ] = c;
|
buf[ buf_len++ ] = c;
|
||||||
|
|
||||||
/* This function will only work in a UTF-8 locale. */
|
/* This function will only work in a UTF-8 locale. */
|
||||||
|
|
||||||
wchar_t pwc;
|
wchar_t pwc;
|
||||||
mbstate_t ps = mbstate_t();
|
mbstate_t ps = mbstate_t();
|
||||||
|
|
||||||
|
|||||||
@@ -61,7 +61,13 @@ void Emulator::print( const Parser::Print *act )
|
|||||||
{
|
{
|
||||||
assert( act->char_present );
|
assert( act->char_present );
|
||||||
|
|
||||||
int chwidth = act->ch == L'\0' ? -1 : wcwidth( act->ch );
|
const wchar_t ch = act->ch;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check for printing ISO 8859-1 first, it's a cheap way to detect
|
||||||
|
* some common narrow characters.
|
||||||
|
*/
|
||||||
|
const int chwidth = ch == L'\0' ? -1 : ( Cell::isprint_iso8859_1( ch ) ? 1 : wcwidth( ch ));
|
||||||
|
|
||||||
Cell *this_cell = fb.get_mutable_cell();
|
Cell *this_cell = fb.get_mutable_cell();
|
||||||
|
|
||||||
@@ -100,7 +106,7 @@ void Emulator::print( const Parser::Print *act )
|
|||||||
}
|
}
|
||||||
|
|
||||||
fb.reset_cell( this_cell );
|
fb.reset_cell( this_cell );
|
||||||
this_cell->append( act->ch );
|
this_cell->append( ch );
|
||||||
this_cell->width = chwidth;
|
this_cell->width = chwidth;
|
||||||
fb.apply_renditions_to_cell( this_cell );
|
fb.apply_renditions_to_cell( this_cell );
|
||||||
|
|
||||||
@@ -134,7 +140,7 @@ void Emulator::print( const Parser::Print *act )
|
|||||||
}
|
}
|
||||||
if ( combining_cell->contents.size() < 32 ) {
|
if ( combining_cell->contents.size() < 32 ) {
|
||||||
/* seems like a reasonable limit on combining characters */
|
/* seems like a reasonable limit on combining characters */
|
||||||
combining_cell->append( act->ch );
|
combining_cell->append( ch );
|
||||||
}
|
}
|
||||||
act->handled = true;
|
act->handled = true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -126,8 +126,19 @@ namespace Terminal {
|
|||||||
|
|
||||||
bool compare( const Cell &other ) const;
|
bool compare( const Cell &other ) const;
|
||||||
|
|
||||||
|
// Is this a printing ISO 8859-1 character?
|
||||||
|
static bool isprint_iso8859_1( const wchar_t c )
|
||||||
|
{
|
||||||
|
return ( c <= 0xff && c >= 0xa0 ) || ( c <= 0x7e && c >= 0x20 );
|
||||||
|
}
|
||||||
|
|
||||||
static void append_to_str( std::string &dest, const wchar_t c )
|
static void append_to_str( std::string &dest, const wchar_t c )
|
||||||
{
|
{
|
||||||
|
/* ASCII? Cheat. */
|
||||||
|
if ( static_cast<uint32_t>(c) <= 0x7f ) {
|
||||||
|
dest.push_back( static_cast<char>(c) );
|
||||||
|
return;
|
||||||
|
}
|
||||||
static mbstate_t ps = mbstate_t();
|
static mbstate_t ps = mbstate_t();
|
||||||
char tmp[MB_LEN_MAX];
|
char tmp[MB_LEN_MAX];
|
||||||
size_t ignore = wcrtomb(NULL, 0, &ps);
|
size_t ignore = wcrtomb(NULL, 0, &ps);
|
||||||
@@ -138,6 +149,11 @@ namespace Terminal {
|
|||||||
|
|
||||||
void append( const wchar_t c )
|
void append( const wchar_t c )
|
||||||
{
|
{
|
||||||
|
/* ASCII? Cheat. */
|
||||||
|
if ( static_cast<uint32_t>(c) <= 0x7f ) {
|
||||||
|
contents.push_back( static_cast<char>(c) );
|
||||||
|
return;
|
||||||
|
}
|
||||||
static mbstate_t ps = mbstate_t();
|
static mbstate_t ps = mbstate_t();
|
||||||
char tmp[MB_LEN_MAX];
|
char tmp[MB_LEN_MAX];
|
||||||
size_t ignore = wcrtomb(NULL, 0, &ps);
|
size_t ignore = wcrtomb(NULL, 0, &ps);
|
||||||
|
|||||||
Reference in New Issue
Block a user