Move UTF-8 code inside class
This commit is contained in:
@@ -3,7 +3,7 @@ objects = parserstate.o parser.o templates.o
|
|||||||
executables = parse
|
executables = parse
|
||||||
|
|
||||||
CXX = g++
|
CXX = g++
|
||||||
CXXFLAGS = -g --std=c++0x -pedantic -Werror -Wall -Wextra -Weffc++ -fno-implicit-templates -pipe -D_FILE_OFFSET_BITS=64 -D_XOPEN_SOURCE=500 -D_GNU_SOURCE
|
CXXFLAGS = -g --std=c++0x -pedantic -Werror -Wall -Wextra -Weffc++ -fno-implicit-templates -fno-default-inline -pipe -D_FILE_OFFSET_BITS=64 -D_XOPEN_SOURCE=500 -D_GNU_SOURCE
|
||||||
LIBS = -lutil
|
LIBS = -lutil
|
||||||
|
|
||||||
all: $(executables)
|
all: $(executables)
|
||||||
|
|||||||
@@ -22,21 +22,9 @@
|
|||||||
|
|
||||||
const size_t buf_size = 1024;
|
const size_t buf_size = 1024;
|
||||||
|
|
||||||
/* Old-side (removed) state bundle that the previous vt_parser() took by
   pointer: the descriptors, the mbrtowc() conversion state, a byte
   buffer with its fill level, and the escape-sequence parser. */
class stripstate {
|
|
||||||
public:
|
|
||||||
/* src_fd is the descriptor the old vt_parser() read from; dest_fd is
   only seen being assigned (STDOUT_FILENO) in emulate_terminal(). */
int src_fd, dest_fd;
|
|
||||||
/* multibyte conversion state carried across mbrtowc() calls */
mbstate_t ps;
|
|
||||||
/* raw bytes read from src_fd that have not been decoded yet */
char buf[ buf_size ];
|
|
||||||
/* number of valid bytes currently held in buf */
size_t buf_len;
|
|
||||||
/* receives the decoded wide characters via parser.input() */
Parser::Parser parser;
|
|
||||||
|
|
||||||
/* Starts with both descriptors invalid (-1), value-initialized
   conversion state and buffer, and an empty buffer length. */
stripstate() : src_fd(-1), dest_fd(-1), ps(),
|
|
||||||
buf(), buf_len(0), parser() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
void emulate_terminal( int fd );
|
void emulate_terminal( int fd );
|
||||||
int copy( int src, int dest );
|
int copy( int src, int dest );
|
||||||
int vt_parser( struct stripstate *state );
|
int vt_parser( int fd, Parser::UTF8Parser *parser );
|
||||||
|
|
||||||
int main( int argc __attribute__((unused)),
|
int main( int argc __attribute__((unused)),
|
||||||
char *argv[] __attribute__((unused)),
|
char *argv[] __attribute__((unused)),
|
||||||
@@ -87,13 +75,6 @@ int main( int argc __attribute__((unused)),
|
|||||||
|
|
||||||
my_argv[ 1 ] = NULL;
|
my_argv[ 1 ] = NULL;
|
||||||
|
|
||||||
/*
|
|
||||||
if ( setenv( "TERM", "vt220", true ) < 0 ) {
|
|
||||||
perror( "setenv" );
|
|
||||||
exit( 1 );
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
if ( execve( "/bin/bash", my_argv, envp ) < 0 ) {
|
if ( execve( "/bin/bash", my_argv, envp ) < 0 ) {
|
||||||
perror( "execve" );
|
perror( "execve" );
|
||||||
exit( 1 );
|
exit( 1 );
|
||||||
@@ -123,7 +104,7 @@ int main( int argc __attribute__((unused)),
|
|||||||
|
|
||||||
void emulate_terminal( int fd )
|
void emulate_terminal( int fd )
|
||||||
{
|
{
|
||||||
struct stripstate output_stripstate;
|
Parser::UTF8Parser parser;
|
||||||
struct pollfd pollfds[ 2 ];
|
struct pollfd pollfds[ 2 ];
|
||||||
|
|
||||||
pollfds[ 0 ].fd = STDIN_FILENO;
|
pollfds[ 0 ].fd = STDIN_FILENO;
|
||||||
@@ -132,11 +113,6 @@ void emulate_terminal( int fd )
|
|||||||
pollfds[ 1 ].fd = fd;
|
pollfds[ 1 ].fd = fd;
|
||||||
pollfds[ 1 ].events = POLLIN;
|
pollfds[ 1 ].events = POLLIN;
|
||||||
|
|
||||||
output_stripstate.src_fd = fd;
|
|
||||||
output_stripstate.dest_fd = STDOUT_FILENO;
|
|
||||||
output_stripstate.buf_len = 0;
|
|
||||||
memset( &output_stripstate.ps, 0, sizeof( output_stripstate.ps ) );
|
|
||||||
|
|
||||||
while ( 1 ) {
|
while ( 1 ) {
|
||||||
int active_fds = poll( pollfds, 2, -1 );
|
int active_fds = poll( pollfds, 2, -1 );
|
||||||
if ( active_fds <= 0 ) {
|
if ( active_fds <= 0 ) {
|
||||||
@@ -149,7 +125,7 @@ void emulate_terminal( int fd )
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else if ( pollfds[ 1 ].revents & POLLIN ) {
|
} else if ( pollfds[ 1 ].revents & POLLIN ) {
|
||||||
if ( vt_parser( &output_stripstate ) < 0 ) {
|
if ( vt_parser( fd, &parser ) < 0 ) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else if ( (pollfds[ 0 ].revents | pollfds[ 1 ].revents)
|
} else if ( (pollfds[ 0 ].revents | pollfds[ 1 ].revents)
|
||||||
@@ -188,11 +164,12 @@ int copy( int src, int dest )
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int vt_parser( struct stripstate *state )
|
int vt_parser( int fd, Parser::UTF8Parser *parser )
|
||||||
{
|
{
|
||||||
|
char buf[ buf_size ];
|
||||||
|
|
||||||
/* fill buffer if possible */
|
/* fill buffer if possible */
|
||||||
ssize_t bytes_read = read( state->src_fd, state->buf + state->buf_len,
|
ssize_t bytes_read = read( fd, buf, buf_size );
|
||||||
buf_size - state->buf_len );
|
|
||||||
if ( bytes_read == 0 ) { /* EOF */
|
if ( bytes_read == 0 ) { /* EOF */
|
||||||
return -1;
|
return -1;
|
||||||
} else if ( bytes_read < 0 ) {
|
} else if ( bytes_read < 0 ) {
|
||||||
@@ -200,61 +177,9 @@ int vt_parser( struct stripstate *state )
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
state->buf_len += bytes_read;
|
/* feed to parser */
|
||||||
|
for ( int i = 0; i < bytes_read; i++ ) {
|
||||||
/* translate buffer from UTF-8 to wide characters */
|
std::vector<Parser::Action *> actions = parser->input( buf[ i ] );
|
||||||
wchar_t out_buffer[ buf_size ];
|
|
||||||
size_t in_index = 0, out_index = 0;
|
|
||||||
|
|
||||||
while ( 1 ) {
|
|
||||||
assert( in_index <= state->buf_len );
|
|
||||||
if ( in_index == state->buf_len ) {
|
|
||||||
state->buf_len = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
wchar_t pwc;
|
|
||||||
size_t bytes_parsed = mbrtowc( &pwc, state->buf + in_index,
|
|
||||||
state->buf_len - in_index,
|
|
||||||
&state->ps );
|
|
||||||
/* this returns 0 when n = 0! */
|
|
||||||
|
|
||||||
/* This function annoyingly returns a size_t so we have to check
|
|
||||||
the negative values first before the "> 0" branch */
|
|
||||||
|
|
||||||
if ( bytes_parsed == 0 ) {
|
|
||||||
/* character was NUL */
|
|
||||||
in_index++; /* this relies on knowing UTF-8 NUL is one byte! */
|
|
||||||
assert( out_index < buf_size );
|
|
||||||
out_buffer[ out_index++ ] = L'\0';
|
|
||||||
} else if ( bytes_parsed == (size_t) -1 ) {
|
|
||||||
/* invalid sequence */
|
|
||||||
assert( errno == EILSEQ );
|
|
||||||
in_index++;
|
|
||||||
assert( out_index < buf_size );
|
|
||||||
out_buffer[ out_index++ ] = (wchar_t) 0xFFFD;
|
|
||||||
memset( &state->ps, 0, sizeof( state->ps ) );
|
|
||||||
} else if ( bytes_parsed == (size_t) -2 ) {
|
|
||||||
/* can't parse complete multibyte character */
|
|
||||||
memmove( state->buf, state->buf + in_index,
|
|
||||||
state->buf_len - in_index );
|
|
||||||
state->buf_len = state->buf_len - in_index;
|
|
||||||
break;
|
|
||||||
} else if ( bytes_parsed > 0 ) {
|
|
||||||
/* parsed something */
|
|
||||||
in_index += bytes_parsed;
|
|
||||||
assert( out_index < buf_size );
|
|
||||||
out_buffer[ out_index++ ] = pwc;
|
|
||||||
} else {
|
|
||||||
fprintf( stderr, "Unknown return value %d from mbrtowc\n",
|
|
||||||
bytes_parsed );
|
|
||||||
exit( 1 );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* feed to vtparse */
|
|
||||||
for ( size_t i = 0; i < out_index; i++ ) {
|
|
||||||
std::vector<Parser::Action *> actions = state->parser.input( out_buffer[ i ] );
|
|
||||||
for ( std::vector<Parser::Action *>::iterator j = actions.begin();
|
for ( std::vector<Parser::Action *>::iterator j = actions.begin();
|
||||||
j != actions.end();
|
j != actions.end();
|
||||||
j++ ) {
|
j++ ) {
|
||||||
@@ -263,7 +188,7 @@ int vt_parser( struct stripstate *state )
|
|||||||
assert( act );
|
assert( act );
|
||||||
|
|
||||||
if ( act->char_present ) {
|
if ( act->char_present ) {
|
||||||
printf( "%s(0x%02x) ", act->name().c_str(), act->ch );
|
printf( "%s(0x%02x=%lc) ", act->name().c_str(), act->ch, act->ch );
|
||||||
} else {
|
} else {
|
||||||
printf( "[%s] ", act->name().c_str() );
|
printf( "[%s] ", act->name().c_str() );
|
||||||
}
|
}
|
||||||
|
|||||||
+57
@@ -1,5 +1,6 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <typeinfo>
|
#include <typeinfo>
|
||||||
|
#include <langinfo.h>
|
||||||
|
|
||||||
#include "parser.hpp"
|
#include "parser.hpp"
|
||||||
|
|
||||||
@@ -34,3 +35,59 @@ std::vector<Parser::Action *> Parser::Parser::input( wchar_t ch )
|
|||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Constructs the UTF-8 front end with a default-constructed Parser and
   an empty pending-byte buffer (buf_len = 0).

   Fails unless nl_langinfo( CODESET ) reports exactly "UTF-8": the
   message is printed to stderr and then thrown as a std::string.

   buf itself is not in the initializer list; input() always writes a
   byte before mbrtowc() reads buf, and buf_len starts at 0.

   NOTE(review): nl_langinfo() reports the codeset of the current
   locale; where (or whether) the program sets the locale is not
   visible in this diff -- confirm. */
Parser::UTF8Parser::UTF8Parser()
|
||||||
|
: parser(), buf_len( 0 )
|
||||||
|
{
|
||||||
|
/* exact string match: any other codeset name is rejected */
if ( strcmp( nl_langinfo( CODESET ), "UTF-8" ) != 0 ) {
|
||||||
|
fprintf( stderr, "rtm requires a UTF-8 locale.\n" );
|
||||||
|
throw std::string( "rtm requires a UTF-8 locale." );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Feeds one byte of UTF-8 input to the decoder.

   The byte is appended to the pending buffer and the whole buffer is
   decoded with mbrtowc(). Once a wide character is available (a
   complete character, NUL, or U+FFFD standing in for an invalid
   sequence) the buffer is emptied and the character is passed to the
   wrapped Parser.

   c: next input byte.
   Returns: the result of parser.input() for the decoded character, or
            an empty vector while the multibyte sequence is incomplete.
   Throws: std::string from the final else branch (see note there). */
std::vector<Parser::Action *> Parser::UTF8Parser::input( char c )
|
||||||
|
{
|
||||||
|
/* there must be room for the new byte; every branch below that
   produces a character resets buf_len to 0 */
assert( buf_len < BUF_SIZE );
|
||||||
|
|
||||||
|
buf[ buf_len++ ] = c;
|
||||||
|
|
||||||
|
/* This function will only work in a UTF-8 locale. */
|
||||||
|
/* This must be asserted by other code. */
|
||||||
|
|
||||||
|
wchar_t pwc;
|
||||||
|
/* a zeroed conversion state is used on every call, so the pending
   bytes are always re-decoded from the start of buf */
mbstate_t ps;
|
||||||
|
memset( &ps, 0, sizeof( ps ) );
|
||||||
|
|
||||||
|
size_t bytes_parsed = mbrtowc( &pwc, buf, buf_len, &ps );
|
||||||
|
|
||||||
|
/* this returns 0 when n = 0! */
|
||||||
|
|
||||||
|
/* This function annoyingly returns a size_t so we have to check
|
||||||
|
the negative values first before the "> 0" branch */
|
||||||
|
|
||||||
|
if ( bytes_parsed == 0 ) {
|
||||||
|
/* character was NUL, accept and clear buffer */
|
||||||
|
assert( buf_len == 1 );
|
||||||
|
buf_len = 0;
|
||||||
|
pwc = L'\0';
|
||||||
|
} else if ( bytes_parsed == (size_t) -1 ) {
|
||||||
|
/* invalid sequence, use replacement character and clear buffer */
|
||||||
|
assert( errno == EILSEQ );
|
||||||
|
/* NOTE(review): this discards every pending byte, including the one
   just appended; if that byte could itself begin a valid character
   it is lost -- confirm this is intended */
buf_len = 0;
|
||||||
|
pwc = (wchar_t) 0xFFFD;
|
||||||
|
} else if ( bytes_parsed == (size_t) -2 ) {
|
||||||
|
/* can't parse complete multibyte character */
|
||||||
|
/* return empty vector */
|
||||||
|
/* the pending bytes stay in buf for the next call */
std::vector<Action *> vec;
|
||||||
|
return vec;
|
||||||
|
} else if ( bytes_parsed > 0 ) {
|
||||||
|
/* parsed into pwc, accept and clear buffer */
|
||||||
|
/* decoding happens after every byte, so a complete character must
   have consumed exactly the bytes buffered so far */
assert( bytes_parsed == buf_len );
|
||||||
|
buf_len = 0;
|
||||||
|
} else {
|
||||||
|
/* NOTE(review): bytes_parsed is a size_t, so it is either 0 or > 0
   and the branches above cover every value; this branch looks
   unreachable */
throw std::string( "Unknown return value from mbrtowc" );
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we parsed character into pwc */
|
||||||
|
return parser.input( pwc );
|
||||||
|
}
|
||||||
|
|||||||
+17
-1
@@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
#include <wchar.h>
|
#include <wchar.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#include "parsertransition.hpp"
|
#include "parsertransition.hpp"
|
||||||
#include "parseraction.hpp"
|
#include "parseraction.hpp"
|
||||||
@@ -25,7 +26,22 @@ namespace Parser {
|
|||||||
bool operator=( const Parser & );
|
bool operator=( const Parser & );
|
||||||
~Parser() {}
|
~Parser() {}
|
||||||
|
|
||||||
std::vector<Action *> input( wchar_t c );
|
std::vector<Action *> input( wchar_t ch );
|
||||||
|
};
|
||||||
|
|
||||||
|
static const size_t BUF_SIZE = 8;
|
||||||
|
|
||||||
|
/* Byte-oriented front end for Parser: takes input one char at a time,
   buffers the bytes of a multibyte sequence, and hands each decoded
   wide character to the wrapped Parser. */
class UTF8Parser {
|
||||||
|
private:
|
||||||
|
/* wide-character parser that receives the decoded characters */
Parser parser;
|
||||||
|
|
||||||
|
/* bytes of the multibyte character currently being assembled */
char buf[ BUF_SIZE ];
|
||||||
|
/* number of valid bytes in buf */
size_t buf_len;
|
||||||
|
|
||||||
|
public:
|
||||||
|
/* declared here; defined out of line */
UTF8Parser();
|
||||||
|
|
||||||
|
/* consumes one input byte and returns a vector of Action pointers */
std::vector<Action *> input( char c );
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user