The new Regex Colorizer feature has just been added in ioninja-5.1.0 and is not yet documented. It uses an FSM-based rather than a backtracking engine, and as such, it doesn't — and never will — support some PCRE features, most notably backtracking-dependent ones such as backreferences, nor named groups. POSIX-style character classes (e.g., [:space:]) are currently not supported (but can be easily added). Until we have a dedicated documentation page, you can refer to the following Ragel definition as a reference for what's supported:
#. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
#
# standard definitions
#
# character ranges shared by all machines below
oct = [0-7];
dec = [0-9];
hex = [0-9a-fA-F];
ws = [ \t\r];
# UTF-8 byte ranges: lead bytes for 1- to 4-byte sequences plus the
# continuation-byte range (matched byte-wise, not as decoded code points)
utf8_1 = 0x00 .. 0x7f; # single byte (ASCII)
utf8_2 = 0xc0 .. 0xdf; # lead byte of a 2-byte sequence
utf8_3 = 0xe0 .. 0xef; # lead byte of a 3-byte sequence
utf8_4 = 0xf0 .. 0xf7; # lead byte of a 4-byte sequence
utf8_c = 0x80 .. 0xbf; # continuation byte
#. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
#
# main machine
#
# Top-level scanner: anchors, groups, quantifier operators, escapes, and
# literal characters. '[' / '[^' and '{' hand control to the dedicated
# char_class and quantifier sub-machines via fgoto.
main := |*
# anchors
'^' { createToken(TokenKind_AnchorBeginLine); };
'$' { createToken(TokenKind_AnchorEndLine); };
'\\A' { createToken(TokenKind_AnchorBeginText); };
'\\z' { createToken(TokenKind_AnchorEndText); };
'\\b' { createToken(TokenKind_AnchorWordBoundary); };
'\\B' { createToken(TokenKind_AnchorNotWordBoundary); };
# character classes and groups ('[^' listed before '[' so the negation
# mark is consumed together with the bracket)
'[^' { createToken(TokenKind_NegatedCharClass); fgoto char_class; };
'[' { createToken(TokenKind_CharClass); fgoto char_class; };
'(?:' { createToken(TokenKind_NonCapturingGroup); };
'(' { createToken(TokenKind_Group); };
')' { createToken(TokenKind_EndGroup); };
# quantifier operators; the two-char non-greedy forms win over the
# one-char forms via Ragel's longest-match rule
'??' { createToken(TokenKind_NonGreedyQuestion); };
'?' { createToken(TokenKind_Question); };
'*?' { createToken(TokenKind_NonGreedyStar); };
'*' { createToken(TokenKind_Star); };
'+?' { createToken(TokenKind_NonGreedyPlus); };
'+' { createToken(TokenKind_Plus); };
'|' { createToken(TokenKind_Pipe); };
'{' { createToken(TokenKind_Quantifier); fgoto quantifier; };
'.' { createToken(TokenKind_AnyChar); };
# standard backslash character classes
'\\d' { createToken(TokenKind_StdCharClassDigit); };
'\\D' { createToken(TokenKind_StdCharClassNonDigit); };
'\\h' { createToken(TokenKind_StdCharClassHex); };
'\\H' { createToken(TokenKind_StdCharClassNonHex); };
'\\w' { createToken(TokenKind_StdCharClassWord); };
'\\W' { createToken(TokenKind_StdCharClassNonWord); };
'\\s' { createToken(TokenKind_StdCharClassSpace); };
'\\S' { createToken(TokenKind_StdCharClassNonSpace); };
# escape sequences yielding literal characters
'\\0' { createCharToken(0); };
'\\a' { createCharToken('\a'); };
# NOTE(review): same pattern as the word-boundary anchor rule above; on a
# tie Ragel scanners prefer the rule listed first, so this backspace rule
# is unreachable in main — '\b' as backspace only works inside char_class
'\\b' { createCharToken('\b'); };
'\\e' { createCharToken('\x1b'); };
'\\f' { createCharToken('\f'); };
'\\n' { createCharToken('\n'); };
'\\r' { createCharToken('\r'); };
'\\t' { createCharToken('\t'); };
'\\v' { createCharToken('\v'); };
# numeric escapes: ts points at the token start, so skip the prefix
'\\x' hex{2} { createHexCharToken_2(ts + 2); };
'\\u' hex{4} { createHexCharToken_4(ts + 2); };
'\\U' hex{8} { createHexCharToken_8(ts + 2); };
'\\' oct{3} { createOctCharToken(ts + 1); };
# any other escaped char stands for itself
'\\' any { createCharToken(ts[1]); };
# literal characters: multi-byte UTF-8 sequences are folded into a single
# char token; bare 'any' catches stray bytes (e.g., invalid UTF-8)
utf8_1 { createCharToken(ts[0]); };
utf8_2 utf8_c { createUtf8CharToken_2(ts); };
utf8_3 utf8_c{2} { createUtf8CharToken_3(ts); };
utf8_4 utf8_c{3} { createUtf8CharToken_4(ts); };
any { createCharToken(ts[0]); };
*|;
#. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
#
# char class machine
#
# Interior of a [...] class: only '-' (range) and ']' (terminator) are
# special; anchors do not apply here, so '\b' correctly means backspace.
char_class := |*
'-' { createToken(TokenKind_Dash); };
']' { createToken(TokenKind_EndCharClass); fgoto main; };
# alas, Ragel doesn't allow injecting sub-scanners, hence, copy-paste...
'\\d' { createToken(TokenKind_StdCharClassDigit); };
'\\D' { createToken(TokenKind_StdCharClassNonDigit); };
'\\h' { createToken(TokenKind_StdCharClassHex); };
'\\H' { createToken(TokenKind_StdCharClassNonHex); };
'\\w' { createToken(TokenKind_StdCharClassWord); };
'\\W' { createToken(TokenKind_StdCharClassNonWord); };
'\\s' { createToken(TokenKind_StdCharClassSpace); };
'\\S' { createToken(TokenKind_StdCharClassNonSpace); };
# escape sequences (same set as in main, minus the anchors)
'\\0' { createCharToken(0); };
'\\a' { createCharToken('\a'); };
'\\b' { createCharToken('\b'); };
'\\e' { createCharToken('\x1b'); };
'\\f' { createCharToken('\f'); };
'\\n' { createCharToken('\n'); };
'\\r' { createCharToken('\r'); };
'\\t' { createCharToken('\t'); };
'\\v' { createCharToken('\v'); };
'\\x' hex{2} { createHexCharToken_2(ts + 2); };
'\\u' hex{4} { createHexCharToken_4(ts + 2); };
'\\U' hex{8} { createHexCharToken_8(ts + 2); };
'\\' oct{3} { createOctCharToken(ts + 1); };
'\\' any { createCharToken(ts[1]); };
# literal characters, with UTF-8 sequences folded into one token
utf8_1 { createCharToken(ts[0]); };
utf8_2 utf8_c { createUtf8CharToken_2(ts); };
utf8_3 utf8_c{2} { createUtf8CharToken_3(ts); };
utf8_4 utf8_c{3} { createUtf8CharToken_4(ts); };
any { createCharToken(ts[0]); };
*|;
#. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
#
# quantifier machine
#
# Interior of a {m,n} quantifier: numbers, the comma, and the closing
# brace; whitespace is silently skipped (no action attached to ws).
quantifier := |*
# dec+ is matched maximally, so the char at te is a non-digit and atoi
# stops exactly at the token end despite ts not being NUL-terminated there
dec+ { createNumberToken(atoi(ts)); };
',' { createToken(TokenKind_Comma); };
'}' { createToken(TokenKind_EndQuantifier); fgoto main; };
ws;
*|;
Just as you've noticed, not everything works as expected (e.g., there are issues with anchor processing and quantifiers); these issues will be polished over the upcoming service releases.
For the time being, you can use this to colorize the command byte trigger (0x52 or 0x57) and the following four bytes:
[\x52\x57]....
I would also consider creating a simple protocol analyzer layer that would allow for more options when it comes to visual aids. For example, you can insert a line "break" before each occurrence of the command byte trigger with this code:
// Log layer that inserts a visual "break" line before every occurrence of
// a command trigger byte (0x52 'R' or 0x57 'W') in TX/RX data records.
class BreakOnCharLayer:
	doc.Layer,
	log.Converter {
public:
	construct(doc.PluginHost* pluginHost) {
		basetype.construct(pluginHost);
		pluginHost.m_log.addConverter(this); // hook into the log pipeline
	}

	// Splits each TX/RX record at trigger bytes, emitting a Break record
	// before every chunk that starts with a trigger byte.
	// Returns false to pass non-data records through unmodified.
	override bool convert(
		log.Writer* writer,
		uint64_t timestamp,
		uint64_t recordCode,
		void const* p0,
		size_t size
	) {
		if (recordCode != log.StdRecordCode.Tx &&
			recordCode != log.StdRecordCode.Rx)
			return false; // not a data record -- keep as-is

		char const* base = p0; // start of the not-yet-written chunk
		char const* p = p0;
		char const* end = p + size;
		for (; p < end; p++) {
			char c = *p;
			if (c != 0x52 && c != 0x57)
				continue;

			// flush the bytes preceding the trigger; [base, p) is exactly
			// p - base bytes (the original 'p - base - 1' was an off-by-one
			// that silently dropped the byte right before each trigger)
			if (p > base)
				writer.write(timestamp, recordCode, base, p - base);

			writer.write(timestamp, log.StdRecordCode.Break);
			base = p; // the trigger byte opens the next chunk
		}

		// flush the tail after the last trigger (or the whole record if
		// no trigger byte was found)
		if (p > base)
			writer.write(timestamp, recordCode, base, p - base);

		return true;
	}
}
BreakOnChar.7z
A layer like this could, of course, add more meaningful data to the log (e.g., some human-readable information about commands and replies extracted from the binary packets).
Our tutorial for writing protocol analyzers covers the topic in more detail: https://ioninja.com/doc/developer-manual/tutorial-plugin-analyzer.html
Of course, writing an in-depth protocol analyzer only makes sense if it is expected to work with a protocol at hand long enough (to justify the work on the analyzer). Otherwise, I would stop at inserting a break between the packet boundaries and maybe tagging packets with a few lines of human-readable text.