hyperscan-driven snote parser

This commit is contained in:
Eric Mertens 2023-11-26 21:16:56 -08:00
parent 9296b9e8fc
commit 7c6ef18a6c
4 changed files with 176 additions and 10 deletions

View File

@ -7,6 +7,10 @@ project(xbot
)
find_package(Boost REQUIRED)
find_package(PkgConfig REQUIRED)
pkg_check_modules(LIBHS libhs REQUIRED IMPORTED_TARGET)
include(FetchContent)
FetchContent_Declare(
@ -39,4 +43,4 @@ add_executable(xbot
ping_thread.cpp irc_parse_thread.cpp registration_thread.cpp
self_thread.cpp command_thread.cpp)
target_include_directories(xbot PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(xbot PRIVATE Boost::headers tomlplusplus_tomlplusplus eventpp)
target_link_libraries(xbot PRIVATE Boost::headers tomlplusplus_tomlplusplus eventpp PkgConfig::LIBHS)

View File

@ -77,11 +77,16 @@ auto start(boost::asio::io_context & io, Settings const& settings) -> void
ping_thread(*connection);
auto const self_thread = SelfThread::start(*connection);
registration_thread(*connection, settings.password, settings.username, settings.realname, settings.nickname);
snote_thread(*connection);
SnoteThread::start(*connection);
CommandThread::start(*connection);
echo_thread(*connection);
unhandled_message_thread(*connection);
connection->add_listener<SnoteEvent>([](SnoteEvent& event)
{
std::cout << "Snote match " << static_cast<int>(event.tag) << std::endl;
});
boost::asio::co_spawn(
io,
connection->connect(io, settings.host, settings.service),

View File

@ -4,12 +4,93 @@
#include "connection.hpp"
#include <cstring>
#include <cstdlib>
#include <stdexcept>
#include <utility>
#include <regex>
namespace {
auto snote_thread(Connection& connection) -> void
/// Associates a SnoteTag with the expression that recognises it.
///
/// The same expression text is kept twice: as a raw C string (handed to the
/// Hyperscan compiler by setup_database) and as a precompiled std::regex
/// (used afterwards to pull out the capture groups).
struct SnotePattern
{
    /// @param tag         tag reported when this pattern matches
    /// @param expression  regular expression text; the pointer is stored
    ///                    as-is, so it must stay valid for the lifetime of
    ///                    this object
    /// @param flags       per-expression flags forwarded to hs_compile_multi
    SnotePattern(SnoteTag tag, char const* expression, unsigned flags = 0)
        : tag(tag)
        , expression(expression)
        , flags(flags)
        , regex(expression,
                std::regex_constants::ECMAScript | std::regex_constants::optimize)
    {
    }

    SnoteTag tag;            // which kind of notice this pattern identifies
    char const* expression;  // non-owning pointer to the expression text
    unsigned flags;          // Hyperscan compile flags for this expression
    std::regex regex;        // ECMAScript regex built from `expression`
};
// Table of recognised server-notice formats.
//
// The position of an entry in this array is the id it is registered under in
// the Hyperscan database (see setup_database), so a reported match id can be
// used directly as an index into this table.
SnotePattern const patterns[] =
{
    SnotePattern{
        SnoteTag::ClientConnecting,
        R"(^Client connecting: ([^ ]+) \(([^@ ]+)@([^) ]+)\) \[(.*)\] \{([^ ]*)\} <([^ ]*)> \[(.*)\]$)"},

    SnotePattern{
        SnoteTag::ClientExiting,
        R"(^Client exiting: ([^ ]+) \(([^@ ]+)@([^) ]+)\) \[(.*)\] \[(.*)\]$)"},
};
/// Compile every entry of `patterns` into one Hyperscan block-mode database.
///
/// Each expression is registered under an id equal to its index in
/// `patterns`, so the id reported by a scan can be used to index the table.
///
/// @return owning pointer to the compiled database.
/// @throws std::runtime_error carrying the compiler's message when
///         hs_compile_multi reports HS_COMPILER_ERROR.
/// Any other non-success status aborts the process.
auto setup_database() -> std::unique_ptr<hs_database_t, SnoteThread::DbDeleter>
{
    auto const count = std::size(patterns);

    // Hyperscan wants three parallel arrays: expression text, flags and ids.
    std::vector<char const*> expressions;
    std::vector<unsigned> flags;
    std::vector<unsigned> ids;
    expressions.reserve(count);
    flags.reserve(count);
    ids.reserve(count);

    unsigned id = 0;
    for (auto const& pattern : patterns)
    {
        expressions.push_back(pattern.expression);
        flags.push_back(pattern.flags);
        ids.push_back(id++);
    }

    // Initialise the out-parameters so they never hold indeterminate values,
    // whatever the callee does on the paths that do not assign them.
    hs_database_t* db = nullptr;
    hs_compile_error* error = nullptr;

    auto const status = hs_compile_multi(
        expressions.data(),
        flags.data(),
        ids.data(),
        static_cast<unsigned>(expressions.size()), // API takes unsigned, not size_t
        HS_MODE_BLOCK,
        nullptr,
        &db,
        &error);

    switch (status)
    {
    case HS_COMPILER_ERROR:
    {
        // Copy the message before releasing the error object; guard against
        // a missing message so std::string is never built from a null pointer.
        std::string msg = (error != nullptr && error->message != nullptr)
            ? error->message
            : "hyperscan pattern compilation failed";
        hs_free_compile_error(error);
        throw std::runtime_error{std::move(msg)};
    }
    case HS_SUCCESS:
        break;
    default:
        // Any other status is treated as unrecoverable.
        abort();
    }

    return std::unique_ptr<hs_database_t, SnoteThread::DbDeleter>{db};
}
} // namespace
// Create a SnoteThread, build its Hyperscan database and scratch space, and
// register an IrcMsgEvent listener on `connection` that classifies server
// notices and publishes a SnoteEvent for each recognised one.
//
// Returns the shared_ptr that owns the database and scratch space.
// setup_database() may throw std::runtime_error; a failed scratch allocation
// aborts the process.
auto SnoteThread::start(Connection& connection) -> std::shared_ptr<SnoteThread>
{
auto thread = std::make_shared<SnoteThread>();
thread->db_ = setup_database();
hs_scratch_t* scratch = nullptr;
if (HS_SUCCESS != hs_alloc_scratch(thread->db_.get(), &scratch))
{
abort();
}
thread->scratch_ = std::unique_ptr<hs_scratch_t, ScratchDeleter>{scratch};
// Only notices whose text begins with this prefix are examined.
static char const* const prefix = "*** Notice -- ";
connection.add_listener<IrcMsgEvent>([&connection](IrcMsgEvent& event)
// `thread` is captured by value, so the listener holds its own reference to
// the database and scratch space. `connection` is captured by reference.
// NOTE(review): assumes `connection` outlives the listener - confirm.
connection.add_listener<IrcMsgEvent>([&connection, thread](IrcMsgEvent& event)
{
auto& args = event.irc.args;
if (IrcCommand::NOTICE == event.command
@ -17,7 +98,41 @@ auto snote_thread(Connection& connection) -> void
&& args[1].starts_with(prefix))
{
event.handled_ = true;
connection.make_event<SnoteEvent>(args[1].substr(strlen(prefix)));
// Strip the prefix; the remainder is what the patterns are written against.
auto message = args[1].substr(strlen(prefix));
// -1 converts to the largest unsigned value and serves as the "no match"
// sentinel checked below.
unsigned int match_id = -1;
auto const scan_result = hs_scan(thread->db_.get(), message.data(), message.size(), 0, thread->scratch_.get(),
[](unsigned int id, unsigned long long from, unsigned long long to, unsigned int flags, void *context) -> int
{
// NOTE(review): `context` points at an `unsigned int`, yet it is written
// through an `int*`; consider casting to `unsigned int*` to match.
int* const match_id = static_cast<int*>(context);
*match_id = id;
return 1; // stop scanning
}
, &match_id);
// The callback asks to stop after the first match, so HS_SCAN_TERMINATED is
// accepted alongside HS_SUCCESS; anything else is treated as fatal.
if (scan_result != HS_SUCCESS && scan_result != HS_SCAN_TERMINATED)
{
abort();
}
if (match_id != -1)
{
// Pattern ids were assigned as indices into `patterns` by setup_database.
auto& pattern = patterns[match_id];
// Run the matching std::regex to recover the capture groups.
std::match_results<std::string_view::const_iterator> results;
if (not std::regex_match(message.begin(), message.end(), results, pattern.regex))
{
// something went wrong - hyperscan disagrees with std::regex
abort();
}
// One view per sub-match; element 0 of a match_results is the whole match.
// NOTE(review): the views point into `message`, i.e. into args[1] -
// presumably that buffer outlives the SnoteEvent handlers; confirm.
std::vector<std::string_view> parts;
// NOTE(review): `auto const sub` copies each sub_match; `auto const&` would avoid it.
for (auto const sub : results)
{
parts.push_back(std::string_view{sub.first, sub.second});
}
connection.make_event<SnoteEvent>(pattern.tag, std::move(parts));
}
}
});
return thread;
}

View File

@ -2,13 +2,55 @@
#include "thread.hpp"
#include <hs.h>
#include <memory>
class Connection;
// WIP: Use much finer granularity
struct SnoteEvent : Event
/// Identifies which known server-notice pattern a SnoteEvent corresponds to.
enum class SnoteTag
{
SnoteEvent(std::string_view raw) : raw{raw} {}
std::string_view raw;
/// Tag attached to the "Client connecting: ..." pattern.
ClientConnecting,
/// Tag attached to the "Client exiting: ..." pattern.
ClientExiting,
};
auto snote_thread(Connection& connection) -> void;
/// Event published when a server notice matches one of the known patterns.
struct SnoteEvent : Event
{
    /// @param tag    which pattern matched
    /// @param parts  the regex sub-matches of the notice text, taken by value
    ///               and moved into the event
    SnoteEvent(SnoteTag tag, std::vector<std::string_view> parts)
        : tag(tag)
        , parts(std::move(parts))
    {
    }

    /// Pattern that recognised the notice.
    SnoteTag tag;

    /// Sub-matches of the notice text. These are non-owning views.
    /// NOTE(review): presumably the viewed buffer outlives every handler of
    /// this event - confirm against the emitter.
    std::vector<std::string_view> parts;
};
/// Holds the Hyperscan resources used to classify server notices.
struct SnoteThread
{
    /// unique_ptr deleter for a compiled Hyperscan database.
    /// Aborts the process if the library reports a failure while freeing.
    struct DbDeleter
    {
        auto operator()(hs_database_t * db) const -> void
        {
            auto const status = hs_free_database(db);
            if (status != HS_SUCCESS)
            {
                abort();
            }
        }
    };

    /// unique_ptr deleter for Hyperscan scratch space.
    /// Aborts the process if the library reports a failure while freeing.
    struct ScratchDeleter
    {
        auto operator()(hs_scratch_t * scratch) const -> void
        {
            auto const status = hs_free_scratch(scratch);
            if (status != HS_SUCCESS)
            {
                abort();
            }
        }
    };

    // Declared in this order so that the scratch space is destroyed before
    // the database (members are destroyed in reverse declaration order).
    std::unique_ptr<hs_database_t, DbDeleter> db_;
    std::unique_ptr<hs_scratch_t, ScratchDeleter> scratch_;

    /// Build the pattern database and scratch space and hook the notice
    /// listener onto `connection`; returns the object owning those resources.
    static auto start(Connection& connection) -> std::shared_ptr<SnoteThread>;
};