From 7c6ef18a6cdbfb51e6aeb5aa43a40d1f440b0ff2 Mon Sep 17 00:00:00 2001 From: Eric Mertens Date: Sun, 26 Nov 2023 21:16:56 -0800 Subject: [PATCH] hyperscan-driven snote parser --- CMakeLists.txt | 6 ++- main.cpp | 7 ++- snote_thread.cpp | 121 +++++++++++++++++++++++++++++++++++++++++++++-- snote_thread.hpp | 52 ++++++++++++++++++-- 4 files changed, 176 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 789f6f9..859bfc8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,10 @@ project(xbot ) find_package(Boost REQUIRED) +find_package(PkgConfig REQUIRED) + +pkg_check_modules(LIBHS libhs REQUIRED IMPORTED_TARGET) + include(FetchContent) FetchContent_Declare( @@ -39,4 +43,4 @@ add_executable(xbot ping_thread.cpp irc_parse_thread.cpp registration_thread.cpp self_thread.cpp command_thread.cpp) target_include_directories(xbot PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -target_link_libraries(xbot PRIVATE Boost::headers tomlplusplus_tomlplusplus eventpp) +target_link_libraries(xbot PRIVATE Boost::headers tomlplusplus_tomlplusplus eventpp PkgConfig::LIBHS) diff --git a/main.cpp b/main.cpp index 8369b5b..89ae970 100644 --- a/main.cpp +++ b/main.cpp @@ -77,11 +77,16 @@ auto start(boost::asio::io_context & io, Settings const& settings) -> void ping_thread(*connection); auto const self_thread = SelfThread::start(*connection); registration_thread(*connection, settings.password, settings.username, settings.realname, settings.nickname); - snote_thread(*connection); + SnoteThread::start(*connection); CommandThread::start(*connection); echo_thread(*connection); unhandled_message_thread(*connection); + connection->add_listener([](SnoteEvent& event) + { + std::cout << "Snote match " << static_cast(event.tag) << std::endl; + }); + boost::asio::co_spawn( io, connection->connect(io, settings.host, settings.service), diff --git a/snote_thread.cpp b/snote_thread.cpp index 526678b..d42c914 100644 --- a/snote_thread.cpp +++ b/snote_thread.cpp @@ -4,12 +4,93 @@ #include "connection.hpp" #include +#include +#include +#include +#include +namespace { -auto snote_thread(Connection& connection) -> void +struct SnotePattern { + SnotePattern(SnoteTag tag, char const* expression, unsigned flags = 0) + : tag{tag} + , expression{expression} + , flags{flags} + , regex{expression, std::regex_constants::ECMAScript | std::regex_constants::optimize} + { + } + + SnoteTag tag; + char const* expression; + unsigned flags; + std::regex regex; +}; + +SnotePattern const patterns[] = +{ + {SnoteTag::ClientConnecting, + R"(^Client connecting: ([^ ]+) \(([^@ ]+)@([^) ]+)\) \[(.*)\] \{([^ ]*)\} <([^ ]*)> \[(.*)\]$)"}, + + {SnoteTag::ClientExiting, + R"(^Client exiting: ([^ ]+) \(([^@ ]+)@([^) ]+)\) \[(.*)\] \[(.*)\]$)"}, +}; + +auto setup_database() -> std::unique_ptr +{ + std::vector expressions; + std::vector flags; + std::vector ids; + + expressions.reserve(std::size(patterns)); + flags.reserve(std::size(patterns)); + ids.reserve(std::size(patterns)); + + unsigned id = 0; + + for (auto const& pattern : patterns) + { + expressions.push_back(pattern.expression); + flags.push_back(pattern.flags); + ids.push_back(id++); + } + + hs_database_t* db; + hs_compile_error *error; + + switch (hs_compile_multi(expressions.data(), flags.data(), ids.data(), expressions.size(), HS_MODE_BLOCK, nullptr, &db, &error)) + { + case HS_COMPILER_ERROR: + { + std::string msg = error->message; + hs_free_compile_error(error); + throw std::runtime_error{std::move(msg)}; + } + case HS_SUCCESS: + break; + default: + abort(); + } + return std::unique_ptr{db}; +} + +} // namespace + +auto SnoteThread::start(Connection& connection) -> std::shared_ptr +{ + auto thread = std::make_shared(); + + thread->db_ = setup_database(); + + hs_scratch_t* scratch = nullptr; + if (HS_SUCCESS != hs_alloc_scratch(thread->db_.get(), &scratch)) + { + abort(); + } + thread->scratch_ = std::unique_ptr{scratch}; + static char const* const prefix = "*** Notice -- "; - connection.add_listener([&connection](IrcMsgEvent& event) + connection.add_listener([&connection, thread](IrcMsgEvent& event) { auto& args = event.irc.args; if (IrcCommand::NOTICE == event.command @@ -17,7 +98,41 @@ auto snote_thread(Connection& connection) -> void && args[1].starts_with(prefix)) { event.handled_ = true; - connection.make_event(args[1].substr(strlen(prefix))); + auto message = args[1].substr(strlen(prefix)); + unsigned int match_id = -1; + auto const scan_result = hs_scan(thread->db_.get(), message.data(), message.size(), 0, thread->scratch_.get(), + [](unsigned int id, unsigned long long from, unsigned long long to, unsigned int flags, void *context) -> int + { + int* const match_id = static_cast(context); + *match_id = id; + return 1; // stop scanning + } + , &match_id); + + if (scan_result != HS_SUCCESS && scan_result != HS_SCAN_TERMINATED) + { + abort(); + } + + if (match_id != -1) + { + auto& pattern = patterns[match_id]; + std::match_results results; + if (not std::regex_match(message.begin(), message.end(), results, pattern.regex)) + { + // something went wrong - hyperscan disagrees with std::regex + abort(); + } + + std::vector parts; + for (auto const sub : results) + { + parts.push_back(std::string_view{sub.first, sub.second}); + } + connection.make_event(pattern.tag, std::move(parts)); + } } }); + + return thread; } diff --git a/snote_thread.hpp b/snote_thread.hpp index 266ce66..5eaaf4b 100644 --- a/snote_thread.hpp +++ b/snote_thread.hpp @@ -2,13 +2,55 @@ #include "thread.hpp" +#include + +#include + class Connection; -// WIP: Use much finer granularity -struct SnoteEvent : Event +enum class SnoteTag { - SnoteEvent(std::string_view raw) : raw{raw} {} - std::string_view raw; + ClientConnecting, + ClientExiting, }; -auto snote_thread(Connection& connection) -> void; +struct SnoteEvent : Event +{ + SnoteEvent(SnoteTag tag, std::vector parts) + : tag{tag} + , parts{std::move(parts)} + {} + + SnoteTag tag; + std::vector parts; +}; + +struct SnoteThread +{ + struct DbDeleter + { + auto operator()(hs_database_t * db) const -> void + { + if (HS_SUCCESS != hs_free_database(db)) + { + abort(); + } + } + }; + + struct ScratchDeleter + { + auto operator()(hs_scratch_t * scratch) const -> void + { + if (HS_SUCCESS != hs_free_scratch(scratch)) + { + abort(); + } + } + }; + + std::unique_ptr db_; + std::unique_ptr scratch_; + + static auto start(Connection& connection) -> std::shared_ptr; +};