From d6c05c7cb359ce52b3d17990d047f20deedb6e1e Mon Sep 17 00:00:00 2001 From: EylonKrause Date: Tue, 30 Jun 2026 12:05:06 +0300 Subject: [PATCH] string_view: fix HasWord false negative after a non-boundary partial match CpuFeatures_StringView_HasWord searches for the word in `remainder` (which advances every loop iteration) but runs the separator-boundary checks by slicing `before`/`after` from the full `line` using that remainder-relative index. On the first iteration remainder == line so the index is correct; once an earlier non-boundary partial occurrence advances `remainder`, the index is relative to the shorter remainder yet applied to `line`, so the boundary checks read the wrong characters and the word is reported absent even when it is a real token. E.g. HasWord("foox foo", "foo", ' ') returned false. This affects any feature whose name first appears as a non-boundary substring of an earlier token and then again as its own token (e.g. MIPS/LoongArch flag lists). Fix: convert the remainder-relative index to an absolute index into `line` (remainder is always a suffix, offset = line.size - remainder.size). Anchoring the slices to the full line also avoids the false positive that slicing `remainder` directly would create at remainder-start boundaries. Extends StringViewTest.CpuFeatures_StringView_HasWord with the false-negative repro and a no-false-positive guard. --- src/string_view.c | 9 +++++++-- test/string_view_test.cc | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/string_view.c b/src/string_view.c index 2cac9da7..b880b042 100644 --- a/src/string_view.c +++ b/src/string_view.c @@ -162,10 +162,15 @@ bool CpuFeatures_StringView_HasWord(const StringView line, if (index_of_word < 0) { return false; } else { + // index_of_word is relative to `remainder`; convert it to an absolute + // index into `line` so the boundary checks see the real neighbours. + // (On later iterations `remainder` no longer starts at `line`.) + const size_t absolute_index = + (line.size - remainder.size) + (size_t)index_of_word; const StringView before = - CpuFeatures_StringView_KeepFront(line, index_of_word); + CpuFeatures_StringView_KeepFront(line, absolute_index); const StringView after = - CpuFeatures_StringView_PopFront(line, index_of_word + word.size); + CpuFeatures_StringView_PopFront(line, absolute_index + word.size); const bool valid_before = before.size == 0 || CpuFeatures_StringView_Back(before) == separator; const bool valid_after = diff --git a/test/string_view_test.cc b/test/string_view_test.cc index 772ac3f5..3af70c70 100644 --- a/test/string_view_test.cc +++ b/test/string_view_test.cc @@ -182,6 +182,14 @@ TEST(StringViewTest, CpuFeatures_StringView_HasWord) { CpuFeatures_StringView_HasWord(str("first middle last"), "mid", ' ')); EXPECT_FALSE( CpuFeatures_StringView_HasWord(str("first middle last"), "las", ' ')); + // Regression: the searched word appears as a real token, but an earlier + // non-boundary partial occurrence forces a second loop iteration. HasWord + // used to run the separator checks with a remainder-relative index against + // the whole line, producing a false negative on the real token. + EXPECT_TRUE(CpuFeatures_StringView_HasWord(str("foox foo"), "foo", ' ')); + EXPECT_TRUE(CpuFeatures_StringView_HasWord(str("xvme vme"), "vme", ' ')); + // A repeated prefix that is never its own token must still not match. + EXPECT_FALSE(CpuFeatures_StringView_HasWord(str("foofoo bar"), "foo", ' ')); } TEST(StringViewTest, CpuFeatures_StringView_GetAttributeKeyValue) {