Skip to content

Commit

Permalink
fix: look at byte sequence instead of individual byte
Browse files Browse the repository at this point in the history
  • Loading branch information
fredrikaverpil committed Jan 4, 2025
1 parent 91b99d7 commit 74af212
Showing 1 changed file with 22 additions and 9 deletions.
31 changes: 22 additions & 9 deletions lua/neotest-golang/lib/sanitize.lua
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,32 @@ local function isSequentialList(t)
end

--- Build a copy of `str` in which only tab, newline and printable ASCII bytes
-- are kept as-is.
-- NOTE(review): this listing is a rendered diff with the +/- markers stripped,
-- so removed lines and the lines that replace them appear interleaved (the
-- commit header reports 22 additions and 9 deletions). Read as plain Lua the
-- text below is not a single valid function; the review notes mark which
-- variant each group of lines appears to belong to.
-- @param str the string to sanitize; it is read bytewise via string.byte
-- @return the accumulated sanitized string
function M.sanitize_string(str)
-- Convert to UTF-8 codepoints and back to handle the string properly
local sanitized_string = ""
-- NOTE(review): the next two lines look like the removed variant, which
-- visited every byte index of `str` individually.
for i = 1, #str do
local byte = string.byte(str, i)
-- Preserve:
-- - newlines (10)
-- - tabs (9)
-- - regular ASCII printable chars (32-127)
-- This ensures we keep readable output while filtering binary noise
-- NOTE(review): the condition further down uses 126 as its upper bound, so
-- byte 127 is not preserved despite the "32-127" wording above.
-- NOTE(review): from here the lines look like the added variant, which walks
-- the string by position and steps over a whole multi-byte sequence at once.
local pos = 1
while pos <= #str do
local byte = string.byte(str, pos)
local char_len = 1

-- Detect UTF-8 sequence length
-- The length is derived from the lead byte alone; the bytes that follow are
-- not inspected, and bytes below 192 keep the default length of 1.
if byte >= 240 then -- 4 bytes
char_len = 4
elseif byte >= 224 then -- 3 bytes
char_len = 3
elseif byte >= 192 then -- 2 bytes
char_len = 2
end

-- Slice out the whole sequence starting at `pos` (string.sub stops at the end
-- of `str` when fewer than char_len bytes remain).
local char = string.sub(str, pos, pos + char_len - 1)

-- Check if it's a valid UTF-8 sequence or allowed ASCII
-- NOTE(review): only the lead byte is tested, and only against tab (9),
-- newline (10) and 32..126. For those values char_len is still 1, so `char`
-- is a single byte whenever it is appended; every other sequence, including
-- each multi-byte one, takes the else branch.
if byte == 9 or byte == 10 or (byte >= 32 and byte <= 126) then
-- NOTE(review): first line below appears to be the removed variant, second
-- its replacement.
sanitized_string = sanitized_string .. string.char(byte)
sanitized_string = sanitized_string .. char
else
-- NOTE(review): the removed variant appended nothing for a rejected byte; the
-- added variant appends one "?" per rejected sequence.
sanitized_string = sanitized_string .. ""
sanitized_string = sanitized_string .. "?" -- Using ASCII replacement
end

-- Advance past the entire sequence so the bytes after its lead byte are not
-- visited on their own.
pos = pos + char_len
end
return sanitized_string
end
Expand Down

0 comments on commit 74af212

Please sign in to comment.