From fb5d9e4e69da8c39c6607d1139deee818636688d Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Mon, 18 May 2026 14:16:10 -0400 Subject: [PATCH 1/4] removed duplicated function from `export.py`, added test for normalize file path util function --- scripts/export.py | 45 ++----------- tests/test_normalize_file_path.py | 107 ++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 40 deletions(-) create mode 100644 tests/test_normalize_file_path.py diff --git a/scripts/export.py b/scripts/export.py index 629ca27..46cc1bd 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -30,7 +30,11 @@ build_searchable_text, is_excluded_by_rules, ) -from utils.path_helpers import get_workspace_folder_paths as _shared_get_workspace_folder_paths # noqa: E402 +from utils.path_helpers import ( # noqa: E402 + get_workspace_folder_paths as _shared_get_workspace_folder_paths, + normalize_file_path, + to_epoch_ms, +) from utils.tool_parser import parse_tool_call # noqa: E402 from utils.workspace_path import get_cli_chats_path # noqa: E402 from utils.cli_chat_reader import ( # noqa: E402 @@ -141,45 +145,6 @@ def get_global_state_dir() -> str: return os.path.join(str(Path.home()), ".cursor-chat-browser") -def normalize_file_path(p: str) -> str: - n = re.sub(r"^file:///", "", p or "") - n = re.sub(r"^file://", "", n) - try: - from urllib.parse import unquote - n = unquote(n) - except Exception: - pass - if sys.platform == "win32": - n = n.replace("/", "\\") - n = re.sub(r"^\\([a-zA-Z]:)", r"\1", n) - n = n.lower() - return n - - -def to_epoch_ms(value) -> int: - """Convert a timestamp (int, float, or ISO-8601 string) to epoch ms.""" - if value is None: - return 0 - if isinstance(value, (int, float)): - if value > 1e12: - return int(value) - if value > 0: - return int(value * 1000) - return 0 - if isinstance(value, str): - try: - cleaned = value.rstrip("Z") + "+00:00" if value.endswith("Z") else value - dt = datetime.fromisoformat(cleaned) - return int(dt.timestamp() * 1000) - 
except Exception: - pass - try: - return to_epoch_ms(float(value)) - except Exception: - pass - return 0 - - def slug(s: str) -> str: s = re.sub(r'[<>:"/\\|?*]', "_", s or "") s = re.sub(r"\s+", "-", s) diff --git a/tests/test_normalize_file_path.py b/tests/test_normalize_file_path.py new file mode 100644 index 0000000..3be8dab --- /dev/null +++ b/tests/test_normalize_file_path.py @@ -0,0 +1,107 @@ +"""Tests for utils.path_helpers.normalize_file_path. + +Covers the shared implementation that was previously duplicated in +scripts/export.py (closes #46). All call-sites in both the web app and the +CLI export script now use this single copy. + +Edge-case matrix: + - file:/// and file:// URI schemes + - Percent-encoded characters: spaces (%20), colons (%3A), hashes (%23) + - Windows-style drive paths (backslash and forward-slash) on all platforms + - Drive-letter lowercasing on win32 + - Plain POSIX paths pass through unchanged + - Empty / None-like input +""" + +import sys +import unittest + +from utils.path_helpers import normalize_file_path + + +class TestNormalizeFilePathUriStripping(unittest.TestCase): + def test_file_triple_slash_stripped(self) -> None: + out = normalize_file_path("file:///home/user/project") + self.assertFalse(out.startswith("file:")) + self.assertIn("home", out) + + def test_file_double_slash_stripped(self) -> None: + out = normalize_file_path("file://server/share/file.txt") + self.assertFalse(out.startswith("file:")) + self.assertIn("share", out) + + def test_empty_string(self) -> None: + self.assertEqual(normalize_file_path(""), "") + + +class TestNormalizeFilePathPercentEncoding(unittest.TestCase): + def test_space_decoded(self) -> None: + out = normalize_file_path("file:///C:/My%20Documents/file.txt") + self.assertNotIn("%20", out) + self.assertIn("My Documents" if sys.platform != "win32" else "my documents", out) + + def test_hash_decoded(self) -> None: + out = normalize_file_path("file:///C:/repo/src%23internal/mod.py") + 
self.assertNotIn("%23", out) + self.assertIn("#", out) + + def test_percent_encoded_colon_in_uri_prefix(self) -> None: + """URI-style /d%3A/... path: %3A is decoded to ':'. + + On win32 the backslash branch is entered (leading slash removed + and path lowercased). On other platforms the leading slash prevents + the Windows-drive branch, so the path is returned as decoded only. + """ + out = normalize_file_path("/d%3A/_Work/project") + self.assertNotIn("%3A", out) + if sys.platform == "win32": + self.assertEqual(out, r"d:\_work\project") + else: + self.assertEqual(out, "/d:/_Work/project") + + +class TestNormalizeFilePathWindowsDrives(unittest.TestCase): + """Paths with Windows-style drive letters are normalised on all platforms. + + On win32 the win32 branch handles them natively. On Linux/macOS the + ``^[a-zA-Z]:[/\\]`` regex branch converts forward-slashes to backslashes + and lowercases the path so cross-platform reads of Cursor's Windows + workspaceStorage produce consistent keys. + """ + + def test_backslash_drive_path_lowercased(self) -> None: + out = normalize_file_path(r"D:\Work\Boost") + self.assertEqual(out, r"d:\work\boost") + + def test_forward_slash_drive_path_converted(self) -> None: + out = normalize_file_path("D:/Work/Boost") + self.assertEqual(out, r"d:\work\boost") + + def test_file_uri_with_windows_drive(self) -> None: + out = normalize_file_path("file:///C:/Users/Dev/project") + self.assertIn("users", out) + self.assertIn("dev", out) + self.assertTrue(out.startswith("c:") or out.startswith("C:")) + + def test_mixed_case_drive_lowercased(self) -> None: + out = normalize_file_path(r"E:\Mixed\Case\Path") + self.assertTrue(out.startswith("e:")) + self.assertEqual(out, r"e:\mixed\case\path") + + +class TestNormalizeFilePathPosixPassthrough(unittest.TestCase): + def test_plain_posix_path_unchanged_on_non_windows(self) -> None: + if sys.platform == "win32": + self.skipTest("POSIX path semantics differ on win32") + out = 
normalize_file_path("/home/user/project") + self.assertEqual(out, "/home/user/project") + + def test_path_without_scheme_unchanged(self) -> None: + if sys.platform == "win32": + self.skipTest("plain relative path behaviour differs on win32") + out = normalize_file_path("relative/path/file.py") + self.assertEqual(out, "relative/path/file.py") + + +if __name__ == "__main__": + unittest.main() From a3688c058706079322a9911a30371f094abcd40b Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Mon, 18 May 2026 14:26:39 -0400 Subject: [PATCH 2/4] fix: align windows-drive case expectation in URI test --- tests/test_normalize_file_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_normalize_file_path.py b/tests/test_normalize_file_path.py index 3be8dab..99b1c47 100644 --- a/tests/test_normalize_file_path.py +++ b/tests/test_normalize_file_path.py @@ -38,7 +38,7 @@ class TestNormalizeFilePathPercentEncoding(unittest.TestCase): def test_space_decoded(self) -> None: out = normalize_file_path("file:///C:/My%20Documents/file.txt") self.assertNotIn("%20", out) - self.assertIn("My Documents" if sys.platform != "win32" else "my documents", out) + self.assertIn("my documents", out) def test_hash_decoded(self) -> None: out = normalize_file_path("file:///C:/repo/src%23internal/mod.py") From 011f6e874985876f62aee5a6c64e7de815380d60 Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Mon, 18 May 2026 15:06:29 -0400 Subject: [PATCH 3/4] fix: use lowercase only for drive letter --- tests/test_normalize_file_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_normalize_file_path.py b/tests/test_normalize_file_path.py index 99b1c47..7fd2205 100644 --- a/tests/test_normalize_file_path.py +++ b/tests/test_normalize_file_path.py @@ -81,7 +81,7 @@ def test_file_uri_with_windows_drive(self) -> None: out = normalize_file_path("file:///C:/Users/Dev/project") self.assertIn("users", out) self.assertIn("dev", out) - 
self.assertTrue(out.startswith("c:") or out.startswith("C:")) + self.assertTrue(out.startswith("c:")) def test_mixed_case_drive_lowercased(self) -> None: out = normalize_file_path(r"E:\Mixed\Case\Path") From 88b7b107cd5cbd9dcd5d5f1f42db340f3dbe4a23 Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Tue, 19 May 2026 00:38:33 -0400 Subject: [PATCH 4/4] fix: review comments --- tests/test_normalize_file_path.py | 74 ++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 20 deletions(-) diff --git a/tests/test_normalize_file_path.py b/tests/test_normalize_file_path.py index 7fd2205..b3531fa 100644 --- a/tests/test_normalize_file_path.py +++ b/tests/test_normalize_file_path.py @@ -1,22 +1,20 @@ -"""Tests for utils.path_helpers.normalize_file_path. - -Covers the shared implementation that was previously duplicated in -scripts/export.py (closes #46). All call-sites in both the web app and the -CLI export script now use this single copy. - -Edge-case matrix: - - file:/// and file:// URI schemes - - Percent-encoded characters: spaces (%20), colons (%3A), hashes (%23) - - Windows-style drive paths (backslash and forward-slash) on all platforms - - Drive-letter lowercasing on win32 - - Plain POSIX paths pass through unchanged - - Empty / None-like input +"""Tests for utils.path_helpers path/timestamp helpers (closes #46). + +Covers ``normalize_file_path`` and ``to_epoch_ms``, both previously duplicated +in scripts/export.py. All call-sites in the web app and CLI export script now +use the shared implementations in utils.path_helpers. + +Test inventory (this module only): 21 cases — 12 ``normalize_file_path``, +9 ``to_epoch_ms``. On win32, 2 cases skip (POSIX passthrough in +``TestNormalizeFilePathPosixPassthrough`` only). A full-suite run may report +more skips (e.g. ``skipped=4``) from other test modules, not this file. 
""" import sys import unittest +from datetime import datetime, timezone -from utils.path_helpers import normalize_file_path +from utils.path_helpers import normalize_file_path, to_epoch_ms class TestNormalizeFilePathUriStripping(unittest.TestCase): @@ -48,9 +46,14 @@ def test_hash_decoded(self) -> None: def test_percent_encoded_colon_in_uri_prefix(self) -> None: """URI-style /d%3A/... path: %3A is decoded to ':'. - On win32 the backslash branch is entered (leading slash removed - and path lowercased). On other platforms the leading slash prevents - the Windows-drive branch, so the path is returned as decoded only. + Only test that exercises the leading-``/`` + drive-letter shape end-to-end + (Cursor sometimes stores ``/d%3A/...`` URIs). Other drive-path tests use + ``D:/...`` or ``D:\\...`` without a leading slash. + + On win32 the win32 branch strips the leading slash, lowercases, and + normalises to backslashes. On other platforms the leading ``/`` prevents + the ``^[a-zA-Z]:[/\\]`` cross-platform branch in ``path_helpers``, so the + path is returned as percent-decoded only (no slash flip / lowercasing). """ out = normalize_file_path("/d%3A/_Work/project") self.assertNotIn("%3A", out) @@ -79,9 +82,8 @@ def test_forward_slash_drive_path_converted(self) -> None: def test_file_uri_with_windows_drive(self) -> None: out = normalize_file_path("file:///C:/Users/Dev/project") - self.assertIn("users", out) - self.assertIn("dev", out) - self.assertTrue(out.startswith("c:")) + # file:/// stripped, then same drive-letter branch as D:/ and D:\ inputs. 
+ self.assertEqual(out, r"c:\users\dev\project") def test_mixed_case_drive_lowercased(self) -> None: out = normalize_file_path(r"E:\Mixed\Case\Path") @@ -103,5 +105,37 @@ def test_path_without_scheme_unchanged(self) -> None: self.assertEqual(out, "relative/path/file.py") +class TestToEpochMs(unittest.TestCase): + def test_none_returns_zero(self) -> None: + self.assertEqual(to_epoch_ms(None), 0) + + def test_ms_int_passthrough(self) -> None: + self.assertEqual(to_epoch_ms(1_700_000_000_000), 1_700_000_000_000) + + def test_seconds_int_converted_to_ms(self) -> None: + self.assertEqual(to_epoch_ms(1_700_000_000), 1_700_000_000_000) + + def test_seconds_float_converted_to_ms(self) -> None: + self.assertEqual(to_epoch_ms(1_700_000_000.5), 1_700_000_000_500) + + def test_zero_returns_zero(self) -> None: + self.assertEqual(to_epoch_ms(0), 0) + + def test_iso8601_zulu(self) -> None: + expected = int( + datetime(2026, 2, 3, 20, 39, 54, 17_000, tzinfo=timezone.utc).timestamp() * 1000 + ) + self.assertEqual(to_epoch_ms("2026-02-03T20:39:54.017Z"), expected) + + def test_numeric_string_already_ms(self) -> None: + self.assertEqual(to_epoch_ms("1700000000000"), 1_700_000_000_000) + + def test_numeric_string_seconds(self) -> None: + self.assertEqual(to_epoch_ms("1700000000"), 1_700_000_000_000) + + def test_unrecognised_string_returns_zero(self) -> None: + self.assertEqual(to_epoch_ms("not-a-timestamp"), 0) + + if __name__ == "__main__": unittest.main()