Skip to content

Commit 40ffdac

Browse files
committed
Materialize vendored ExternalData directly from cache
1 parent 08c5f67 commit 40ffdac

5 files changed

Lines changed: 256 additions & 65 deletions

File tree

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,14 @@ This is the exact consumer model we want:
121121
- normal local files materialized into build trees via symlinks, hardlinks, or copies
122122
- no requirement for consumers to know which remote backend served the blob
123123

124+
One pragmatic deviation exists on Windows. Stock `ExternalData.cmake` copies
125+
objects into `ExternalData_BINARY_ROOT`, which reintroduces a full build-local
126+
copy before the file reaches the final build tree. This repository therefore vendors a
127+
small `ExternalData` patch by default that materializes directly from the
128+
shared object store into the final build tree and prefers `hardlink`, then
129+
`symlink`, then `copy` on Windows. Consumers can still fall back to the host
130+
CMake module with `-DNAM_USE_VENDORED_EXTERNALDATA=OFF`.
131+
124132
## Backends
125133

126134
The first backend is `GitHub Release assets`.

cmake/NablaAssetManifests.cmake

Lines changed: 111 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,36 @@
55
# - `.dvc` files produced by `dvc add`
66
#
77
# Consumer-side behavior:
8-
# - pure public `ExternalData` API
8+
# - ExternalData-compatible public API surface
99
# - no hand-maintained asset catalog
1010

1111
include_guard(GLOBAL)
1212

13+
if (NOT DEFINED NAM_USE_VENDORED_EXTERNALDATA)
14+
set(
15+
NAM_USE_VENDORED_EXTERNALDATA
16+
ON
17+
CACHE BOOL
18+
"Use the vendored ExternalData module bundled with NAM instead of the stock host module"
19+
)
20+
endif()
21+
mark_as_advanced(NAM_USE_VENDORED_EXTERNALDATA)
22+
1323
# Print a STATUS-level configure message prefixed with the module name.
#
# Arguments:
#   MESSAGE_TEXT - text appended after the "NablaAssetManifests: " prefix.
function(_nam_summary MESSAGE_TEXT)
  string(CONCAT _nam_line "NablaAssetManifests: " "${MESSAGE_TEXT}")
  message(STATUS "${_nam_line}")
endfunction()
1626

27+
# Load the ExternalData implementation selected by
# NAM_USE_VENDORED_EXTERNALDATA and report which one was chosen.
#
# Arguments:
#   OUT_VAR - name of the variable, set in the caller's scope, that receives
#             "vendored" or "stock".
#
# This is deliberately a macro() and not a function(): include() evaluates the
# module in the scope of the include() call, so normal variables set at the
# module's file scope would be discarded as soon as a function() returned.
# Stock ExternalData.cmake sets private helper variables at file scope (for
# example `_ExternalData_SELF_DIR`) that ExternalData_Expand_Arguments and
# ExternalData_Add_Target read later, so the include has to run in the scope
# that subsequently calls those functions.
#
# Must be invoked from a function defined in this file so that
# CMAKE_CURRENT_FUNCTION_LIST_DIR resolves to this module's directory.
macro(_nam_include_externaldata OUT_VAR)
  if (NAM_USE_VENDORED_EXTERNALDATA)
    include("${CMAKE_CURRENT_FUNCTION_LIST_DIR}/vendor/ExternalData-NAM.cmake")
    set(${OUT_VAR} "vendored")
  else()
    include(ExternalData)
    set(${OUT_VAR} "stock")
  endif()
endmacro()
37+
1738
function(_nam_validate_file_link_mode MODE_VALUE OUT_VAR)
1839
string(TOLOWER "${MODE_VALUE}" _mode)
1940
if (
@@ -438,7 +459,8 @@ function(nam_add_channel_target)
438459
list(LENGTH _items _item_count)
439460
_nam_get_backend_kind(_backend_kind)
440461
_nam_resolve_cache_root(_cache_root CACHE_ROOT "${NAM_CACHE_ROOT}")
441-
_nam_summary("configure channel target `${NAM_TARGET}`: channel=`${NAM_CHANNEL}`, repo=`${NAM_REPO}`, tag=`${NAM_TAG}`, backend=`${_backend_kind}`, cache_root=`${_cache_root}`, total=${_item_count}")
462+
_nam_include_externaldata(_externaldata_provider)
463+
_nam_summary("configure channel target `${NAM_TARGET}`: channel=`${NAM_CHANNEL}`, repo=`${NAM_REPO}`, tag=`${NAM_TAG}`, backend=`${_backend_kind}`, externaldata=`${_externaldata_provider}`, cache_root=`${_cache_root}`, total=${_item_count}")
442464

443465
_nam_get_github_release_index_file(_index_file REPO "${NAM_REPO}" TAG "${NAM_TAG}" CACHE_ROOT "${NAM_CACHE_ROOT}")
444466

@@ -447,16 +469,20 @@ function(nam_add_channel_target)
447469
file(MAKE_DIRECTORY "${_build_root}")
448470
file(WRITE "${_fetch_script}" "set(CMAKE_MESSAGE_LOG_LEVEL NOTICE)\nset(NAM_RELEASE_INDEX_FILE [=[${_index_file}]=])\ninclude([=[${CMAKE_CURRENT_FUNCTION_LIST_DIR}/NablaAssetManifestsExternalDataFetch.cmake]=])\n")
449471

450-
include(ExternalData)
451472
set(ExternalData_OBJECT_STORES "${_cache_root}/objects")
452473
set(ExternalData_URL_TEMPLATES "ExternalDataCustomScript://NAM/%(hash)")
453474
set(ExternalData_CUSTOM_SCRIPT_NAM "${_fetch_script}")
454475
add_custom_target("${NAM_TARGET}")
455476
set(_refs_root "${_build_root}/refs")
456477
set(_externaldata_binary_root "${_build_root}/assets")
478+
if (_externaldata_provider STREQUAL "vendored")
479+
set(_materialization_source_root "${_cache_root}/objects/SHA256")
480+
else()
481+
set(_materialization_source_root "${_externaldata_binary_root}")
482+
endif()
457483
_nam_resolve_file_link_mode(
458484
_file_link_mode
459-
SOURCE_ROOT "${_externaldata_binary_root}"
485+
SOURCE_ROOT "${_materialization_source_root}"
460486
DESTINATION_ROOT "${NAM_DESTINATION_ROOT}/${NAM_CHANNEL}"
461487
)
462488
if (NAM_NO_SYMLINKS)
@@ -465,7 +491,9 @@ function(nam_add_channel_target)
465491
_nam_summary("materialization mode for file assets: `${_file_link_mode}`")
466492

467493
set(_asset_refs)
494+
set(_asset_data_refs)
468495
set(_asset_relpaths)
496+
set(_asset_target_paths)
469497
foreach(_asset IN LISTS _items)
470498
_nam_find_channel_asset(
471499
CHANNEL "${NAM_CHANNEL}"
@@ -485,53 +513,101 @@ function(nam_add_channel_target)
485513

486514
set(_data_name "${NAM_CHANNEL}/${_relative_path}")
487515
set(_data_ref "${_refs_root}/${_data_name}")
516+
set(_target_path "${NAM_DESTINATION_ROOT}/${_data_name}")
488517
get_filename_component(_link_dir "${_data_ref}" DIRECTORY)
489518
file(MAKE_DIRECTORY "${_link_dir}")
490519
file(WRITE "${_data_ref}.sha256" "${_sha256}\n")
491520
list(APPEND _asset_refs "DATA{${_data_ref}}")
521+
list(APPEND _asset_data_refs "${_data_ref}")
492522
list(APPEND _asset_relpaths "${_relative_path}")
523+
list(APPEND _asset_target_paths "${_target_path}")
493524
endforeach()
494525

495526
if (_asset_refs)
496-
set(_asset_target "${NAM_TARGET}__externaldata")
497-
set(ExternalData_SOURCE_ROOT "${_refs_root}")
498-
set(ExternalData_BINARY_ROOT "${_externaldata_binary_root}")
499-
unset(ExternalData_NO_SYMLINKS)
500-
set(_old_suppress_dev "${CMAKE_SUPPRESS_DEVELOPER_WARNINGS}")
501-
set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS 1)
502-
ExternalData_Expand_Arguments("${_asset_target}" _asset_expanded ${_asset_refs})
503-
set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS "${_old_suppress_dev}")
504-
ExternalData_Add_Target("${_asset_target}" SHOW_PROGRESS "${NAM_SHOW_PROGRESS}")
505-
set(_externaldata_config "${CMAKE_CURRENT_BINARY_DIR}/${_asset_target}_config.cmake")
506-
if (EXISTS "${_externaldata_config}")
527+
if (_externaldata_provider STREQUAL "vendored")
528+
set(ExternalData_LINK_MODE "${_file_link_mode}")
529+
set(ExternalData_TIMEOUT_INACTIVITY "")
530+
set(ExternalData_TIMEOUT_ABSOLUTE "")
531+
set(ExternalData_NO_SYMLINKS "")
532+
string(CONCAT _ExternalData_CONFIG_CODE
533+
"set(ExternalData_CUSTOM_SCRIPT_NAM [=[${_fetch_script}]=])")
534+
set(_externaldata_config "${CMAKE_CURRENT_BINARY_DIR}/${NAM_TARGET}__externaldata_config.cmake")
535+
configure_file(
536+
"${CMAKE_CURRENT_FUNCTION_LIST_DIR}/vendor/ExternalData_config.cmake.in"
537+
"${_externaldata_config}"
538+
@ONLY
539+
)
507540
if (NAM_VERBOSE)
508541
set(_externaldata_log_level "STATUS")
509542
else()
510543
set(_externaldata_log_level "NOTICE")
511544
endif()
512545
file(READ "${_externaldata_config}" _externaldata_config_contents)
513546
file(WRITE "${_externaldata_config}" "set(CMAKE_MESSAGE_LOG_LEVEL ${_externaldata_log_level})\n${_externaldata_config_contents}")
547+
548+
list(LENGTH _asset_data_refs _asset_count)
549+
math(EXPR _asset_last "${_asset_count} - 1")
550+
foreach(_index RANGE ${_asset_last})
551+
list(GET _asset_data_refs ${_index} _data_ref)
552+
list(GET _asset_target_paths ${_index} _target_path)
553+
set(_stamp "${_build_root}/file_stamps/${_index}.stamp")
554+
set(_hash_record "${_build_root}/hash_records/${_index}.txt")
555+
get_filename_component(_stamp_dir "${_stamp}" DIRECTORY)
556+
get_filename_component(_hash_record_dir "${_hash_record}" DIRECTORY)
557+
file(MAKE_DIRECTORY "${_stamp_dir}")
558+
file(MAKE_DIRECTORY "${_hash_record_dir}")
559+
add_custom_command(
560+
OUTPUT "${_stamp}" "${_target_path}" "${_hash_record}"
561+
COMMAND "${CMAKE_COMMAND}" -Drelative_top=${CMAKE_BINARY_DIR} -Dfile=${_target_path} -Dname=${_data_ref} -Dexts=.sha256 -DExternalData_STAMP_FILE=${_hash_record} -DExternalData_ACTION=fetch -DExternalData_SHOW_PROGRESS=${NAM_SHOW_PROGRESS} -DExternalData_CONFIG=${_externaldata_config} -P "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/vendor/ExternalData-NAM.cmake"
562+
COMMAND "${CMAKE_COMMAND}" -E touch "${_stamp}"
563+
MAIN_DEPENDENCY "${_data_ref}.sha256"
564+
DEPENDS "${_fetch_script}" "${_externaldata_config}"
565+
VERBATIM
566+
)
567+
list(APPEND _materialize_stamps "${_stamp}")
568+
endforeach()
569+
else()
570+
set(_asset_target "${NAM_TARGET}__externaldata")
571+
set(ExternalData_SOURCE_ROOT "${_refs_root}")
572+
set(ExternalData_BINARY_ROOT "${_externaldata_binary_root}")
573+
unset(ExternalData_LINK_MODE)
574+
unset(ExternalData_NO_SYMLINKS)
575+
set(_old_suppress_dev "${CMAKE_SUPPRESS_DEVELOPER_WARNINGS}")
576+
set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS 1)
577+
ExternalData_Expand_Arguments("${_asset_target}" _asset_expanded ${_asset_refs})
578+
set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS "${_old_suppress_dev}")
579+
ExternalData_Add_Target("${_asset_target}" SHOW_PROGRESS "${NAM_SHOW_PROGRESS}")
580+
set(_externaldata_config "${CMAKE_CURRENT_BINARY_DIR}/${_asset_target}_config.cmake")
581+
if (EXISTS "${_externaldata_config}")
582+
if (NAM_VERBOSE)
583+
set(_externaldata_log_level "STATUS")
584+
else()
585+
set(_externaldata_log_level "NOTICE")
586+
endif()
587+
file(READ "${_externaldata_config}" _externaldata_config_contents)
588+
file(WRITE "${_externaldata_config}" "set(CMAKE_MESSAGE_LOG_LEVEL ${_externaldata_log_level})\n${_externaldata_config_contents}")
589+
endif()
590+
add_dependencies("${NAM_TARGET}" "${_asset_target}")
591+
592+
list(LENGTH _asset_expanded _asset_expanded_count)
593+
math(EXPR _asset_last "${_asset_expanded_count} - 1")
594+
foreach(_index RANGE ${_asset_last})
595+
list(GET _asset_expanded ${_index} _expanded_path)
596+
set(_stamp "${_build_root}/file_stamps/${_index}.stamp")
597+
get_filename_component(_stamp_dir "${_stamp}" DIRECTORY)
598+
file(MAKE_DIRECTORY "${_stamp_dir}")
599+
list(GET _asset_relpaths ${_index} _relative_path)
600+
set(_target_path "${NAM_DESTINATION_ROOT}/${NAM_CHANNEL}/${_relative_path}")
601+
add_custom_command(
602+
OUTPUT "${_stamp}"
603+
COMMAND "${CMAKE_COMMAND}" -DINPUT=${_expanded_path} -DDESTINATION=${_target_path} -DLINK_MODE=${_file_link_mode} -P "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/NablaAssetManifestsMaterialize.cmake"
604+
COMMAND "${CMAKE_COMMAND}" -E touch "${_stamp}"
605+
DEPENDS "${_expanded_path}"
606+
VERBATIM
607+
)
608+
list(APPEND _materialize_stamps "${_stamp}")
609+
endforeach()
514610
endif()
515-
add_dependencies("${NAM_TARGET}" "${_asset_target}")
516-
517-
list(LENGTH _asset_expanded _asset_expanded_count)
518-
math(EXPR _asset_last "${_asset_expanded_count} - 1")
519-
foreach(_index RANGE ${_asset_last})
520-
list(GET _asset_expanded ${_index} _expanded_path)
521-
list(GET _asset_relpaths ${_index} _relative_path)
522-
set(_target_path "${NAM_DESTINATION_ROOT}/${NAM_CHANNEL}/${_relative_path}")
523-
set(_stamp "${_build_root}/file_stamps/${_index}.stamp")
524-
get_filename_component(_stamp_dir "${_stamp}" DIRECTORY)
525-
file(MAKE_DIRECTORY "${_stamp_dir}")
526-
add_custom_command(
527-
OUTPUT "${_stamp}"
528-
COMMAND "${CMAKE_COMMAND}" -DINPUT=${_expanded_path} -DDESTINATION=${_target_path} -DLINK_MODE=${_file_link_mode} -P "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/NablaAssetManifestsMaterialize.cmake"
529-
COMMAND "${CMAKE_COMMAND}" -E touch "${_stamp}"
530-
DEPENDS "${_expanded_path}"
531-
VERBATIM
532-
)
533-
list(APPEND _materialize_stamps "${_stamp}")
534-
endforeach()
535611
endif()
536612

537613
if (_materialize_stamps)

cmake/README.md

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ This directory contains the consumer-side module behind the top-level
55

66
It is include-only.
77

8-
`nam.cmake` is not a script-mode downloader. The public `ExternalData` API is
9-
build-graph based so the intended consumer flow is a normal configure plus
8+
`nam.cmake` is not a script-mode downloader. It keeps the `ExternalData`
9+
build-graph model so the intended consumer flow is a normal configure plus
1010
build.
1111

1212
## Consumer usage
@@ -68,6 +68,15 @@ nam_add_channel_target(
6868
- Windows: `%LOCALAPPDATA%`
6969
- Linux: `${XDG_CACHE_HOME}` or `~/.cache`
7070

71+
## Module option
72+
73+
- `NAM_USE_VENDORED_EXTERNALDATA = ON`
74+
75+
By default NAM loads its vendored copy of `ExternalData.cmake`.
76+
77+
Set `-DNAM_USE_VENDORED_EXTERNALDATA=OFF` to use the stock `ExternalData.cmake`
78+
shipped with the host CMake instead.
79+
7180
## Source of truth
7281

7382
For input assets the source of truth is:
@@ -95,21 +104,50 @@ Consumer-side rule:
95104

96105
## ExternalData model
97106

98-
The module uses only public `ExternalData` APIs:
107+
When `NAM_USE_VENDORED_EXTERNALDATA=ON`, NAM uses
108+
`cmake/vendor/ExternalData-NAM.cmake`, which is a lightly patched vendored copy of CMake 4.2
109+
`ExternalData.cmake`.
110+
111+
The vendored copy exists for one reason:
112+
113+
- stock `ExternalData.cmake` copies objects into `ExternalData_BINARY_ROOT` on
114+
Windows
115+
116+
That behavior creates a full build-local copy before the file reaches the final
117+
consumer destination tree.
118+
119+
The NAM patch is intentionally small:
120+
121+
- it adds `ExternalData_LINK_MODE = auto|symlink|hardlink|copy`
122+
- it lets NAM call the vendored build-time script directly per asset
123+
- on the vendored path those modes are applied directly from the shared object
124+
store into the final destination tree
125+
126+
This keeps the shared object store model while avoiding an unnecessary physical
127+
copy when the host supports lightweight links.
128+
129+
Once upstream CMake gains equivalent Windows behavior, the default can be flipped
130+
back by changing `NAM_USE_VENDORED_EXTERNALDATA` without changing consumer call
131+
sites.
132+
133+
When `NAM_USE_VENDORED_EXTERNALDATA=OFF`, NAM falls back to the stock public
134+
`ExternalData` flow:
99135

100136
- `ExternalData_Expand_Arguments`
101137
- `ExternalData_Add_Target`
102138
- `ExternalData_CUSTOM_SCRIPT_<key>`
103139

104-
It does not call private `_ExternalData_*` functions and it does not spawn
105-
nested `cmake.exe` processes from the module itself.
106-
107140
The resulting model is:
108141

109142
- one shared local object store per user
110143
- content-addressed objects under `.../objects/SHA256/<hash>`
111144
- generated `.sha256` references under `${CMAKE_CURRENT_BINARY_DIR}/.nam/<target>/refs/<channel>/...`
112-
- intermediate `ExternalData` build outputs under `${CMAKE_CURRENT_BINARY_DIR}/.nam/<target>/assets`
145+
- vendored-path metadata under `${CMAKE_CURRENT_BINARY_DIR}/.nam/<target>/file_stamps` and
146+
`${CMAKE_CURRENT_BINARY_DIR}/.nam/<target>/hash_records`
147+
- direct final outputs under `${DESTINATION_ROOT}/${CHANNEL}/...` when the
148+
vendored module is enabled
149+
- a stock-module fallback path under `${CMAKE_CURRENT_BINARY_DIR}/.nam/<target>/assets`
150+
only when `NAM_USE_VENDORED_EXTERNALDATA=OFF`
113151
- normal build targets for consumers
114152

115153
During configure the module probes the current host once and selects the
@@ -122,19 +160,24 @@ Current detection order is:
122160

123161
At build time:
124162

125-
- `ExternalData` populates the shared object store
126-
- every release asset is materialized to the destination root exactly as it was
127-
published, using the detected lightweight file mode when available
163+
- the vendored path fetches missing objects into the shared object store and
164+
materializes final files directly from that store
165+
- on the default vendored path every release asset is exposed from the object
166+
store directly into the final destination root using the configured mode
167+
- on the stock fallback path NAM keeps the older `.nam/<target>/assets` staging
168+
step and then materializes into the destination root
128169

129170
Passing `NO_SYMLINKS` forces copy materialization even when the host supports
130171
lightweight links.
131172

173+
Explicit `symlink` mode on Windows still requires host symlink privilege.
174+
132175
## Logging
133176

134177
By default the module prints only:
135178

136179
- one short configure summary
137-
- the normal build-time `ExternalData` output
180+
- the normal build-time fetch/materialization output
138181

139182
## Smoke consumer
140183

0 commit comments

Comments
 (0)