From 09945526489e302db42472e3cd64d7f140bdde59 Mon Sep 17 00:00:00 2001 From: Zhipeng Wang Date: Thu, 4 Jun 2026 10:35:32 +0800 Subject: [PATCH 1/4] ci(telemetry): restore InstrumentationKey injection in official build Re-enable telemetry in the shipped wheel by restoring the iKey-injection and post-build verify steps removed in #728. The sdist still ships the empty placeholder; only the wheel carries the injected key, gated on the INSTRUMENTATION_KEY build secret defined in the ADO pipeline. --- .pipelines/modelkit-official-build.yml | 76 +++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 8 deletions(-) diff --git a/.pipelines/modelkit-official-build.yml b/.pipelines/modelkit-official-build.yml index 73df877ff..2b7ca7d89 100644 --- a/.pipelines/modelkit-official-build.yml +++ b/.pipelines/modelkit-official-build.yml @@ -130,14 +130,74 @@ extends: - script: python -m pip install --upgrade build twine packaging displayName: 'Install build tools' - # Telemetry is disabled in shipped artifacts: the empty iKey - # placeholder in src/winml/modelkit/telemetry/constants.py - # ships unchanged in both sdist and wheel. The telemetry init - # path short-circuits to disabled when iKey is empty (no - # events emitted, no LoggerProvider constructed). To re-enable, - # restore an iKey-injection step before the wheel build. - - script: python -m build --outdir "$(ob_outputDirectory)" - displayName: 'Build sdist and wheel' + # Build sdist BEFORE iKey injection so the source archive + # ships with the empty placeholder. The PyPI sdist must + # never carry the real iKey. 
+              - script: python -m build --sdist --outdir "$(ob_outputDirectory)"
+                displayName: 'Build sdist (with empty iKey placeholder)'
+
+              - powershell: |
+                  $path = "$(Build.SourcesDirectory)\src\winml\modelkit\telemetry\constants.py"
+                  $key = $env:INSTRUMENTATION_KEY  # build secret, mapped explicitly through the env: block of this step
+                  if ([string]::IsNullOrWhiteSpace($key) -or $key -match '^\$\(') { throw "INSTRUMENTATION_KEY env var is empty, missing, or an unexpanded pipeline macro" }  # an undefined pipeline variable arrives as the literal macro text, not as an empty string
+                  $content = [System.IO.File]::ReadAllText($path)
+                  $placeholder = 'INSTRUMENTATION_KEY = ""'
+                  if (-not $content.Contains($placeholder)) { throw "placeholder not found in $path" }  # fail loudly if constants.py drifted from the expected form
+                  $newContent = $content.Replace($placeholder, "INSTRUMENTATION_KEY = ""$key""")
+                  [System.IO.File]::WriteAllText($path, $newContent)
+                  Write-Host "Injected iKey into $path"
+                env:
+                  INSTRUMENTATION_KEY: $(INSTRUMENTATION_KEY)
+                displayName: 'Inject InstrumentationKey into constants.py'
+
+              # Build wheel AFTER injection so the wheel carries the
+              # real iKey.
+              - script: python -m build --wheel --outdir "$(ob_outputDirectory)"
+                displayName: 'Build wheel (with injected iKey)'
+
+              # Verify the wheel carries the real iKey AND the sdist
+              # carries the empty placeholder. Reading constants.py out
+              # of each archive directly catches build-cache / ordering
+              # bugs that disk-only checks would miss.
+              - powershell: |
+                  Add-Type -AssemblyName System.IO.Compression
+                  Add-Type -AssemblyName System.IO.Compression.FileSystem
+
+                  function Read-ConstantsFromArchive($archivePath) {  # returns the text of the first entry ending in telemetry/constants.py inside a zip archive (a wheel is a zip)
+                    $zip = [System.IO.Compression.ZipFile]::OpenRead($archivePath)
+                    try {
+                      $entry = $zip.Entries | Where-Object { $_.FullName -like "*telemetry/constants.py" } | Select-Object -First 1
+                      if (-not $entry) { throw "telemetry/constants.py not found in $archivePath" }
+                      $reader = [System.IO.StreamReader]::new($entry.Open())
+                      try { return $reader.ReadToEnd() } finally { $reader.Close() }
+                    } finally { $zip.Dispose() }  # always release the archive handle, even when the lookup throws
+                  }
+
+                  $wheel = Get-ChildItem "$(ob_outputDirectory)\*.whl" | Select-Object -First 1
+                  if (-not $wheel) { throw "no wheel found in $(ob_outputDirectory)" }
+                  $wheelContent = Read-ConstantsFromArchive $wheel.FullName
+                  if ($wheelContent -match 'INSTRUMENTATION_KEY\s*=\s*"("|\$\()' -or $wheelContent -notmatch 'INSTRUMENTATION_KEY\s*=\s*"[^"]') {  # reject an empty value, an unexpanded pipeline macro, or a file with no non-empty assignment at all
+                    throw "wheel is missing a real INSTRUMENTATION_KEY (empty, unexpanded macro, or absent) - injection failed"
+                  }
+                  Write-Host "Wheel verified - constants.py has non-empty iKey"
+
+                  $sdist = Get-ChildItem "$(ob_outputDirectory)\*.tar.gz" | Select-Object -First 1
+                  if (-not $sdist) { throw "no sdist found in $(ob_outputDirectory)" }
+                  # tar.gz inside a zip-aware reader: open via SharpCompress would
+                  # add a dependency. Inspect by extracting via tar (Windows 10+
+                  # ships with tar.exe) and reading the file from disk.
+                  $sdistTemp = Join-Path "$(Agent.TempDirectory)" "sdist_verify"
+                  if (Test-Path $sdistTemp) { Remove-Item -Recurse -Force $sdistTemp }  # start from an empty directory so stale files cannot satisfy the check
+                  New-Item -ItemType Directory -Path $sdistTemp | Out-Null
+                  tar -xzf $sdist.FullName -C $sdistTemp; if ($LASTEXITCODE -ne 0) { throw "tar exited with code $LASTEXITCODE while extracting the sdist" }
+                  $sdistConstants = Get-ChildItem -Path $sdistTemp -Recurse -Filter "constants.py" | Where-Object { $_.FullName -like "*\winml\modelkit\telemetry\constants.py" } | Select-Object -First 1
+                  if (-not $sdistConstants) { throw "constants.py not found in sdist" }
+                  $sdistContent = Get-Content $sdistConstants.FullName -Raw
+                  if ($sdistContent -notmatch 'INSTRUMENTATION_KEY\s*=\s*""') {
+                    throw "sdist contains a non-empty iKey - sdist must ship clean"
+                  }
+                  Write-Host "Sdist verified - constants.py has empty placeholder"
+                displayName: 'Verify wheel has iKey, sdist does not'
 
               - script: python -m twine check "$(ob_outputDirectory)\*.whl" "$(ob_outputDirectory)\*.tar.gz"
                 continueOnError: true
From 1e258e5ff61d067f16afa5205bf43071daf40bca Mon Sep 17 00:00:00 2001
From: Zhipeng Wang
Date: Thu, 4 Jun 2026 10:35:38 +0800
Subject: [PATCH 2/4] feat(telemetry): reword consent notice as "unlinked
 pseudonymized"

Replace "anonymous" with "unlinked pseudonymized" in the first-run
consent prompt and Privacy.md to accurately classify the collected data
(a persisted per-machine device-id hash is pseudonymized, not
anonymous). Bump _CONSENT_VERSION 1 -> 2 so already-consented users see
and re-accept the updated notice.
---
 docs/Privacy.md                         | 4 ++--
 src/winml/modelkit/telemetry/consent.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/Privacy.md b/docs/Privacy.md
index 97add4ba1..2dd9094bc 100644
--- a/docs/Privacy.md
+++ b/docs/Privacy.md
@@ -1,7 +1,7 @@
 # WinML CLI Privacy Statement
 
-WinML CLI collects limited, anonymous telemetry to help improve the
-product. This page describes exactly what is collected, what is not,
+WinML CLI collects limited, unlinked pseudonymized telemetry to help
+improve the product. 
This page describes exactly what is collected, what is not, and how to control it. ## Data category diff --git a/src/winml/modelkit/telemetry/consent.py b/src/winml/modelkit/telemetry/consent.py index 359c61b9e..61c6a9343 100644 --- a/src/winml/modelkit/telemetry/consent.py +++ b/src/winml/modelkit/telemetry/consent.py @@ -45,10 +45,10 @@ def _default_config_path() -> Path | None: # stored records with an older version are treated as unrecorded on # read so the user sees the updated notice and re-consents. Records # predating the version field are grandfathered as the current version. -_CONSENT_VERSION: int = 1 +_CONSENT_VERSION: int = 2 _PROMPT_TEXT = """\ -WinML CLI can collect anonymous usage data to help improve the product. +WinML CLI can collect unlinked pseudonymized usage data to help improve the product. What is collected: - Command name, duration, success/failure From afb3f059cae5a46d67c665b42fdfd9ff2cbbd81d Mon Sep 17 00:00:00 2001 From: Zhipeng Wang Date: Thu, 4 Jun 2026 10:45:22 +0800 Subject: [PATCH 3/4] docs(readme): add Data / Telemetry section linking to Privacy.md Document the now-enabled telemetry in the README: what's collected at a glance, first-run consent, opt-out via %USERPROFILE%\.winml\config.json, CI/non-TTY auto-disable, and a pointer to docs/Privacy.md. Uses the 'unlinked pseudonymized' classification. --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index db90d3d99..3ce387788 100644 --- a/README.md +++ b/README.md @@ -272,6 +272,30 @@ Each arrow is a WinML CLI command. You can enter the pipeline at any stage (for --- +## :lock: Data / Telemetry + +Official WinML CLI releases can collect **unlinked pseudonymized** usage telemetry +to help improve the product. Telemetry is classified as **Optional**. A one-time +prompt on your first run asks for consent (default: accept — press **Enter** to +enable, type `n` to decline). 
+ +Dev installs (`pip install -e .` or running from a source checkout) never send +telemetry. + +**Control** — edit `%USERPROFILE%\.winml\config.json`: + +- Set `telemetry.consent` to `"disabled"` to opt out +- Set `telemetry.consent` to `"enabled"` to opt in +- Delete the file to re-show the first-run prompt on the next run + +Telemetry is automatically disabled in CI / non-TTY environments regardless of the +stored decision. + +See [docs/Privacy.md](docs/Privacy.md) for the full list of what is and is not +collected, event schemas, CI auto-disable behavior, and storage locations. + +--- + ## :handshake: Contributing We welcome contributions! Please see the [contribution guidelines](CONTRIBUTING.md). From 300c46bc84b97cc4e14d3c898103960756299239 Mon Sep 17 00:00:00 2001 From: Zhipeng Wang Date: Thu, 4 Jun 2026 10:48:11 +0800 Subject: [PATCH 4/4] docs(readme): trim telemetry section Drop the CI/non-TTY auto-disable note and the dev-installs note from the README Data / Telemetry section; both details remain documented in docs/Privacy.md. --- README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.md b/README.md index 3ce387788..056013a09 100644 --- a/README.md +++ b/README.md @@ -279,18 +279,12 @@ to help improve the product. Telemetry is classified as **Optional**. A one-time prompt on your first run asks for consent (default: accept — press **Enter** to enable, type `n` to decline). -Dev installs (`pip install -e .` or running from a source checkout) never send -telemetry. - **Control** — edit `%USERPROFILE%\.winml\config.json`: - Set `telemetry.consent` to `"disabled"` to opt out - Set `telemetry.consent` to `"enabled"` to opt in - Delete the file to re-show the first-run prompt on the next run -Telemetry is automatically disabled in CI / non-TTY environments regardless of the -stored decision. - See [docs/Privacy.md](docs/Privacy.md) for the full list of what is and is not collected, event schemas, CI auto-disable behavior, and storage locations.