diff --git a/.pipelines/modelkit-official-build.yml b/.pipelines/modelkit-official-build.yml index 73df877ff..2b7ca7d89 100644 --- a/.pipelines/modelkit-official-build.yml +++ b/.pipelines/modelkit-official-build.yml @@ -130,14 +130,74 @@ extends: - script: python -m pip install --upgrade build twine packaging displayName: 'Install build tools' - # Telemetry is disabled in shipped artifacts: the empty iKey - # placeholder in src/winml/modelkit/telemetry/constants.py - # ships unchanged in both sdist and wheel. The telemetry init - # path short-circuits to disabled when iKey is empty (no - # events emitted, no LoggerProvider constructed). To re-enable, - # restore an iKey-injection step before the wheel build. - - script: python -m build --outdir "$(ob_outputDirectory)" - displayName: 'Build sdist and wheel' + # Build the sdist BEFORE iKey injection: constants.py still + # holds the empty INSTRUMENTATION_KEY placeholder at this point, so the + # source archive (the PyPI sdist) never carries the real iKey. + - script: python -m build --sdist --outdir "$(ob_outputDirectory)" + displayName: 'Build sdist (with empty iKey placeholder)' + + - powershell: | + $path = "$(Build.SourcesDirectory)\src\winml\modelkit\telemetry\constants.py" + $key = $env:INSTRUMENTATION_KEY + if (-not $key) { throw "INSTRUMENTATION_KEY env var is empty or missing" } + $content = [System.IO.File]::ReadAllText($path) + $placeholder = 'INSTRUMENTATION_KEY = ""' + if (-not $content.Contains($placeholder)) { throw "placeholder not found in $path" } + $newContent = $content.Replace($placeholder, "INSTRUMENTATION_KEY = ""$key""") + [System.IO.File]::WriteAllText($path, $newContent) + Write-Host "Injected iKey into $path" + env: + INSTRUMENTATION_KEY: $(INSTRUMENTATION_KEY) + displayName: 'Inject InstrumentationKey into constants.py' + + # Build wheel AFTER injection so the wheel carries the + # real iKey. 
+ - script: python -m build --wheel --outdir "$(ob_outputDirectory)" + displayName: 'Build wheel (with injected iKey)' + + # Verify the wheel no longer contains the empty iKey placeholder + # (non-empty check only; the value is not compared to the real key) AND that the sdist + # still does. Reading constants.py out of each archive directly catches + # build-cache / ordering bugs that disk-only checks would miss. + - powershell: | + Add-Type -AssemblyName System.IO.Compression + Add-Type -AssemblyName System.IO.Compression.FileSystem + + function Read-ConstantsFromArchive($archivePath) { + $zip = [System.IO.Compression.ZipFile]::OpenRead($archivePath) + try { + $entry = $zip.Entries | Where-Object { $_.FullName -like "*telemetry/constants.py" } | Select-Object -First 1 + if (-not $entry) { throw "telemetry/constants.py not found in $archivePath" } + $reader = [System.IO.StreamReader]::new($entry.Open()) + try { return $reader.ReadToEnd() } finally { $reader.Close() } + } finally { $zip.Dispose() } + } + + $wheel = Get-ChildItem "$(ob_outputDirectory)\*.whl" | Select-Object -First 1 + if (-not $wheel) { throw "no wheel found in $(ob_outputDirectory)" } + $wheelContent = Read-ConstantsFromArchive $wheel.FullName + if ($wheelContent -match 'INSTRUMENTATION_KEY\s*=\s*""') { + throw "wheel contains empty INSTRUMENTATION_KEY placeholder - injection failed" + } + Write-Host "Wheel verified - constants.py has non-empty iKey" + + $sdist = Get-ChildItem "$(ob_outputDirectory)\*.tar.gz" | Select-Object -First 1 + if (-not $sdist) { throw "no sdist found in $(ob_outputDirectory)" } + # A .tar.gz is not a zip archive, so the ZipFile helper above does not apply, and SharpCompress would + # add a dependency. Instead, extract the sdist with tar (Windows 10+ + # ships with tar.exe) into a scratch directory and read constants.py from disk. 
+ $sdistTemp = Join-Path "$(Agent.TempDirectory)" "sdist_verify" + if (Test-Path $sdistTemp) { Remove-Item -Recurse -Force $sdistTemp } + New-Item -ItemType Directory -Path $sdistTemp | Out-Null + tar -xzf $sdist.FullName -C $sdistTemp + $sdistConstants = Get-ChildItem -Path $sdistTemp -Recurse -Filter "constants.py" | Where-Object { $_.FullName -like "*telemetry*" } | Select-Object -First 1 + if (-not $sdistConstants) { throw "constants.py not found in sdist" } + $sdistContent = Get-Content $sdistConstants.FullName -Raw + if ($sdistContent -notmatch 'INSTRUMENTATION_KEY\s*=\s*""') { + throw "sdist contains a non-empty iKey - sdist must ship clean" + } + Write-Host "Sdist verified - constants.py has empty placeholder" + displayName: 'Verify wheel has iKey, sdist does not' - script: python -m twine check "$(ob_outputDirectory)\*.whl" "$(ob_outputDirectory)\*.tar.gz" continueOnError: true diff --git a/README.md b/README.md index db90d3d99..056013a09 100644 --- a/README.md +++ b/README.md @@ -272,6 +272,24 @@ Each arrow is a WinML CLI command. You can enter the pipeline at any stage (for --- +## :lock: Data / Telemetry + +Official WinML CLI releases can collect **unlinked pseudonymized** usage telemetry +to help improve the product. Telemetry is classified as **Optional**. A one-time +prompt on your first run asks for consent (default: accept — press **Enter** to +enable, type `n` to decline). + +**Control** — edit `%USERPROFILE%\.winml\config.json`: + +- Set `telemetry.consent` to `"disabled"` to opt out +- Set `telemetry.consent` to `"enabled"` to opt in +- Delete the file to re-show the first-run prompt on the next run + +See [docs/Privacy.md](docs/Privacy.md) for the full list of what is and is not +collected, event schemas, CI auto-disable behavior, and storage locations. + +--- + ## :handshake: Contributing We welcome contributions! Please see the [contribution guidelines](CONTRIBUTING.md). 
diff --git a/docs/Privacy.md b/docs/Privacy.md index 97add4ba1..2dd9094bc 100644 --- a/docs/Privacy.md +++ b/docs/Privacy.md @@ -1,7 +1,7 @@ # WinML CLI Privacy Statement -WinML CLI collects limited, anonymous telemetry to help improve the -product. This page describes exactly what is collected, what is not, +WinML CLI collects limited, unlinked pseudonymized telemetry to help +improve the product. This page describes exactly what is collected, what is not, and how to control it. ## Data category diff --git a/src/winml/modelkit/telemetry/consent.py b/src/winml/modelkit/telemetry/consent.py index 359c61b9e..61c6a9343 100644 --- a/src/winml/modelkit/telemetry/consent.py +++ b/src/winml/modelkit/telemetry/consent.py @@ -45,10 +45,10 @@ def _default_config_path() -> Path | None: # stored records with an older version are treated as unrecorded on # read so the user sees the updated notice and re-consents. Records # predating the version field are grandfathered as the current version. -_CONSENT_VERSION: int = 1 +_CONSENT_VERSION: int = 2 _PROMPT_TEXT = """\ -WinML CLI can collect anonymous usage data to help improve the product. +WinML CLI can collect unlinked pseudonymized usage data to help improve the product. What is collected: - Command name, duration, success/failure