diff --git a/api/dashboard/client.go b/api/dashboard/client.go index 46d77a5e..0d1be53b 100644 --- a/api/dashboard/client.go +++ b/api/dashboard/client.go @@ -335,11 +335,14 @@ func (c *Client) CreateApplication(accessToken, region, name string) (*Applicati } } - return nil, fmt.Errorf( - "create application failed with status %d: %s", - resp.StatusCode, - respStr, - ) + return nil, &APIError{ + StatusCode: resp.StatusCode, + Message: fmt.Sprintf( + "create application failed with status %d: %s", + resp.StatusCode, + respStr, + ), + } } var singleResp SingleApplicationResponse @@ -493,7 +496,13 @@ func (c *Client) ChangeApplicationPlan(accessToken, appID, plan string) (*Applic return nil, ErrSessionExpired } if resp.StatusCode < 200 || resp.StatusCode >= 300 { - return nil, fmt.Errorf("Couldn't change your application's plan: %d", resp.StatusCode) + return nil, &APIError{ + StatusCode: resp.StatusCode, + Message: fmt.Sprintf( + "Couldn't change your application's plan: %d", + resp.StatusCode, + ), + } } respBody, _ := io.ReadAll(resp.Body) diff --git a/api/dashboard/types.go b/api/dashboard/types.go index da309f4d..edf9f285 100644 --- a/api/dashboard/types.go +++ b/api/dashboard/types.go @@ -106,6 +106,17 @@ type RegionsResponse struct { // ErrSessionExpired is returned when an API call gets a 401 Unauthorized. var ErrSessionExpired = errors.New("session expired") +// APIError is returned for non-2xx dashboard responses. It carries the HTTP +// status so callers (and telemetry) can branch on it, keeping the message. +type APIError struct { + StatusCode int + Message string +} + +func (e *APIError) Error() string { return e.Message } + +func (e *APIError) HTTPStatusCode() int { return e.StatusCode } + // ErrClusterUnavailable is returned when a region has no available cluster. type ErrClusterUnavailable struct { Region string diff --git a/pkg/auth/authenticate.go b/pkg/auth/authenticate.go index af0df076..a8698d3d 100644 --- a/pkg/auth/authenticate.go +++ b/pkg/auth/authenticate.go @@ -1,6 +1,7 @@ package auth import ( + "context" "errors" "fmt" @@ -23,7 +24,9 @@ func EnsureAuthenticated( cs := io.ColorScheme() fmt.Fprintf(io.Out, "%s %s\n", cs.WarningIcon(), err) - return RunOAuth(io, client, false, true) + // Lazy login from another command: no request-scoped telemetry context here, + // so OAuth flow events are emitted only by the dedicated auth login/signup commands. + return RunOAuth(context.Background(), io, client, false, true) } // ReauthenticateIfExpired checks if err is a session-expired error from the API. @@ -41,5 +44,5 @@ func ReauthenticateIfExpired( ClearToken() fmt.Fprintf(io.Out, "%s Session expired.\n", cs.WarningIcon()) - return RunOAuth(io, client, false, true) + return RunOAuth(context.Background(), io, client, false, true) } diff --git a/pkg/auth/oauth_flow.go b/pkg/auth/oauth_flow.go index a4334f43..e51d8f65 100644 --- a/pkg/auth/oauth_flow.go +++ b/pkg/auth/oauth_flow.go @@ -1,13 +1,17 @@ package auth import ( + "context" "fmt" "os" "os/exec" "runtime" + "time" "github.com/algolia/cli/api/dashboard" + "github.com/algolia/cli/pkg/cmdutil" "github.com/algolia/cli/pkg/iostreams" + "github.com/algolia/cli/pkg/telemetry" ) // DefaultOAuthClientID is a public OAuth client ID (PKCE flow, not a secret). @@ -20,7 +24,10 @@ func OAuthClientID() string { return v } if DefaultOAuthClientID == "" { - fmt.Fprintln(os.Stderr, "fatal: ALGOLIA_OAUTH_CLIENT_ID is not set and no default was compiled in") + fmt.Fprintln( + os.Stderr, + "fatal: ALGOLIA_OAUTH_CLIENT_ID is not set and no default was compiled in", + ) os.Exit(1) } return DefaultOAuthClientID @@ -35,11 +42,27 @@ func OAuthClientID() string { // launched, e.g. SSH / containers). // // If signup is true the browser opens to the sign-up page. -func RunOAuth(io *iostreams.IOStreams, client *dashboard.Client, signup, openBrowser bool) (string, error) { +func RunOAuth( + ctx context.Context, + io *iostreams.IOStreams, + client *dashboard.Client, + signup, openBrowser bool, +) (string, error) { cs := io.ColorScheme() + flow := telemetry.FlowLogin + if signup { + flow = telemetry.FlowSignup + } + start := time.Now() + telemetry.Track(ctx, telemetry.AuthStarted(flow, !openBrowser)) + redirectURI, resultCh, err := StartCallbackServer() if err != nil { + telemetry.Track( + ctx, + telemetry.AuthFailed(flow, telemetry.AuthStepCallback, cmdutil.ErrorClass(err)), + ) return "", err } @@ -63,25 +86,44 @@ func RunOAuth(io *iostreams.IOStreams, client *dashboard.Client, signup, openBro fmt.Fprintf(io.Out, "Opening browser to sign in...\n") } fmt.Fprintf(io.Out, "If the browser doesn't open, visit:\n %s\n\n", cs.Bold(authorizeURL)) - _ = OpenBrowser(authorizeURL) + if browserErr := OpenBrowser(authorizeURL); browserErr != nil { + telemetry.Track(ctx, telemetry.AuthBrowserFailed(flow, cmdutil.ErrorClass(browserErr))) + } else { + telemetry.Track(ctx, telemetry.AuthBrowserOpened(flow)) + } } else { fmt.Fprintf(io.Out, "Open this URL in your browser to authenticate:\n\n %s\n\n", cs.Bold(authorizeURL)) } fmt.Fprintf(io.Out, "Waiting for authentication...\n") cbResult := <-resultCh + telemetry.Track(ctx, telemetry.AuthCallbackReceived(flow, time.Since(start))) if cbResult.Error != "" { - return "", fmt.Errorf("authorization failed: %s", cbResult.Error) + err := fmt.Errorf("authorization failed: %s", cbResult.Error) + telemetry.Track( + ctx, + telemetry.AuthFailed(flow, telemetry.AuthStepCallback, cmdutil.ErrorClass(err)), + ) + return "", err } if cbResult.Code == "" { - return "", fmt.Errorf("no authorization code received") + err := fmt.Errorf("no authorization code received") + telemetry.Track( + ctx, + telemetry.AuthFailed(flow, telemetry.AuthStepCallback, cmdutil.ErrorClass(err)), + ) + return "", err } io.StartProgressIndicatorWithLabel("Exchanging code for tokens") tokenResp, err := client.AuthorizationCodeGrant(cbResult.Code, codeVerifier, redirectURI) io.StopProgressIndicator() if err != nil { + telemetry.Track( + ctx, + telemetry.AuthFailed(flow, telemetry.AuthStepExchange, cmdutil.ErrorClass(err)), + ) return "", err } diff --git a/pkg/cmd/application/create/create.go b/pkg/cmd/application/create/create.go index ed623900..14911394 100644 --- a/pkg/cmd/application/create/create.go +++ b/pkg/cmd/application/create/create.go @@ -1,6 +1,7 @@ package create import ( + "context" "fmt" "strings" @@ -16,6 +17,7 @@ import ( "github.com/algolia/cli/pkg/iostreams" pkgopen "github.com/algolia/cli/pkg/open" "github.com/algolia/cli/pkg/prompt" + "github.com/algolia/cli/pkg/telemetry" "github.com/algolia/cli/pkg/validators" ) @@ -78,7 +80,7 @@ func NewCreateCmd(f *cmdutil.Factory) *cobra.Command { }, RunE: func(cmd *cobra.Command, args []string) error { opts.nameProvided = cmd.Flags().Changed("name") - return runCreateCmd(opts) + return runCreateCmd(cmd.Context(), opts) }, } @@ -99,7 +101,7 @@ func NewCreateCmd(f *cmdutil.Factory) *cobra.Command { return cmd } -func runCreateCmd(opts *CreateOptions) error { +func runCreateCmd(ctx context.Context, opts *CreateOptions) error { cs := opts.IO.ColorScheme() name, err := resolveName(opts) @@ -180,11 +182,23 @@ func runCreateCmd(opts *CreateOptions) error { return err } if !accepted { + telemetry.Track( + ctx, + telemetry.ApplicationCreateAborted(telemetry.TriggeredFromExplicitCommand), + ) fmt.Fprintf(opts.IO.Out, "%s Aborted; no application was created.\n", cs.WarningIcon()) return nil } - appDetails, err := apputil.CreateAndFetchApplication(opts.IO, client, token, opts.Region, name) + appDetails, err := apputil.CreateAndFetchApplication( + ctx, + opts.IO, + client, + token, + opts.Region, + name, + telemetry.TriggeredFromExplicitCommand, + ) if err != nil { return err } diff --git a/pkg/cmd/application/create/create_test.go b/pkg/cmd/application/create/create_test.go index 097c2adb..c8cd6048 100644 --- a/pkg/cmd/application/create/create_test.go +++ b/pkg/cmd/application/create/create_test.go @@ -1,6 +1,7 @@ package create import ( + "context" "encoding/json" "net/http" "net/http/httptest" @@ -212,7 +213,7 @@ func TestRun_FreeNonInteractive(t *testing.T) { opts, out, _ := newOpts(t, srv, false) opts.AcceptTerms = true - require.NoError(t, runCreateCmd(opts)) + require.NoError(t, runCreateCmd(context.Background(), opts)) assert.Equal(t, 1, srv.createCalls) assert.Equal(t, 0, srv.patchCalls) assert.Contains(t, out.String(), "APP1") @@ -224,7 +225,7 @@ func TestRun_NonInteractiveRequiresAcceptTerms(t *testing.T) { opts, _, _ := newOpts(t, srv, false) - err := runCreateCmd(opts) + err := runCreateCmd(context.Background(), opts) require.Error(t, err) assert.Contains(t, err.Error(), "must be accepted") assert.Equal(t, 0, srv.createCalls) @@ -238,7 +239,7 @@ func TestRun_PaidWithBillingNonInteractive(t *testing.T) { opts.Plan = "grow" opts.AcceptTerms = true - require.NoError(t, runCreateCmd(opts)) + require.NoError(t, runCreateCmd(context.Background(), opts)) assert.Equal(t, 1, srv.createCalls) assert.Equal(t, 1, srv.patchCalls) assert.Equal(t, "grow", srv.lastPlan) @@ -252,7 +253,7 @@ func TestRun_PaidWithBillingRequiresAcceptTerms(t *testing.T) { opts, _, _ := newOpts(t, srv, false) opts.Plan = "grow" - err := runCreateCmd(opts) + err := runCreateCmd(context.Background(), opts) require.Error(t, err) assert.Contains(t, err.Error(), "must be accepted") assert.Equal(t, 0, srv.createCalls) @@ -267,7 +268,7 @@ func TestRun_PaidNoBillingNonInteractive(t *testing.T) { opts.Plan = "grow" opts.AcceptTerms = true - err := runCreateCmd(opts) + err := runCreateCmd(context.Background(), opts) require.Error(t, err) assert.Contains(t, err.Error(), "payment method") assert.Equal(t, 0, srv.createCalls) @@ -284,7 +285,7 @@ func TestRun_PaidNoBillingInteractiveOpensBilling(t *testing.T) { opts, _, opened := newOpts(t, srv, true) opts.Plan = "grow" - require.NoError(t, runCreateCmd(opts)) + require.NoError(t, runCreateCmd(context.Background(), opts)) assert.Equal(t, 0, srv.createCalls) assert.Equal( t, @@ -302,7 +303,7 @@ func TestRun_PaidNoBillingInteractiveDeclineOpen(t *testing.T) { opts, _, opened := newOpts(t, srv, true) opts.Plan = "grow" - require.NoError(t, runCreateCmd(opts)) + require.NoError(t, runCreateCmd(context.Background(), opts)) assert.Equal(t, 0, srv.createCalls) assert.Empty(t, *opened) } @@ -316,7 +317,7 @@ func TestRun_ToSDeclineAborts(t *testing.T) { opts, out, _ := newOpts(t, srv, true) opts.Plan = "free" - require.NoError(t, runCreateCmd(opts)) + require.NoError(t, runCreateCmd(context.Background(), opts)) assert.Equal(t, 0, srv.createCalls) assert.Contains(t, out.String(), "Aborted") } @@ -332,7 +333,7 @@ func TestRun_AcceptTermsSkipsPromptInteractive(t *testing.T) { opts.Plan = "free" opts.AcceptTerms = true - require.NoError(t, runCreateCmd(opts)) + require.NoError(t, runCreateCmd(context.Background(), opts)) assert.Equal(t, 1, srv.createCalls) assert.Contains(t, out.String(), "Terms accepted via --accept-terms") } @@ -346,7 +347,7 @@ func TestRun_PaidPlanHiddenByServerNonInteractive(t *testing.T) { opts.Plan = "grow" opts.AcceptTerms = true - err := runCreateCmd(opts) + err := runCreateCmd(context.Background(), opts) require.Error(t, err) assert.Contains(t, err.Error(), "payment method") assert.NotContains(t, err.Error(), "invalid plan") @@ -366,7 +367,7 @@ func TestRun_PaidPlanHiddenByServerInteractiveOpensBilling(t *testing.T) { opts, _, opened := newOpts(t, srv, true) opts.Plan = "grow" - require.NoError(t, runCreateCmd(opts)) + require.NoError(t, runCreateCmd(context.Background(), opts)) assert.Equal(t, 0, srv.createCalls) assert.Equal( t, @@ -383,7 +384,7 @@ func TestRun_InvalidPlanErrors(t *testing.T) { opts.Plan = "bogus" opts.AcceptTerms = true - err := runCreateCmd(opts) + err := runCreateCmd(context.Background(), opts) require.Error(t, err) assert.Contains(t, err.Error(), "invalid plan") assert.Equal(t, 0, srv.createCalls) @@ -397,7 +398,7 @@ func TestRun_InteractivePickerHidesPaidWithoutBilling(t *testing.T) { opts, out, _ := newOpts(t, srv, true) - require.NoError(t, runCreateCmd(opts)) + require.NoError(t, runCreateCmd(context.Background(), opts)) assert.Equal(t, 1, srv.createCalls) assert.Equal(t, 0, srv.patchCalls) assert.Contains(t, out.String(), "only the Free plan is available") @@ -418,7 +419,7 @@ func TestRun_InteractivePickerSelectsPaid(t *testing.T) { opts, out, _ := newOpts(t, srv, true) - require.NoError(t, runCreateCmd(opts)) + require.NoError(t, runCreateCmd(context.Background(), opts)) assert.Equal(t, 1, srv.createCalls) assert.Equal(t, 1, srv.patchCalls) assert.Equal(t, "grow", srv.lastPlan) @@ -434,7 +435,7 @@ func TestRun_DryRunDoesNotCallAPI(t *testing.T) { opts.DryRun = true opts.PrintFlags = newPrintFlags("") - require.NoError(t, runCreateCmd(opts)) + require.NoError(t, runCreateCmd(context.Background(), opts)) assert.Equal(t, 0, srv.createCalls) assert.Equal(t, 0, srv.patchCalls) assert.Contains(t, out.String(), "Dry run") @@ -450,7 +451,7 @@ func TestRun_PlanChangeFailureKeepsFreeApp(t *testing.T) { opts.Plan = "grow" opts.AcceptTerms = true - err := runCreateCmd(opts) + err := runCreateCmd(context.Background(), opts) require.Error(t, err) assert.Contains(t, err.Error(), "failed to apply") assert.Equal(t, 1, srv.createCalls) diff --git a/pkg/cmd/application/downgrade/downgrade.go b/pkg/cmd/application/downgrade/downgrade.go index 27013e51..69be2623 100644 --- a/pkg/cmd/application/downgrade/downgrade.go +++ b/pkg/cmd/application/downgrade/downgrade.go @@ -49,7 +49,7 @@ func NewDowngradeCmd(f *cmdutil.Factory) *cobra.Command { "skipAuthCheck": "true", }, RunE: func(cmd *cobra.Command, args []string) error { - return planchange.Run(opts) + return planchange.Run(cmd.Context(), opts) }, } diff --git a/pkg/cmd/application/planchange/planchange.go b/pkg/cmd/application/planchange/planchange.go index 9c604164..6bab1b19 100644 --- a/pkg/cmd/application/planchange/planchange.go +++ b/pkg/cmd/application/planchange/planchange.go @@ -5,6 +5,7 @@ package planchange import ( + "context" "fmt" "strings" @@ -17,6 +18,7 @@ import ( "github.com/algolia/cli/pkg/iostreams" pkgopen "github.com/algolia/cli/pkg/open" "github.com/algolia/cli/pkg/prompt" + "github.com/algolia/cli/pkg/telemetry" ) // Direction selects whether the flow offers higher-tier (upgrade) or @@ -54,8 +56,11 @@ type changeResult struct { } // Run executes the shared plan-change flow. -func Run(opts *Options) error { +func Run(ctx context.Context, opts *Options) error { cs := opts.IO.ColorScheme() + ev := planChangeEventsFor(opts.Direction) + + telemetry.Track(ctx, ev.started(opts.Plan)) appID, err := opts.Config.Profile().GetApplicationID() if err != nil { @@ -146,6 +151,7 @@ func Run(opts *Options) error { return err } if !accepted { + telemetry.Track(ctx, ev.declinedTerms(target.ID)) fmt.Fprintf( opts.IO.Out, "%s Plan change aborted; no changes were made.\n", @@ -153,13 +159,17 @@ func Run(opts *Options) error { ) return nil } + telemetry.Track(ctx, ev.acceptedTerms(target.ID)) if err := callWithReauth(opts.IO, client, &token, "Changing plan", func(t string) error { _, e := client.ChangeApplicationPlan(t, appID, target.ID) return e }); err != nil { + class, _, status := cmdutil.ClassifyError(err) + telemetry.Track(ctx, ev.failed(target.ID, class, status)) return err } + telemetry.Track(ctx, ev.completed(target.ID)) if opts.PrintFlags.OutputFlagSpecified() && opts.PrintFlags.OutputFormat != nil { p, err := opts.PrintFlags.ToPrinter() @@ -189,6 +199,36 @@ func Run(opts *Options) error { return offerCostManagementBudget(opts, client.DashboardURL, appID) } +// planChangeEvents bundles the telemetry constructors for one direction, so the +// shared flow emits upgrade- or downgrade-named events without branching at +// each call site. +type planChangeEvents struct { + started func(plan string) telemetry.Event + acceptedTerms func(plan string) telemetry.Event + declinedTerms func(plan string) telemetry.Event + failed func(plan, errorClass string, httpStatus int) telemetry.Event + completed func(plan string) telemetry.Event +} + +func planChangeEventsFor(dir Direction) planChangeEvents { + if dir == DirectionDowngrade { + return planChangeEvents{ + started: telemetry.ApplicationDowngradeStarted, + acceptedTerms: telemetry.ApplicationDowngradeAcceptedTerms, + declinedTerms: telemetry.ApplicationDowngradeDeclinedTerms, + failed: telemetry.ApplicationDowngradeFailed, + completed: telemetry.ApplicationDowngradeCompleted, + } + } + return planChangeEvents{ + started: telemetry.ApplicationUpgradeStarted, + acceptedTerms: telemetry.ApplicationUpgradeAcceptedTerms, + declinedTerms: telemetry.ApplicationUpgradeDeclinedTerms, + failed: telemetry.ApplicationUpgradeFailed, + completed: telemetry.ApplicationUpgradeCompleted, + } +} + // offerCostManagementBudget tells the user they can create a budget and, when // confirmed, opens the cost management page in the browser. func offerCostManagementBudget(opts *Options, dashboardURL, appID string) error { diff --git a/pkg/cmd/application/planchange/planchange_test.go b/pkg/cmd/application/planchange/planchange_test.go index bd637118..1f7d45e5 100644 --- a/pkg/cmd/application/planchange/planchange_test.go +++ b/pkg/cmd/application/planchange/planchange_test.go @@ -1,6 +1,7 @@ package planchange import ( + "context" "encoding/json" "net/http" "net/http/httptest" @@ -16,6 +17,7 @@ import ( "github.com/algolia/cli/pkg/auth" "github.com/algolia/cli/pkg/cmdutil" "github.com/algolia/cli/pkg/prompt" + "github.com/algolia/cli/pkg/telemetry" "github.com/algolia/cli/test" ) @@ -196,7 +198,7 @@ func TestRun_WithPlanFlag(t *testing.T) { opts.Plan = "grow" opts.AcceptTerms = true - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 1, srv.patchCalls) assert.Equal(t, "grow", srv.lastPlan) assert.Contains(t, out.String(), "Grow") @@ -211,7 +213,7 @@ func TestRun_FreeTargetNotBilled(t *testing.T) { opts.Plan = "free" opts.AcceptTerms = true - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 1, srv.patchCalls) // "free" maps to the free-type template, whose id is "build". assert.Equal(t, "build", srv.lastPlan) @@ -226,7 +228,7 @@ func TestRun_BillingBlock(t *testing.T) { opts.Plan = "grow" opts.AcceptTerms = true - err := Run(opts) + err := Run(context.Background(), opts) require.Error(t, err) assert.Contains(t, err.Error(), "payment method") assert.Equal(t, 0, srv.patchCalls) @@ -241,7 +243,7 @@ func TestRun_ToSDeclineAborts(t *testing.T) { opts, out, _ := newOpts(t, srv, true) opts.Plan = "grow" - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 0, srv.patchCalls) assert.Contains(t, out.String(), "aborted") } @@ -253,7 +255,7 @@ func TestRun_NonInteractiveRequiresPlan(t *testing.T) { opts, _, _ := newOpts(t, srv, false) // No --plan and no TTY. - err := Run(opts) + err := Run(context.Background(), opts) require.Error(t, err) assert.Contains(t, err.Error(), "--plan is required") assert.Equal(t, 0, srv.patchCalls) @@ -274,7 +276,7 @@ func TestRun_InteractivePicker(t *testing.T) { opts, out, _ := newOpts(t, srv, true) - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 1, srv.patchCalls) assert.Equal(t, "grow", srv.lastPlan) assert.Contains(t, out.String(), "Current application: APP1 (My App)") @@ -288,7 +290,7 @@ func TestRun_DryRunDoesNotCallAPI(t *testing.T) { opts.Plan = "grow" opts.DryRun = true - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 0, srv.patchCalls) assert.Contains(t, out.String(), "Dry run") assert.Contains(t, out.String(), "Grow") @@ -303,7 +305,7 @@ func TestRun_OfferCostManagementBudget(t *testing.T) { opts, out, opened := newOpts(t, srv, true) opts.Plan = "grow" - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 1, srv.patchCalls) assert.Contains(t, out.String(), "create a budget") assert.Equal( @@ -323,7 +325,7 @@ func TestRun_FreePlanSkipsCostManagementBudget(t *testing.T) { opts.Plan = "free" opts.AcceptTerms = true - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 1, srv.patchCalls) assert.NotContains(t, out.String(), "create a budget") assert.Empty(t, *opened) @@ -338,7 +340,7 @@ func TestRun_OutputJSON(t *testing.T) { opts.AcceptTerms = true opts.PrintFlags = newPrintFlags("json") - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 1, srv.patchCalls) assert.Contains(t, out.String(), `"plan":"grow"`) assert.Contains(t, out.String(), `"application_id":"APP1"`) @@ -355,7 +357,7 @@ func TestRun_UpgradeFiltersToHigherPlans(t *testing.T) { opts, out, _ := newOpts(t, srv, true) opts.Direction = DirectionUpgrade - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 1, srv.patchCalls) assert.Equal(t, "grow-plus", srv.lastPlan) assert.Contains(t, out.String(), "current plan: Grow") @@ -372,7 +374,7 @@ func TestRun_DowngradeFiltersToLowerPlans(t *testing.T) { opts, _, _ := newOpts(t, srv, true) opts.Direction = DirectionDowngrade - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 1, srv.patchCalls) assert.Equal(t, "build", srv.lastPlan) } @@ -385,7 +387,7 @@ func TestRun_UpgradeAtHighestPlanIsNoOp(t *testing.T) { opts, out, _ := newOpts(t, srv, true) opts.Direction = DirectionUpgrade - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 0, srv.patchCalls) assert.Contains(t, out.String(), "already on the highest") assert.Contains(t, out.String(), "nothing to upgrade") @@ -399,7 +401,7 @@ func TestRun_DowngradeAtLowestPlanIsNoOp(t *testing.T) { opts, out, _ := newOpts(t, srv, true) opts.Direction = DirectionDowngrade - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 0, srv.patchCalls) assert.Contains(t, out.String(), "already on the lowest") assert.Contains(t, out.String(), "nothing to downgrade") @@ -417,7 +419,7 @@ func TestRun_PlanFlagOverridesDirection(t *testing.T) { opts.Plan = "free" opts.AcceptTerms = true - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 1, srv.patchCalls) assert.Equal(t, "build", srv.lastPlan) } @@ -431,12 +433,97 @@ func TestRun_SamePlanIsNoOp(t *testing.T) { opts.Plan = "grow" opts.AcceptTerms = true - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 0, srv.patchCalls) assert.Contains(t, out.String(), "already on the Grow plan") assert.Contains(t, out.String(), "no change needed") } +// recordingTelemetry records the names of the events emitted during a run. +type recordingTelemetry struct{ events []string } + +func (r *recordingTelemetry) Identify(context.Context) error { return nil } + +func (r *recordingTelemetry) Track(_ context.Context, event string, _ map[string]any) error { + r.events = append(r.events, event) + return nil +} + +func (r *recordingTelemetry) Close() {} + +// TestRun_EmitsPlanChangeEventsByDirection verifies the shared flow emits the +// upgrade- or downgrade-named events depending on the direction. +func TestRun_EmitsPlanChangeEventsByDirection(t *testing.T) { + tests := []struct { + name string + direction Direction + want []string + }{ + { + name: "upgrade", + direction: DirectionUpgrade, + want: []string{ + telemetry.EventApplicationUpgradeStarted, + telemetry.EventApplicationUpgradeAcceptedTerms, + telemetry.EventApplicationUpgradeCompleted, + }, + }, + { + name: "downgrade", + direction: DirectionDowngrade, + want: []string{ + telemetry.EventApplicationDowngradeStarted, + telemetry.EventApplicationDowngradeAcceptedTerms, + telemetry.EventApplicationDowngradeCompleted, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + srv := newServer(t, `{"has_payment_method": true}`) + srv.currentPlanLabel = "Grow" + defer srv.Close() + + stubPicker(t, 0) + defer prompt.StubConfirm(true)() + + opts, _, _ := newOpts(t, srv, true) + opts.Direction = tt.direction + + rec := &recordingTelemetry{} + ctx := telemetry.WithTelemetryClient(context.Background(), rec) + + require.NoError(t, Run(ctx, opts)) + assert.Equal(t, tt.want, rec.events) + }) + } +} + +// TestRun_EmitsDeclinedTermsEvent verifies declining the terms emits the +// direction-specific declined-terms event and nothing after it. +func TestRun_EmitsDeclinedTermsEvent(t *testing.T) { + srv := newServer(t, `{"has_payment_method": true}`) + srv.currentPlanLabel = "Grow" + defer srv.Close() + + stubPicker(t, 0) + defer prompt.StubConfirm(false)() + + opts, _, _ := newOpts(t, srv, true) + opts.Direction = DirectionDowngrade + + rec := &recordingTelemetry{} + ctx := telemetry.WithTelemetryClient(context.Background(), rec) + + require.NoError(t, Run(ctx, opts)) + assert.Equal(t, []string{ + telemetry.EventApplicationDowngradeStarted, + telemetry.EventApplicationDowngradeDeclinedTerms, + }, rec.events) + assert.Equal(t, 0, srv.patchCalls) +} + func TestRun_UnknownCurrentPlanShowsAllPlans(t *testing.T) { srv := newServer(t, `{"has_payment_method": true}`) srv.currentPlanLabel = "Enterprise" @@ -448,7 +535,7 @@ func TestRun_UnknownCurrentPlanShowsAllPlans(t *testing.T) { opts, _, _ := newOpts(t, srv, true) opts.Direction = DirectionUpgrade - require.NoError(t, Run(opts)) + require.NoError(t, Run(context.Background(), opts)) assert.Equal(t, 1, srv.patchCalls) assert.Equal(t, "build", srv.lastPlan) } diff --git a/pkg/cmd/application/upgrade/upgrade.go b/pkg/cmd/application/upgrade/upgrade.go index ed1cd632..1eee1c7e 100644 --- a/pkg/cmd/application/upgrade/upgrade.go +++ b/pkg/cmd/application/upgrade/upgrade.go @@ -50,7 +50,7 @@ func NewUpgradeCmd(f *cmdutil.Factory) *cobra.Command { "skipAuthCheck": "true", }, RunE: func(cmd *cobra.Command, args []string) error { - return planchange.Run(opts) + return planchange.Run(cmd.Context(), opts) }, } diff --git a/pkg/cmd/auth/login/login.go b/pkg/cmd/auth/login/login.go index 7fdb1461..5b2a1ee7 100644 --- a/pkg/cmd/auth/login/login.go +++ b/pkg/cmd/auth/login/login.go @@ -3,6 +3,7 @@ package login import ( "context" "fmt" + "time" "github.com/AlecAivazis/survey/v2" "github.com/MakeNowJust/heredoc" @@ -78,9 +79,11 @@ func NewLoginCmd(f *cmdutil.Factory) *cobra.Command { } cmd.Flags().StringVar(&opts.AppName, "app-name", "", "Auto-select application by name") - cmd.Flags().StringVar(&opts.ProfileName, "profile-name", "", "Name for the CLI profile (defaults to application name)") + cmd.Flags(). + StringVar(&opts.ProfileName, "profile-name", "", "Name for the CLI profile (defaults to application name)") cmd.Flags().BoolVar(&opts.Default, "default", true, "Set the profile as the default") - cmd.Flags().BoolVar(&opts.NoBrowser, "no-browser", false, "Print the authorize URL instead of opening the browser") + cmd.Flags(). + BoolVar(&opts.NoBrowser, "no-browser", false, "Print the authorize URL instead of opening the browser") return cmd } @@ -95,8 +98,14 @@ func RunOAuthFlow(ctx context.Context, opts *LoginOptions, signup bool) error { cs := opts.IO.ColorScheme() client := opts.NewDashboardClient(auth.OAuthClientID()) + flow := telemetry.FlowLogin + if signup { + flow = telemetry.FlowSignup + } + start := time.Now() + openBrowser := !opts.NoBrowser - accessToken, err := auth.RunOAuth(opts.IO, client, signup, openBrowser) + accessToken, err := auth.RunOAuth(ctx, opts.IO, client, signup, openBrowser) if err != nil { return err } @@ -107,18 +116,38 @@ func RunOAuthFlow(ctx context.Context, opts *LoginOptions, signup bool) error { apps, err := client.ListApplications(accessToken) opts.IO.StopProgressIndicator() if err != nil { + telemetry.Track( + ctx, + telemetry.AuthFailed(flow, telemetry.AuthStepAppsFetch, cmdutil.ErrorClass(err)), + ) return err } + hadExistingApps := len(apps) > 0 + createdAppDuringFlow := false + var appDetails *dashboard.Application if len(apps) == 0 { - fmt.Fprintf(opts.IO.Out, "\n%s No applications found. Let's create one.\n", cs.WarningIcon()) - - appDetails, err = apputil.CreateAndFetchApplication(opts.IO, client, accessToken, "", opts.AppName) + fmt.Fprintf( + opts.IO.Out, + "\n%s No applications found. Let's create one.\n", + cs.WarningIcon(), + ) + + appDetails, err = apputil.CreateAndFetchApplication( + ctx, + opts.IO, + client, + accessToken, + "", + opts.AppName, + telemetry.TriggeredFromAuthFlow, + ) if err != nil { return err } + createdAppDuringFlow = true } else { interactive := opts.IO.CanPrompt() app, err := selectApplication(opts, apps, interactive) @@ -139,7 +168,15 @@ func RunOAuthFlow(ctx context.Context, opts *LoginOptions, signup bool) error { profileName = appDetails.Name } - return apputil.ConfigureProfile(opts.IO, opts.Config, appDetails, profileName, opts.Default) + if err := apputil.ConfigureProfile(opts.IO, opts.Config, appDetails, profileName, opts.Default); err != nil { + return err + } + + telemetry.Track( + ctx, + telemetry.AuthCompleted(flow, time.Since(start), hadExistingApps, createdAppDuringFlow), + ) + return nil } // identifyAuthenticatedUser emits a telemetry Identify for the user that just @@ -180,7 +217,11 @@ func reuseExistingAPIKey(cfg config.IConfig, app *dashboard.Application) bool { return false } -func selectApplication(opts *LoginOptions, apps []dashboard.Application, interactive bool) (*dashboard.Application, error) { +func selectApplication( + opts *LoginOptions, + apps []dashboard.Application, + interactive bool, +) (*dashboard.Application, error) { if opts.AppName != "" { for i := range apps { if apps[i].Name == opts.AppName { diff --git a/pkg/cmd/root/root.go b/pkg/cmd/root/root.go index 399535e1..1a273032 100644 --- a/pkg/cmd/root/root.go +++ b/pkg/cmd/root/root.go @@ -13,6 +13,7 @@ import ( "os/exec" "path/filepath" "strings" + "time" "github.com/AlecAivazis/survey/v2/terminal" "github.com/MakeNowJust/heredoc" @@ -191,14 +192,12 @@ func Execute() exitCode { return err } - // Send telemetry. - err = telemetryClient.Track(ctx, "Command Invoked") + // Command Invoked; flushed at the end of Execute with the command's other events. + err = telemetryClient.Track(ctx, telemetry.EventCommandInvoked, nil) if err != nil && hasDebug { fmt.Fprintf(stderr, "Error tracking telemetry: %s\n", err) } - go telemetryClient.Close() // flush telemetry events - return nil } @@ -210,23 +209,41 @@ func Execute() exitCode { } // Run the command. + start := time.Now() cmd, err := rootCmd.ExecuteContextC(ctx) - // Handle eventual errors. + + // Exit code; reused below as the Command Completed exit_code property. + code := exitCodeForError(err, authError) + + // Command Failed precedes Command Completed so Command Completed is always last. + if cmd != nil && cmdutil.ShouldTrackUsage(cmd) { + if err != nil { + class, source, status := cmdutil.ClassifyError(err) + telemetry.Track(ctx, telemetry.CommandFailed(class, source, status)) + } + telemetry.Track( + ctx, + telemetry.CommandCompleted(time.Since(start), err == nil, int(code)), + ) + } + flushTelemetry(ctx) + + // Print the error (exit code already resolved above). if err != nil { - if err == cmdutil.ErrSilent { - return exitError - } else if cmdutil.IsUserCancellation(err) { + switch { + case err == cmdutil.ErrSilent: + // Intentionally silent. + case cmdutil.IsUserCancellation(err): if errors.Is(err, terminal.InterruptErr) { // ensure the next shell prompt will start on its own line fmt.Fprint(stderr, "\n") } - return exitCancel - } else if errors.Is(err, authError) { - return exitAuth + case errors.Is(err, authError): + // Already reported by PersistentPreRunE. + default: + printError(stderr, err, cmd, hasDebug) } - - printError(stderr, err, cmd, hasDebug) - return exitError + return code } // If there is an update available, notify the user. @@ -248,6 +265,44 @@ func Execute() exitCode { return exitOK } +// exitCodeForError maps a command error to its process exit code. +func exitCodeForError(err error, authError error) exitCode { + switch { + case err == nil: + return exitOK + case err == cmdutil.ErrSilent: + return exitError + case cmdutil.IsUserCancellation(err): + return exitCancel + case errors.Is(err, authError): + return exitAuth + default: + return exitError + } +} + +// flushTelemetry sends this command's queued events as one batch and waits for +// it, so events aren't dropped at exit. The client bounds its own HTTP request; +// the cap here is a last-resort ceiling, kept above that timeout so a real +// in-flight flush is never abandoned. +func flushTelemetry(ctx context.Context) { + client := telemetry.GetTelemetryClient(ctx) + if client == nil { + return + } + + done := make(chan struct{}) + go func() { + client.Close() + close(done) + }() + + select { + case <-done: + case <-time.After(5 * time.Second): + } +} + // createContext creates a context with telemetry. func createContext( cmd *cobra.Command, diff --git a/pkg/cmd/shared/apputil/create.go b/pkg/cmd/shared/apputil/create.go index a5d217ee..7e7e8576 100644 --- a/pkg/cmd/shared/apputil/create.go +++ b/pkg/cmd/shared/apputil/create.go @@ -1,29 +1,39 @@ package apputil import ( + "context" "errors" "fmt" "strings" + "time" "github.com/AlecAivazis/survey/v2" "github.com/algolia/cli/api/dashboard" + "github.com/algolia/cli/pkg/cmdutil" "github.com/algolia/cli/pkg/config" "github.com/algolia/cli/pkg/iostreams" "github.com/algolia/cli/pkg/prompt" + "github.com/algolia/cli/pkg/telemetry" ) // CreateApplicationWithRetry creates an application, retrying with a different -// region if the selected one has no available cluster. +// region if the selected one has no available cluster. triggeredFrom tags the +// emitted telemetry events with their origin (telemetry.TriggeredFrom*). func CreateApplicationWithRetry( + ctx context.Context, io *iostreams.IOStreams, client *dashboard.Client, accessToken string, region string, appName string, + triggeredFrom string, ) (*dashboard.Application, string, error) { cs := io.ColorScheme() + telemetry.Track(ctx, telemetry.ApplicationCreateStarted(triggeredFrom)) + start := time.Now() + for { if region == "" { var err error @@ -38,6 +48,10 @@ func CreateApplicationWithRetry( io.StopProgressIndicator() if err == nil { + telemetry.Track( + ctx, + telemetry.ApplicationCreateCompleted(triggeredFrom, time.Since(start)), + ) fmt.Fprintf(io.Out, "%s Application %s created in region %q\n", cs.SuccessIcon(), cs.Bold(app.ID), region) return app, region, nil @@ -45,20 +59,35 @@ func CreateApplicationWithRetry( var clusterErr *dashboard.ErrClusterUnavailable if errors.As(err, &clusterErr) { - fmt.Fprintf(io.Out, "%s No cluster available in region %q. Please select another region.\n", - cs.WarningIcon(), region) + fmt.Fprintf( + io.Out, + "%s No cluster available in region %q. Please select another region.\n", + cs.WarningIcon(), + region, + ) region = "" if !io.CanPrompt() { - return nil, "", fmt.Errorf("no cluster available in region %q — try a different --region", clusterErr.Region) + trackCreateFailed(ctx, triggeredFrom, err) + return nil, "", fmt.Errorf( + "no cluster available in region %q — try a different --region", + clusterErr.Region, + ) } continue } + trackCreateFailed(ctx, triggeredFrom, err) return nil, "", fmt.Errorf("application creation failed: %w", err) } } +// trackCreateFailed emits CLI Application Create Failed with the classified error. +func trackCreateFailed(ctx context.Context, triggeredFrom string, err error) { + class, _, status := cmdutil.ClassifyError(err) + telemetry.Track(ctx, telemetry.ApplicationCreateFailed(triggeredFrom, class, status)) +} + // PromptRegion fetches regions from the API and prompts the user to select one. func PromptRegion( io *iostreams.IOStreams, @@ -103,11 +132,20 @@ func PromptRegion( // CreateAndFetchApplication creates an application (with region retry) and // generates an API key for it. func CreateAndFetchApplication( + ctx context.Context, io *iostreams.IOStreams, client *dashboard.Client, - accessToken, region, appName string, + accessToken, region, appName, triggeredFrom string, ) (*dashboard.Application, error) { - app, _, err := CreateApplicationWithRetry(io, client, accessToken, region, appName) + app, _, err := CreateApplicationWithRetry( + ctx, + io, + client, + accessToken, + region, + appName, + triggeredFrom, + ) if err != nil { return nil, err } @@ -160,7 +198,8 @@ func ConfigureProfile( } profileName = strings.ToLower(profileName) - if exists, existingAppID := cfg.ApplicationIDForProfile(profileName); exists && existingAppID != appDetails.ID { + if exists, existingAppID := cfg.ApplicationIDForProfile(profileName); exists && + existingAppID != appDetails.ID { profileName = strings.ToLower(appDetails.Name + "-" + appDetails.ID) } diff --git a/pkg/cmdutil/classify.go b/pkg/cmdutil/classify.go new file mode 100644 index 00000000..7c5bae52 --- /dev/null +++ b/pkg/cmdutil/classify.go @@ -0,0 +1,90 @@ +package cmdutil + +import ( + "context" + "errors" + "fmt" + "net" + "net/http" + + "github.com/algolia/cli/api/dashboard" +) + +// error_source buckets for failure telemetry: where an error came from, so +// failures can be triaged without parsing the (high-cardinality) message. +const ( + ErrorSourceNetwork = "network" + ErrorSourceAPI = "api" + ErrorSourceValidation = "validation" + ErrorSourceLocal = "local" +) + +// httpStatusError is satisfied by errors carrying an HTTP status (e.g. +// *dashboard.APIError), declared here to keep the classifier decoupled from it. +type httpStatusError interface{ HTTPStatusCode() int } + +// ClassifyError returns a stable, low-cardinality error class, a source bucket +// (ErrorSource*), and the HTTP status when present (0 otherwise), so every +// "*Failed" event classifies errors the same way. Most specific checks first. +func ClassifyError(err error) (class, source string, httpStatus int) { + if err == nil { + return "", "", 0 + } + + var flagErr *FlagError + if errors.As(err, &flagErr) { + return "validation_error", ErrorSourceValidation, 0 + } + + if errors.Is(err, dashboard.ErrSessionExpired) { + return "session_expired", ErrorSourceAPI, http.StatusUnauthorized + } + var clusterErr *dashboard.ErrClusterUnavailable + if errors.As(err, &clusterErr) { + return "cluster_unavailable", ErrorSourceAPI, 0 + } + var statusErr httpStatusError + if errors.As(err, &statusErr) { + status := statusErr.HTTPStatusCode() + return fmt.Sprintf("http_%d", status), ErrorSourceAPI, status + } + + if errors.Is(err, context.Canceled) { + return "canceled", ErrorSourceLocal, 0 + } + if errors.Is(err, context.DeadlineExceeded) { + return "timeout", ErrorSourceNetwork, 0 + } + + var dnsErr *net.DNSError + if errors.As(err, &dnsErr) { + return "dns_error", ErrorSourceNetwork, 0 + } + var netErr net.Error + if errors.As(err, &netErr) { + if netErr.Timeout() { + return "timeout", ErrorSourceNetwork, 0 + } + return "network_error", ErrorSourceNetwork, 0 + } + + // Fall back to the error's type name: stable/low-cardinality for typed errors. + return rootCauseType(err), ErrorSourceLocal, 0 +} + +// ErrorClass returns only the class component of ClassifyError. +func ErrorClass(err error) string { + class, _, _ := ClassifyError(err) + return class +} + +// rootCauseType returns the type name of the deepest wrapped error. +func rootCauseType(err error) string { + for { + next := errors.Unwrap(err) + if next == nil { + return fmt.Sprintf("%T", err) + } + err = next + } +} diff --git a/pkg/cmdutil/classify_test.go b/pkg/cmdutil/classify_test.go new file mode 100644 index 00000000..38a2b8b8 --- /dev/null +++ b/pkg/cmdutil/classify_test.go @@ -0,0 +1,91 @@ +package cmdutil + +import ( + "context" + "errors" + "fmt" + "net" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/algolia/cli/api/dashboard" +) + +func TestClassifyError(t *testing.T) { + tests := []struct { + name string + err error + wantClass string + wantSource string + wantStatus int + }{ + { + name: "nil", + }, + { + name: "flag error", + err: FlagErrorf("bad flag"), + wantClass: "validation_error", + wantSource: ErrorSourceValidation, + }, + { + name: "wrapped api error carries status", + err: fmt.Errorf( + "create failed: %w", + &dashboard.APIError{StatusCode: 500, Message: "boom"}, + ), + wantClass: "http_500", + wantSource: ErrorSourceAPI, + wantStatus: 500, + }, + { + name: "session expired", + err: fmt.Errorf("call failed: %w", dashboard.ErrSessionExpired), + wantClass: "session_expired", + wantSource: ErrorSourceAPI, + wantStatus: 401, + }, + { + name: "cluster unavailable", + err: &dashboard.ErrClusterUnavailable{Region: "us", Message: "no cluster"}, + wantClass: "cluster_unavailable", + wantSource: ErrorSourceAPI, + }, + { + name: "context canceled", + err: context.Canceled, + wantClass: "canceled", + wantSource: ErrorSourceLocal, + }, + { + name: "deadline exceeded", + err: context.DeadlineExceeded, + wantClass: "timeout", + wantSource: ErrorSourceNetwork, + }, + { + name: "dns error", + err: &net.DNSError{Err: "no such host", Name: "example.invalid"}, + wantClass: "dns_error", + wantSource: ErrorSourceNetwork, + }, + { + name: "generic error falls back to local", + err: errors.New("something went wrong"), + wantClass: "*errors.errorString", + wantSource: ErrorSourceLocal, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + class, source, status := ClassifyError(tt.err) + assert.Equal(t, tt.wantClass, class) + assert.Equal(t, tt.wantSource, source) + assert.Equal(t, tt.wantStatus, status) + + assert.Equal(t, tt.wantClass, ErrorClass(tt.err)) + }) + } +} diff --git a/pkg/telemetry/event.go b/pkg/telemetry/event.go new file mode 100644 index 00000000..5924fafd --- /dev/null +++ b/pkg/telemetry/event.go @@ -0,0 +1,16 @@ +package telemetry + +import "context" + +type Event struct { + Name string + Properties map[string]any +} + +func Track(ctx context.Context, event Event) { + client := GetTelemetryClient(ctx) + if client == nil { + return + } + _ = client.Track(ctx, event.Name, event.Properties) +} diff --git a/pkg/telemetry/events.go b/pkg/telemetry/events.go new file mode 100644 index 00000000..4aaa6fbe --- /dev/null +++ b/pkg/telemetry/events.go @@ -0,0 +1,301 @@ +package telemetry + +import "time" + +// Catalog of the analytics events the CLI emits, mirroring the analytics spec. +// Keeping names and property schemas here (not inline at call sites) keeps them +// auditable in one place and prevents names/keys from drifting. +// +// To add an event: add a name constant, add a constructor, then call +// telemetry.Track(ctx, telemetry.YourEvent(...)). + +// Event names. Flow events follow the "CLI " convention. +const ( + EventCommandInvoked = "Command Invoked" + EventCommandCompleted = "Command Completed" + EventCommandFailed = "Command Failed" + + EventAuthStarted = "CLI Auth Started" + EventAuthBrowserOpened = "CLI Auth Browser Opened" + EventAuthBrowserFailed = "CLI Auth Browser Failed" + EventAuthCallbackReceived = "CLI Auth Callback Received" + EventAuthCompleted = "CLI Auth Completed" + EventAuthFailed = "CLI Auth Failed" + + EventApplicationCreateStarted = "CLI Application Create Started" + EventApplicationCreateCompleted = "CLI Application Create Completed" + EventApplicationCreateFailed = "CLI Application Create Failed" + EventApplicationCreateAborted = "CLI Application Create Aborted" + + EventApplicationUpgradeStarted = "CLI Application Upgrade Started" + EventApplicationUpgradeAcceptedTerms = "CLI Application Upgrade Accepted Terms" + EventApplicationUpgradeDeclinedTerms = "CLI Application Upgrade Declined Terms" + EventApplicationUpgradeFailed = "CLI Application Upgrade Failed" + EventApplicationUpgradeCompleted = "CLI Application Upgrade Completed" + + EventApplicationDowngradeStarted = "CLI Application Downgrade Started" + EventApplicationDowngradeAcceptedTerms = "CLI Application Downgrade Accepted Terms" + EventApplicationDowngradeDeclinedTerms = "CLI Application Downgrade Declined Terms" + EventApplicationDowngradeFailed = "CLI Application Downgrade Failed" + EventApplicationDowngradeCompleted = "CLI Application Downgrade Completed" +) + +// Property values, so call sites reference a constant instead of a literal. +const ( + FlowLogin = "login" + FlowSignup = "signup" + + // triggered_from: auth flow vs explicit "application create" command. + TriggeredFromAuthFlow = "auth_flow" + TriggeredFromExplicitCommand = "explicit_command" + + // step: where the auth flow failed (CLI Auth Failed). + AuthStepBrowser = "browser" + AuthStepCallback = "callback" + AuthStepExchange = "exchange" + AuthStepAppsFetch = "apps_fetch" +) + +// --- Root command events --------------------------------------------------- + +// CommandInvoked is emitted when a tracked command starts. +func CommandInvoked() Event { + return Event{Name: EventCommandInvoked} +} + +// CommandCompleted is emitted (deferred) when the root command returns. +func CommandCompleted(duration time.Duration, succeeded bool, exitCode int) Event { + return Event{ + Name: EventCommandCompleted, + Properties: map[string]any{ + "duration_ms": duration.Milliseconds(), + "succeeded": succeeded, + "exit_code": exitCode, + }, + } +} + +// CommandFailed is emitted when the root command returns an error. httpStatus +// is omitted when zero (i.e. the error carried no HTTP status). +func CommandFailed(errorClass, errorSource string, httpStatus int) Event { + props := map[string]any{ + "error_class": errorClass, + "error_source": errorSource, + } + if httpStatus != 0 { + props["http_status"] = httpStatus + } + return Event{Name: EventCommandFailed, Properties: props} +} + +// --- Auth flow events ------------------------------------------------------ + +// AuthStarted is emitted when the OAuth flow begins. +func AuthStarted(flow string, noBrowser bool) Event { + return Event{ + Name: EventAuthStarted, + Properties: map[string]any{ + "flow": flow, + "no_browser": noBrowser, + }, + } +} + +// AuthBrowserOpened is emitted when the default browser is launched successfully. +func AuthBrowserOpened(flow string) Event { + return Event{ + Name: EventAuthBrowserOpened, + Properties: map[string]any{"flow": flow}, + } +} + +// AuthBrowserFailed is emitted when launching the browser fails and the +// authorize URL is printed instead. +func AuthBrowserFailed(flow, errorClass string) Event { + return Event{ + Name: EventAuthBrowserFailed, + Properties: map[string]any{ + "flow": flow, + "error_class": errorClass, + }, + } +} + +// AuthCallbackReceived is emitted when the OAuth redirect hits the local +// callback server. +func AuthCallbackReceived(flow string, duration time.Duration) Event { + return Event{ + Name: EventAuthCallbackReceived, + Properties: map[string]any{ + "flow": flow, + "duration_ms": duration.Milliseconds(), + }, + } +} + +// AuthCompleted is emitted once the profile is fully configured at the end of +// the flow. +func AuthCompleted( + flow string, + duration time.Duration, + hadExistingApps, createdAppDuringFlow bool, +) Event { + return Event{ + Name: EventAuthCompleted, + Properties: map[string]any{ + "flow": flow, + "duration_ms": duration.Milliseconds(), + "had_existing_apps": hadExistingApps, + "created_app_during_flow": createdAppDuringFlow, + }, + } +} + +// AuthFailed is emitted on a timeout, token exchange error, or app fetch error. +// step is one of the AuthStep* constants. +func AuthFailed(flow, step, errorClass string) Event { + return Event{ + Name: EventAuthFailed, + Properties: map[string]any{ + "flow": flow, + "step": step, + "error_class": errorClass, + }, + } +} + +// --- Application create events --------------------------------------------- + +// ApplicationCreateStarted is emitted once validation passes, before the +// Dashboard API call. triggeredFrom is one of the TriggeredFrom* constants. +func ApplicationCreateStarted(triggeredFrom string) Event { + return Event{ + Name: EventApplicationCreateStarted, + Properties: map[string]any{"triggered_from": triggeredFrom}, + } +} + +// ApplicationCreateCompleted is emitted when the Dashboard API returns 2xx. +func ApplicationCreateCompleted(triggeredFrom string, duration time.Duration) Event { + return Event{ + Name: EventApplicationCreateCompleted, + Properties: map[string]any{ + "triggered_from": triggeredFrom, + "duration_ms": duration.Milliseconds(), + }, + } +} + +// ApplicationCreateFailed is emitted when the Dashboard API returns an error. +// httpStatus is omitted when zero. +func ApplicationCreateFailed(triggeredFrom, errorClass string, httpStatus int) Event { + props := map[string]any{ + "triggered_from": triggeredFrom, + "error_class": errorClass, + } + if httpStatus != 0 { + props["http_status"] = httpStatus + } + return Event{Name: EventApplicationCreateFailed, Properties: props} +} + +// ApplicationCreateAborted is emitted when the user declines the confirmation +// prompt. +func ApplicationCreateAborted(triggeredFrom string) Event { + return Event{ + Name: EventApplicationCreateAborted, + Properties: map[string]any{"triggered_from": triggeredFrom}, + } +} + +// --- Application upgrade events --------------------------------------------- + +// ApplicationUpgradeStarted is emitted when the user starts the upgrade flow. +func ApplicationUpgradeStarted(plan string) Event { + return Event{ + Name: EventApplicationUpgradeStarted, + } +} + +// ApplicationUpgradeAcceptedTerms is emitted when the user accepts the T&C. +func ApplicationUpgradeAcceptedTerms(plan string) Event { + return Event{ + Name: EventApplicationUpgradeAcceptedTerms, + Properties: map[string]any{"plan": plan}, + } +} + +// ApplicationUpgradeDeclinedTerms is emitted when the user declines the T&C. +func ApplicationUpgradeDeclinedTerms(plan string) Event { + return Event{ + Name: EventApplicationUpgradeDeclinedTerms, + Properties: map[string]any{"plan": plan}, + } +} + +// ApplicationUpgradeFailed is emitted when the Dashboard API returns an error. +// httpStatus is omitted when zero. +func ApplicationUpgradeFailed(plan, errorClass string, httpStatus int) Event { + props := map[string]any{ + "plan": plan, + "error_class": errorClass, + } + if httpStatus != 0 { + props["http_status"] = httpStatus + } + return Event{Name: EventApplicationUpgradeFailed, Properties: props} +} + +// ApplicationUpgradeCompleted is emitted when the upgrade flow succeeds. +func ApplicationUpgradeCompleted(plan string) Event { + return Event{ + Name: EventApplicationUpgradeCompleted, + Properties: map[string]any{"plan": plan}, + } +} + +// --- Application downgrade events ------------------------------------------- + +// ApplicationDowngradeStarted is emitted when the user starts the downgrade flow. +func ApplicationDowngradeStarted(plan string) Event { + return Event{ + Name: EventApplicationDowngradeStarted, + } +} + +// ApplicationDowngradeAcceptedTerms is emitted when the user accepts the T&C. +func ApplicationDowngradeAcceptedTerms(plan string) Event { + return Event{ + Name: EventApplicationDowngradeAcceptedTerms, + Properties: map[string]any{"plan": plan}, + } +} + +// ApplicationDowngradeDeclinedTerms is emitted when the user declines the T&C. +func ApplicationDowngradeDeclinedTerms(plan string) Event { + return Event{ + Name: EventApplicationDowngradeDeclinedTerms, + Properties: map[string]any{"plan": plan}, + } +} + +// ApplicationDowngradeFailed is emitted when the Dashboard API returns an error. +// httpStatus is omitted when zero. +func ApplicationDowngradeFailed(plan, errorClass string, httpStatus int) Event { + props := map[string]any{ + "plan": plan, + "error_class": errorClass, + } + if httpStatus != 0 { + props["http_status"] = httpStatus + } + return Event{Name: EventApplicationDowngradeFailed, Properties: props} +} + +// ApplicationDowngradeCompleted is emitted when the downgrade flow succeeds. +func ApplicationDowngradeCompleted(plan string) Event { + return Event{ + Name: EventApplicationDowngradeCompleted, + Properties: map[string]any{"plan": plan}, + } +} diff --git a/pkg/telemetry/telemetry.go b/pkg/telemetry/telemetry.go index f81abe75..c3b2b61f 100644 --- a/pkg/telemetry/telemetry.go +++ b/pkg/telemetry/telemetry.go @@ -4,10 +4,14 @@ import ( "context" "crypto/md5" // nolint:gosec "fmt" + "io" "log" "net" + "net/http" "os" "runtime" + "sync" + "time" "github.com/segmentio/analytics-go/v3" "github.com/spf13/cobra" @@ -21,6 +25,11 @@ import ( const ( AppName = "cli" telemetryAnalyticsURL = "https://telemetry-proxy.algolia.com/" + + // telemetryHTTPTimeout bounds the total duration of the telemetry flush + // HTTP request (connect, TLS, and response), so closing the client at the + // end of a command can never hang the CLI on a slow or unreachable endpoint. + telemetryHTTPTimeout = 3 * time.Second ) type telemetryMetadataKey struct{} @@ -29,12 +38,16 @@ type telemetryClientKey struct{} type TelemetryClient interface { Identify(ctx context.Context) error - Track(ctx context.Context, event string) error + Track(ctx context.Context, event string, properties map[string]any) error Close() } type AnalyticsTelemetryClient struct { client analytics.Client + debug bool + + mu sync.Mutex + lastTS time.Time } type AnalyticsTelemetryLogger struct { @@ -60,14 +73,61 @@ func newTelemetryLogger(debug bool) AnalyticsTelemetryLogger { } func NewAnalyticsTelemetryClient(debug bool) (TelemetryClient, error) { + return newAnalyticsTelemetryClient(telemetryAnalyticsURL, debug) +} + +func newAnalyticsTelemetryClient(endpoint string, debug bool) (TelemetryClient, error) { client, err := analytics.NewWithConfig("", analytics.Config{ - Endpoint: telemetryAnalyticsURL, + Endpoint: endpoint, Logger: newTelemetryLogger(debug), + // In debug mode, surface the library's own batch/flush logs. + Verbose: debug, + // Buffer every event into one batch flushed at Close. The default 5s + // interval would split a long command (e.g. interactive login) across + // requests, reordering events downstream and risking a dropped batch. + Interval: 24 * time.Hour, + BatchSize: 250, + // Bound the flush request so Close() at exit can't hang the CLI. + Transport: boundedRoundTripper{ + base: http.DefaultTransport, + timeout: telemetryHTTPTimeout, + }, }) if err != nil { return nil, err } - return &AnalyticsTelemetryClient{client: client}, nil + return &AnalyticsTelemetryClient{client: client, debug: debug}, nil +} + +// boundedRoundTripper applies a total per-request timeout, like +// http.Client.Timeout (analytics.Config only accepts a RoundTripper). +type boundedRoundTripper struct { + base http.RoundTripper + timeout time.Duration +} + +func (t boundedRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + ctx, cancel := context.WithTimeout(req.Context(), t.timeout) + resp, err := t.base.RoundTrip(req.WithContext(ctx)) + if err != nil { + cancel() + return nil, err + } + // Cancel when the body is closed, not now, or the response would be truncated. + resp.Body = &cancelOnCloseBody{ReadCloser: resp.Body, cancel: cancel} + return resp, nil +} + +// cancelOnCloseBody cancels the request context when the body is closed. +type cancelOnCloseBody struct { + io.ReadCloser + cancel context.CancelFunc +} + +func (b *cancelOnCloseBody) Close() error { + err := b.ReadCloser.Close() + b.cancel() + return err } // IdentifyOnce sends a single Identify event through a short-lived client and @@ -236,19 +296,34 @@ func (a *AnalyticsTelemetryClient) Identify(ctx context.Context) error { return a.client.Enqueue(identify) } -// Track tracks the event with the provided properties -func (a *AnalyticsTelemetryClient) Track(ctx context.Context, event string) error { +// Track merges custom properties over the base properties (custom wins on collisions). +func (a *AnalyticsTelemetryClient) Track( + ctx context.Context, + event string, + properties map[string]any, +) error { metadata := GetEventMetadata(ctx) + props := map[string]interface{}{ + "invocation_id": metadata.InvocationID, + "app_id": metadata.AppID, + "command": metadata.CommandPath, + "flags": metadata.CommandFlags, + } + for k, v := range properties { + props[k] = v + } + + // In debug mode, echo each event to stderr for local observability. + if a.debug { + fmt.Fprintf(os.Stderr, "[telemetry] %s %v\n", event, properties) + } + track := analytics.Track{ Event: event, AnonymousId: metadata.AnonymousID, - Properties: map[string]interface{}{ - "invocation_id": metadata.InvocationID, - "app_id": metadata.AppID, - "command": metadata.CommandPath, - "flags": metadata.CommandFlags, - }, + Properties: props, + Timestamp: a.nextTimestamp(), Context: &analytics.Context{ Device: analytics.DeviceInfo{ Id: metadata.AnonymousID, @@ -263,11 +338,33 @@ func (a *AnalyticsTelemetryClient) Track(ctx context.Context, event string) erro return a.client.Enqueue(track) } +// nextTimestamp returns strictly increasing timestamps at least 1ms apart. +// Amplitude truncates event_time to milliseconds, so same-millisecond events +// are ordered non-deterministically; spacing by 1ms preserves emit order. +func (a *AnalyticsTelemetryClient) nextTimestamp() time.Time { + a.mu.Lock() + defer a.mu.Unlock() + + ts := time.Now() + if !ts.After(a.lastTS) { + ts = a.lastTS.Add(time.Millisecond) + } + a.lastTS = ts + return ts +} + // Close closes the client, waiting for all pending events to be sent. func (a *AnalyticsTelemetryClient) Close() { _ = a.client.Close() } -func (a *NoOpTelemetryClient) Identify(ctx context.Context) error { return nil } -func (a *NoOpTelemetryClient) Track(ctx context.Context, event string) error { return nil } -func (a *NoOpTelemetryClient) Close() {} +func (a *NoOpTelemetryClient) Identify(ctx context.Context) error { return nil } + +func (a *NoOpTelemetryClient) Track( + ctx context.Context, + event string, + properties map[string]any, +) error { + return nil +} +func (a *NoOpTelemetryClient) Close() {} diff --git a/pkg/telemetry/telemetry_test.go b/pkg/telemetry/telemetry_test.go index 8e381464..f7617b24 100644 --- a/pkg/telemetry/telemetry_test.go +++ b/pkg/telemetry/telemetry_test.go @@ -2,7 +2,12 @@ package telemetry import ( "context" + "encoding/json" + "net/http" + "net/http/httptest" + "sync" "testing" + "time" "github.com/segmentio/analytics-go/v3" "github.com/spf13/cobra" @@ -130,7 +135,7 @@ func TestTrack_IncludesUserWhenAuthenticated(t *testing.T) { metadata.SetUser("user-42", "user@test.com", "Test User") ctx := WithEventMetadata(context.Background(), metadata) - require.NoError(t, client.Track(ctx, "Command Invoked")) + require.NoError(t, client.Track(ctx, "Command Invoked", nil)) require.Len(t, fake.messages, 1) track, ok := fake.messages[0].(analytics.Track) @@ -146,10 +151,157 @@ func TestTrack_OmitsUserWhenAnonymous(t *testing.T) { metadata := NewEventMetadata() ctx := WithEventMetadata(context.Background(), metadata) - require.NoError(t, client.Track(ctx, "Command Invoked")) + require.NoError(t, client.Track(ctx, "Command Invoked", nil)) require.Len(t, fake.messages, 1) track, ok := fake.messages[0].(analytics.Track) require.True(t, ok) assert.Empty(t, track.UserId) } + +func TestTrack_TimestampsAreStrictlyIncreasing(t *testing.T) { + fake := &fakeAnalyticsClient{} + client := &AnalyticsTelemetryClient{client: fake} + + metadata := NewEventMetadata() + ctx := WithEventMetadata(context.Background(), metadata) + + // Back-to-back events land in the same millisecond but must still get + // strictly increasing timestamps so Amplitude preserves emit order. + const n = 5 + for i := 0; i < n; i++ { + require.NoError(t, client.Track(ctx, "Event", nil)) + } + require.Len(t, fake.messages, n) + + var prev time.Time + for i, m := range fake.messages { + ts := m.(analytics.Track).Timestamp + if i > 0 { + assert.True( + t, + ts.After(prev), + "timestamp %d (%v) must be strictly after previous (%v)", + i, ts, prev, + ) + } + prev = ts + } +} + +// collectBatches starts a server recording each batch POST's event names in order. +func collectBatches(t *testing.T) (url string, batches *[][]string) { + t.Helper() + + var ( + mu sync.Mutex + got [][]string + ) + srv := httptest.NewServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var payload struct { + Batch []struct { + Event string `json:"event"` + } `json:"batch"` + } + require.NoError(t, json.NewDecoder(r.Body).Decode(&payload)) + + names := make([]string, 0, len(payload.Batch)) + for _, m := range payload.Batch { + names = append(names, m.Event) + } + + mu.Lock() + got = append(got, names) + mu.Unlock() + + w.WriteHeader(http.StatusOK) + }), + ) + t.Cleanup(srv.Close) + + return srv.URL, &got +} + +// TestAnalyticsClient_SendsAllEventsInOneOrderedBatch is the regression test for +// the "events out of order / sometimes missing" bug: every event must reach the +// backend in one ordered batch, not split across the library's periodic flushes. +func TestAnalyticsClient_SendsAllEventsInOneOrderedBatch(t *testing.T) { + url, batches := collectBatches(t) + + client, err := newAnalyticsTelemetryClient(url, false) + require.NoError(t, err) + + metadata := NewEventMetadata() + metadata.AnonymousID = "anon-test" // ensure messages validate without a MAC + ctx := WithEventMetadata(context.Background(), metadata) + + want := []string{ + EventCommandInvoked, + EventAuthStarted, + EventAuthBrowserOpened, + EventAuthCallbackReceived, + EventAuthCompleted, + EventCommandCompleted, + } + for _, name := range want { + require.NoError(t, client.Track(ctx, name, nil)) + // A real command emits these over time; the gap must not trigger a flush. + time.Sleep(2 * time.Millisecond) + } + + // Close is the single flush point and must block until the batch is sent. + client.Close() + + require.Len(t, *batches, 1, "all events must be delivered in exactly one batch") + assert.Equal(t, want, (*batches)[0], "events must arrive in emit order") +} + +// TestBoundedRoundTripper_TimesOut verifies the flush request is bounded, so +// closing the client at exit can never hang the CLI on a stalled endpoint. +func TestBoundedRoundTripper_TimesOut(t *testing.T) { + blocked := make(chan struct{}) + srv := httptest.NewServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + <-blocked // never respond until the test tears the server down + }), + ) + t.Cleanup(func() { + close(blocked) + srv.Close() + }) + + rt := boundedRoundTripper{base: http.DefaultTransport, timeout: 50 * time.Millisecond} + req, err := http.NewRequest(http.MethodGet, srv.URL, nil) + require.NoError(t, err) + + start := time.Now() + resp, err := rt.RoundTrip(req) + elapsed := time.Since(start) + + require.Error(t, err, "a stalled endpoint must surface as an error, not a hang") + if resp != nil { + _ = resp.Body.Close() + } + assert.Less(t, elapsed, time.Second, "request must be abandoned near the configured timeout") +} + +func TestTrack_MergesCustomProperties(t *testing.T) { + fake := &fakeAnalyticsClient{} + client := &AnalyticsTelemetryClient{client: fake} + + metadata := NewEventMetadata() + ctx := WithEventMetadata(context.Background(), metadata) + + props := map[string]any{"flow": "signup", "duration_ms": int64(1200)} + require.NoError(t, client.Track(ctx, EventAuthCompleted, props)) + require.Len(t, fake.messages, 1) + + track, ok := fake.messages[0].(analytics.Track) + require.True(t, ok) + assert.Equal(t, EventAuthCompleted, track.Event) + assert.Equal(t, "signup", track.Properties["flow"]) + assert.Equal(t, int64(1200), track.Properties["duration_ms"]) + assert.Contains(t, track.Properties, "invocation_id") + assert.Contains(t, track.Properties, "command") +}