diff --git a/.changeset/structured-error-grouping.md b/.changeset/structured-error-grouping.md new file mode 100644 index 00000000000..578ab3d8688 --- /dev/null +++ b/.changeset/structured-error-grouping.md @@ -0,0 +1,7 @@ +--- +'@shopify/cli-kit': patch +--- + +Group CLI crash reports on structured error signals instead of one catch-all bucket + +Bugsnag error grouping now derives a stable `slice:category:signature` hash from the original typed error's HTTP status and GraphQL `extensions.code` (falling back to the shared keyword categorizer, and to Bugsnag's stack-trace grouping for genuinely unknown errors). The same signals are emitted as `error_grouping` metadata so backend routing works regardless of CLI version. Known-transient API errors (HTTP 401/429 and `THROTTLED`) are now treated as expected and kept out of crash reporting. diff --git a/packages/cli-kit/src/cli/api/graphql/admin/generated/types.d.ts b/packages/cli-kit/src/cli/api/graphql/admin/generated/types.d.ts index 2da991ce827..9ce2584b9d6 100644 --- a/packages/cli-kit/src/cli/api/graphql/admin/generated/types.d.ts +++ b/packages/cli-kit/src/cli/api/graphql/admin/generated/types.d.ts @@ -232,6 +232,8 @@ export type OnlineStoreThemeFilesUserErrorsCode = | 'LESS_THAN_OR_EQUAL_TO' /** The record with the ID used as the input value couldn't be found. */ | 'NOT_FOUND' + /** Theme contextualization and condition types are not compatible with each other. */ + | 'THEME_CONTEXTUALIZATION_NOT_COMPATIBLE_WITH_CONDITION_TYPES' /** There are theme files with conflicts. */ | 'THEME_FILES_CONFLICT' /** This action is not available on your current plan. Please upgrade to access theme editing features. */ diff --git a/packages/cli-kit/src/private/node/analytics/error-grouping.test.ts b/packages/cli-kit/src/private/node/analytics/error-grouping.test.ts new file mode 100644 index 00000000000..a87c5cedf80 --- /dev/null +++ b/packages/cli-kit/src/private/node/analytics/error-grouping.test.ts @@ -0,0 +1,162 @@ +import {errorGroupingHash, errorGroupingSignals, resolveErrorGrouping} from './error-grouping.js' +import {GraphQLClientError} from '../api/headers.js' +import {AbortError} from '../../../public/node/error.js' +import {ClientError} from 'graphql-request' +import {describe, expect, test} from 'vitest' + +function clientError(status: number, code?: string): ClientError { + const errors = code ? [{message: 'boom', extensions: {code}}] : undefined + return new ClientError({status, errors, headers: {}} as any, {query: 'q'} as any) +} + +function clientErrorWithErrors(status: number, errors: unknown[]): ClientError { + return new ClientError({status, errors, headers: {}} as any, {query: 'q'} as any) +} + +describe('errorGroupingSignals', () => { + test('reads status, code, and class from a GraphQLClientError', () => { + const error = new GraphQLClientError('Forbidden', 403, [{extensions: {code: 'ACCESS_DENIED'}}]) + + expect(errorGroupingSignals(error)).toEqual({ + httpStatus: 403, + code: 'ACCESS_DENIED', + errorClass: 'GraphQLClientError', + }) + }) + + test('reads status and code from a raw graphql-request ClientError', () => { + const error = clientError(429, 'THROTTLED') + + expect(errorGroupingSignals(error)).toEqual({ + httpStatus: 429, + code: 'THROTTLED', + errorClass: 'ClientError', + }) + }) + + test('returns only the class name for a generic Error', () => { + expect(errorGroupingSignals(new Error('boom'))).toEqual({errorClass: 'Error'}) + }) + + test('returns an empty object for a non-Error value', () => { + expect(errorGroupingSignals('boom')).toEqual({}) + }) + + test('ignores a non-array errors value', () => { + const error = new GraphQLClientError('weird', 400, 'not-an-array' as any) + + expect(errorGroupingSignals(error)).toEqual({httpStatus: 400, errorClass: 'GraphQLClientError'}) + }) +}) + +describe('errorGroupingHash — structured decision table', () => { + test.each<[string, ClientError | GraphQLClientError, string]>([ + ['THROTTLED code -> rate_limit', clientError(400, 'THROTTLED'), 'theme:rate_limit:http-400-throttled'], + ['HTTP 429 -> rate_limit', clientError(429), 'theme:rate_limit:http-429'], + ['HTTP 401 -> authentication', clientError(401), 'theme:authentication:http-401'], + ['HTTP 403 -> permission', clientError(403), 'theme:permission:http-403'], + ['ACCESS_DENIED code -> permission', clientError(400, 'ACCESS_DENIED'), 'theme:permission:http-400-access-denied'], + ['HTTP 500 -> server', clientError(500), 'theme:server:http-500'], + ['HTTP 503 -> server', clientError(503), 'theme:server:http-503'], + ])('%s', (_, error, expected) => { + expect(errorGroupingHash(error, 'theme')).toEqual(expected) + }) + + test('403 wins over a 401-looking authentication category (permission, not authentication)', () => { + const error = new GraphQLClientError('Forbidden', 403, [{extensions: {code: 'ACCESS_DENIED'}}]) + + expect(errorGroupingHash(error, 'store')).toEqual('store:permission:http-403-access-denied') + }) + + test('prefixes the hash with the provided slice name', () => { + expect(errorGroupingHash(clientError(429), 'app')).toEqual('app:rate_limit:http-429') + expect(errorGroupingHash(clientError(429), 'cli')).toEqual('cli:rate_limit:http-429') + }) +}) + +describe('errorGroupingHash — message fallback', () => { + test('recovers an HTTP status flattened into an AbortError message', () => { + const error = new AbortError('The request responded unsuccessfully with the HTTP status 500') + + expect(errorGroupingHash(error, 'theme')).toEqual('theme:server:http-500') + }) + + test('recovers a graphql-request style "(Code: NNN)" status from a message', () => { + const error = new Error('GraphQL Error (Code: 401): {"response":{"status":401}}') + + expect(errorGroupingHash(error, 'store')).toEqual('store:authentication:http-401') + }) + + test('falls back to the keyword categorizer for an untyped, signal-less error', () => { + const error = new Error('connect ETIMEDOUT 1.2.3.4:443') + const hash = errorGroupingHash(error, 'cli') + + expect(hash).toMatch(/^cli:network:/) + }) + + test('returns undefined for an unknown error so stack-trace grouping is preserved', () => { + expect(errorGroupingHash(new Error('something nobody has categorized'), 'cli')).toBeUndefined() + }) + + test('returns undefined for a non-Error value', () => { + expect(errorGroupingHash(undefined, 'cli')).toBeUndefined() + }) +}) + +describe('errorGroupingHash — multi-error & nested codes (review #3/#5)', () => { + test('recognizes the GraphQL string code "429" as rate_limit, even at HTTP 200', () => { + // Mirrors errorsIncludeStatus429 in private/node/api.ts. + expect(errorGroupingHash(clientError(200, '429'), 'theme')).toEqual('theme:rate_limit:http-200-429') + }) + + test('scans every error, not just the first, for a routable code', () => { + const error = clientErrorWithErrors(200, [{message: 'noise'}, {extensions: {code: 'THROTTLED'}}]) + + expect(errorGroupingHash(error, 'theme')).toEqual('theme:rate_limit:http-200-throttled') + }) + + test('recognizes a nested App Management app_errors access_denied as permission', () => { + const error = clientErrorWithErrors(200, [{extensions: {app_errors: {errors: [{category: 'access_denied'}]}}}]) + + expect(errorGroupingHash(error, 'app')).toEqual('app:permission:http-200-access-denied') + }) +}) + +describe('errorGroupingHash — precedence & merge (review #2/#4)', () => { + test('401 wins over a co-occurring ACCESS_DENIED code (authentication, by decision)', () => { + const error = new GraphQLClientError('Unauthenticated', 401, [{extensions: {code: 'ACCESS_DENIED'}}]) + + // HTTP 401 is authoritative; the access-denied code is still recorded in the signature. + expect(errorGroupingHash(error, 'theme')).toEqual('theme:authentication:http-401-access-denied') + }) + + test('an absent object code does not erase a code recovered from the message', () => { + const error = new GraphQLClientError( + 'GraphQL Error (Code: 500): {"response":{"errors":[{"extensions":{"code":"INTERNAL"}}]}}', + 500, + ) + + const {signals, category} = resolveErrorGrouping(error, 'theme') + expect(category).toEqual('server') + expect(signals.httpStatus).toEqual(500) + expect(signals.code).toEqual('INTERNAL') + }) +}) + +describe('resolveErrorGrouping', () => { + test('returns hash, category, and signals together for the metadata path', () => { + expect(resolveErrorGrouping(clientError(403, 'ACCESS_DENIED'), 'store')).toEqual({ + hash: 'store:permission:http-403-access-denied', + category: 'permission', + signals: {httpStatus: 403, code: 'ACCESS_DENIED', errorClass: 'ClientError'}, + }) + }) + + test('returns an undefined hash but still-useful signals for an unknown error', () => { + const result = resolveErrorGrouping(new Error('nothing categorizes this'), 'cli') + + expect(result.hash).toBeUndefined() + expect(result.category).toBeUndefined() + expect(result.signals.errorClass).toEqual('Error') + }) +}) diff --git a/packages/cli-kit/src/private/node/analytics/error-grouping.ts b/packages/cli-kit/src/private/node/analytics/error-grouping.ts new file mode 100644 index 00000000000..ff66a1803d7 --- /dev/null +++ b/packages/cli-kit/src/private/node/analytics/error-grouping.ts @@ -0,0 +1,237 @@ +import {categorizeError, ErrorCategory, formatErrorMessage} from './error-categorizer.js' +import {graphQLErrorCodes, isPermissionCode, isRateLimitCode} from './graphql-error-codes.js' +import {GraphQLClientError} from '../api/headers.js' +import {ClientError} from 'graphql-request' + +/** + * Structured signals extracted from an error, used to group it into a meaningful Bugsnag bucket. + * + * These are read from the *original* typed error (before it is flattened to a generic `Error` + * for reporting), so we group on facts the error already carries rather than by re-parsing a + * stringified message. + */ +interface ErrorGroupingSignals { + httpStatus?: number + code?: string + errorClass?: string +} + +/** + * The fully resolved grouping decision for an error: the Bugsnag grouping hash (or `undefined` to + * fall back to stack-trace grouping), the semantic category, and the structured signals that drove + * the decision. The reporter uses a single resolution for both `event.groupingHash` and the + * `error_grouping` metadata so they can never disagree. + */ +interface ResolvedErrorGrouping { + hash?: string + category?: string + signals: ErrorGroupingSignals +} + +/** + * Extracts structured grouping signals from an error object. + * + * Only assigns a field when a value is actually present, so the result can be safely merged over + * message-derived signals without an explicit `undefined` erasing a recovered value. + * + * @param error - The original error (any type). + * @returns The HTTP status, GraphQL error code, and error class name when available. + */ +export function errorGroupingSignals(error: unknown): ErrorGroupingSignals { + const signals: ErrorGroupingSignals = {} + + if (error instanceof Error) { + signals.errorClass = error.constructor.name + } + + // GraphQLClientError (handleErrors: true, status < 500) preserves the status and errors array. + if (error instanceof GraphQLClientError) { + if (error.statusCode !== undefined) signals.httpStatus = error.statusCode + const code = routableCode(error.errors) + if (code !== undefined) signals.code = code + return signals + } + + // Raw graphql-request ClientError (handleErrors: false) exposes the full response. + if (error instanceof ClientError) { + const status = error.response?.status + if (status !== undefined) signals.httpStatus = status + const code = routableCode(error.response?.errors) + if (code !== undefined) signals.code = code + } + + return signals +} + +/** + * Resolves the full grouping decision for an error: hash, category, and the signals behind it. + * + * Categories are resolved structured-first (HTTP status / GraphQL code), falling back to the + * shared keyword categorizer only for untyped errors. When no meaningful category can be + * resolved, `hash` is `undefined` so the caller leaves `event.groupingHash` unset and Bugsnag's + * default stack-trace grouping applies — this avoids merging genuinely distinct unknown bugs. + * + * @param error - The original error (any type). + * @param sliceName - The product slice (`app`, `theme`, `store`, `hydrogen`, or `cli`). + * @returns The resolved hash (or `undefined`), category, and structured signals. + */ +export function resolveErrorGrouping(error: unknown, sliceName: string): ResolvedErrorGrouping { + const message = errorMessage(error) + // Object signals are authoritative; message-derived signals fill gaps (e.g. 5xx errors that the + // API layer flattens into an AbortError, dropping the structured status field). Merge field by + // field so an absent object signal never clobbers one recovered from the message. + const fromError = errorGroupingSignals(error) + const fromMessage = signalsFromMessage(message) + const signals: ErrorGroupingSignals = { + httpStatus: fromError.httpStatus ?? fromMessage.httpStatus, + code: fromError.code ?? fromMessage.code, + errorClass: fromError.errorClass ?? fromMessage.errorClass, + } + + const structuredCategory = categoryFromSignals(signals) + if (structuredCategory) { + return { + hash: `${sliceName}:${structuredCategory}:${structuredSignature(signals)}`, + category: structuredCategory, + signals, + } + } + + const groupingError = new Error(stripJsonDump(message)) + const category = categorizeError(groupingError) + if (category === ErrorCategory.Unknown) { + return {hash: undefined, category: undefined, signals} + } + + const categoryName = category.toLowerCase() + return { + hash: `${sliceName}:${categoryName}:${formatErrorMessage(groupingError, category)}`, + category: categoryName, + signals, + } +} + +/** + * Builds a Bugsnag grouping hash of the form `${slice}:${category}:${signature}`. + * + * Thin wrapper over {@link resolveErrorGrouping} for callers that only need the hash. + * + * @param error - The original error (any type). + * @param sliceName - The product slice (`app`, `theme`, `store`, `hydrogen`, or `cli`). + * @returns The grouping hash, or `undefined` to fall back to stack-trace grouping. + */ +export function errorGroupingHash(error: unknown, sliceName: string): string | undefined { + return resolveErrorGrouping(error, sliceName).hash +} + +/** + * Resolves a semantic category from structured signals using an explicit decision table. + * + * Precedence is deliberate: + * - rate limit (`THROTTLED`/`429` code or HTTP 429) wins first — it is the most actionable bucket. + * - HTTP 401 is authoritative for `authentication`: a request the server rejected as + * unauthenticated is an authentication problem even if a GraphQL code also reports access-denied. + * `ACCESS_DENIED` in practice pairs with HTTP 403, which is handled by the permission branch. + * - 403 / `ACCESS_DENIED` (incl. nested App Management `access_denied`) → `permission`. + * + * Returns `undefined` when no structured signal is present. + */ +function categoryFromSignals(signals: ErrorGroupingSignals): string | undefined { + const {httpStatus, code} = signals + + if (isRateLimitCode(code) || httpStatus === 429) return 'rate_limit' + if (httpStatus === 401) return 'authentication' + if (httpStatus === 403 || isPermissionCode(code)) return 'permission' + if (httpStatus !== undefined && httpStatus >= 500) return 'server' + + return undefined +} + +/** + * Builds a stable, low-cardinality signature slug from structured signals. + */ +function structuredSignature(signals: ErrorGroupingSignals): string { + const parts: string[] = [] + if (signals.httpStatus !== undefined) parts.push(`http-${signals.httpStatus}`) + if (signals.code) parts.push(signals.code) + if (parts.length === 0 && signals.errorClass) parts.push(signals.errorClass) + + return slugify(parts.join('-')) +} + +/** + * Recovers structured signals from an error message string. Handles both error shapes seen from + * the API layer: graphql-request's `GraphQL Error (Code: NNN): {json}` and the cli-kit wrapper's + * `... responded unsuccessfully with the HTTP status NNN ...`. + */ +function signalsFromMessage(message: string): ErrorGroupingSignals { + const signals: ErrorGroupingSignals = {} + + const statusMatch = message.match(/HTTP status (\d{3})/i) ?? message.match(/\(Code: (\d{3})\)/i) + if (statusMatch?.[1]) signals.httpStatus = Number(statusMatch[1]) + + const payload = parseEmbeddedJson(message) + if (signals.httpStatus === undefined && payload?.response?.status !== undefined) { + signals.httpStatus = payload.response.status + } + const code = routableCode(payload?.response?.errors) + if (code !== undefined) signals.code = code + + return signals +} + +interface EmbeddedGraphQLPayload { + response?: { + status?: number + errors?: unknown + } +} + +/** + * Parses the JSON blob embedded in a graphql-request ClientError message, if present. + */ +function parseEmbeddedJson(message: string): EmbeddedGraphQLPayload | undefined { + const jsonStart = message.indexOf('{') + if (jsonStart === -1) return undefined + + try { + return JSON.parse(message.slice(jsonStart)) as EmbeddedGraphQLPayload + // eslint-disable-next-line no-catch-all/no-catch-all + } catch { + return undefined + } +} + +/** + * Picks the most routing-relevant code from a GraphQL `errors` value. + * + * Scans every error (not just the first) and prefers a code we route on — a rate-limit code, then + * a permission code — so a benign leading code can't mask a `THROTTLED` or `ACCESS_DENIED` further + * down the array. Falls back to the first code present for visibility. + */ +function routableCode(errors: unknown): string | undefined { + const codes = graphQLErrorCodes(errors) + return codes.find(isRateLimitCode) ?? codes.find(isPermissionCode) ?? codes[0] +} + +/** + * Strips a trailing JSON dump (`...: {json}`) from a message so the keyword categorizer sees the + * human-readable prefix rather than the serialized request/response (which contains the literal + * `request`, mis-routing to the network category). + */ +function stripJsonDump(message: string): string { + return message.split(': {')[0] ?? message +} + +function errorMessage(error: unknown): string { + if (error instanceof Error) return error.message + return typeof error === 'string' ? error : '' +} + +function slugify(value: string): string { + return value + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, '') + .slice(0, 50) +} diff --git a/packages/cli-kit/src/private/node/analytics/graphql-error-codes.test.ts b/packages/cli-kit/src/private/node/analytics/graphql-error-codes.test.ts new file mode 100644 index 00000000000..445cdde70b0 --- /dev/null +++ b/packages/cli-kit/src/private/node/analytics/graphql-error-codes.test.ts @@ -0,0 +1,41 @@ +import {graphQLErrorCodes, hasRateLimitCode, isPermissionCode, isRateLimitCode} from './graphql-error-codes.js' +import {describe, expect, test} from 'vitest' + +describe('graphQLErrorCodes', () => { + test('reads top-level extensions.code from every error, in order', () => { + const errors = [{extensions: {code: 'FIRST'}}, {message: 'no code'}, {extensions: {code: 'THROTTLED'}}] + + expect(graphQLErrorCodes(errors)).toEqual(['FIRST', 'THROTTLED']) + }) + + test('reads nested App Management app_errors categories', () => { + const errors = [{extensions: {app_errors: {errors: [{category: 'access_denied'}, {category: 'other'}]}}}] + + expect(graphQLErrorCodes(errors)).toEqual(['access_denied', 'other']) + }) + + test('returns an empty array for a string errors value (some 401 responses)', () => { + expect(graphQLErrorCodes('unauthenticated')).toEqual([]) + expect(graphQLErrorCodes(undefined)).toEqual([]) + }) +}) + +describe('code predicates', () => { + test('isRateLimitCode recognizes THROTTLED and the string 429', () => { + expect(isRateLimitCode('THROTTLED')).toBe(true) + expect(isRateLimitCode('429')).toBe(true) + expect(isRateLimitCode('ACCESS_DENIED')).toBe(false) + expect(isRateLimitCode(undefined)).toBe(false) + }) + + test('isPermissionCode recognizes ACCESS_DENIED and nested access_denied', () => { + expect(isPermissionCode('ACCESS_DENIED')).toBe(true) + expect(isPermissionCode('access_denied')).toBe(true) + expect(isPermissionCode('THROTTLED')).toBe(false) + }) + + test('hasRateLimitCode scans all errors, not just the first', () => { + expect(hasRateLimitCode([{message: 'a'}, {extensions: {code: '429'}}])).toBe(true) + expect(hasRateLimitCode([{extensions: {code: 'OTHER'}}])).toBe(false) + }) +}) diff --git a/packages/cli-kit/src/private/node/analytics/graphql-error-codes.ts b/packages/cli-kit/src/private/node/analytics/graphql-error-codes.ts new file mode 100644 index 00000000000..78126c604a4 --- /dev/null +++ b/packages/cli-kit/src/private/node/analytics/graphql-error-codes.ts @@ -0,0 +1,79 @@ +/** + * Pure helpers for reading routing-relevant codes out of a GraphQL error response. + * + * This module is intentionally dependency-free: it is imported by both the grouping logic + * (`error-grouping.ts`) and the crash-report suppression logic (`../../public/node/error.ts`). + * `error.ts` cannot import `error-grouping.ts` directly — that would create an + * `error.ts → headers.ts → error.ts` import cycle — so the shared scanning logic lives here, + * where it imports nothing from cli-kit. + */ + +/** GraphQL `extensions.code` values we treat as rate limiting. */ +const RATE_LIMIT_CODES = new Set(['THROTTLED', '429']) + +/** + * GraphQL `extensions.code` values (and nested App Management `app_errors` categories) we treat as + * a permission / access-denied signal. The top-level admin code is `ACCESS_DENIED`; the App + * Management nested shape uses the lowercase category `access_denied`. + */ +const PERMISSION_CODES = new Set(['ACCESS_DENIED', 'access_denied']) + +/** + * Collects every routing-relevant code from a GraphQL `errors` value. + * + * Scans *all* errors (not just the first), reading both the top-level `extensions.code` and the + * nested App Management shape `extensions.app_errors.errors[].category`. The API can also return + * `errors` as a string (e.g. some 401s), which yields no codes. + * + * @param errors - The `errors` value from a GraphQL response (array, string, or undefined). + * @returns Every string code/category found, in document order. + */ +export function graphQLErrorCodes(errors: unknown): string[] { + if (!Array.isArray(errors)) return [] + + return errors.flatMap((entry) => { + const found: string[] = [] + const error = entry as {extensions?: {code?: unknown; app_errors?: {errors?: unknown}}} | undefined + + const code = error?.extensions?.code + if (typeof code === 'string') found.push(code) + + const appErrors = error?.extensions?.app_errors?.errors + if (Array.isArray(appErrors)) { + for (const appError of appErrors) { + const category = (appError as {category?: unknown} | undefined)?.category + if (typeof category === 'string') found.push(category) + } + } + + return found + }) +} + +/** + * Whether a single code is a rate-limit signal (`THROTTLED` or `429`). + * + * Mirrors the established shape detected by `errorsIncludeStatus429` in `private/node/api.ts`, + * where `extensions.code === '429'` signals rate limiting even at HTTP 200. + */ +export function isRateLimitCode(code: string | undefined): boolean { + return code !== undefined && RATE_LIMIT_CODES.has(code) +} + +/** + * Whether a single code/category is a permission / access-denied signal. + */ +export function isPermissionCode(code: string | undefined): boolean { + return code !== undefined && PERMISSION_CODES.has(code) +} + +/** + * Whether any GraphQL error carries a rate-limit code. Convenience for suppression logic that only + * needs a boolean over the raw `errors` value. + * + * @param errors - The `errors` value from a GraphQL response. + * @returns True when a rate-limit code is present in any error. + */ +export function hasRateLimitCode(errors: unknown): boolean { + return graphQLErrorCodes(errors).some(isRateLimitCode) +} diff --git a/packages/cli-kit/src/public/node/error-handler.test.ts b/packages/cli-kit/src/public/node/error-handler.test.ts index 8f101ab55d4..261894a1577 100644 --- a/packages/cli-kit/src/public/node/error-handler.test.ts +++ b/packages/cli-kit/src/public/node/error-handler.test.ts @@ -6,13 +6,14 @@ import * as error from './error.js' import {hashString} from './crypto.js' import {isLocalEnvironment} from '../../private/node/context/service.js' import {getLastSeenUserIdAfterAuth} from '../../private/node/session.js' +import {GraphQLClientError} from '../../private/node/api/headers.js' import {settings} from '@oclif/core' import {beforeEach, describe, expect, test, vi} from 'vitest' const onNotify = vi.fn() const capturedEventHandler = vi.fn() -let lastBugsnagEvent: {addMetadata: ReturnType} | undefined +let lastBugsnagEvent: {addMetadata: ReturnType; groupingHash?: string} | undefined vi.mock('process') vi.mock('@bugsnag/js', () => { @@ -303,4 +304,61 @@ describe('sends errors to Bugsnag', () => { expect(lastBugsnagEvent).toBeDefined() expect(lastBugsnagEvent!.addMetadata).toHaveBeenCalledWith('custom', {slice_name: 'cli'}) }) + + test('sets a structured groupingHash and error_grouping metadata for a typed API error', async () => { + await metadata.addSensitiveMetadata(() => ({ + commandStartOptions: {startTime: Date.now(), startCommand: 'theme dev', startArgs: []}, + })) + const apiError = new GraphQLClientError('Forbidden', 403, [{extensions: {code: 'ACCESS_DENIED'}}]) + + await sendErrorToBugsnag(apiError, 'unexpected_error') + + expect(lastBugsnagEvent).toBeDefined() + expect(lastBugsnagEvent!.groupingHash).toEqual('theme:permission:http-403-access-denied') + expect(lastBugsnagEvent!.addMetadata).toHaveBeenCalledWith('error_grouping', { + slice_name: 'theme', + category: 'permission', + http_status: 403, + error_code: 'ACCESS_DENIED', + error_class: 'GraphQLClientError', + }) + }) + + test('emits message-derived signals and category in metadata for a flattened API error', async () => { + await metadata.addSensitiveMetadata(() => ({ + commandStartOptions: {startTime: Date.now(), startCommand: 'theme dev', startArgs: []}, + })) + // A 5xx the API layer flattened into an AbortError: the structured status field is gone, but it + // is recoverable from the message and must still reach both the hash and the metadata. + const flattened = new Error('The request responded unsuccessfully with the HTTP status 503') + + await sendErrorToBugsnag(flattened, 'unexpected_error') + + expect(lastBugsnagEvent!.groupingHash).toEqual('theme:server:http-503') + expect(lastBugsnagEvent!.addMetadata).toHaveBeenCalledWith('error_grouping', { + slice_name: 'theme', + category: 'server', + http_status: 503, + error_code: undefined, + error_class: 'Error', + }) + }) + + test('leaves groupingHash unset for an unknown error but still tags error_grouping metadata', async () => { + await metadata.addSensitiveMetadata(() => ({ + commandStartOptions: {startTime: Date.now(), startCommand: 'app dev', startArgs: []}, + })) + + await sendErrorToBugsnag(new Error('something nobody has categorized'), 'unexpected_error') + + expect(lastBugsnagEvent).toBeDefined() + expect(lastBugsnagEvent!.groupingHash).toBeUndefined() + expect(lastBugsnagEvent!.addMetadata).toHaveBeenCalledWith('error_grouping', { + slice_name: 'app', + category: undefined, + http_status: undefined, + error_code: undefined, + error_class: 'Error', + }) + }) }) diff --git a/packages/cli-kit/src/public/node/error-handler.ts b/packages/cli-kit/src/public/node/error-handler.ts index d639c9d3d33..26419f5dd37 100644 --- a/packages/cli-kit/src/public/node/error-handler.ts +++ b/packages/cli-kit/src/public/node/error-handler.ts @@ -12,6 +12,7 @@ import { } from './error.js' import {outputDebug, outputInfo} from './output.js' import {getEnvironmentData} from '../../private/node/analytics.js' +import {resolveErrorGrouping} from '../../private/node/analytics/error-grouping.js' import {isLocalEnvironment} from '../../private/node/context/service.js' import {bugsnagApiKey, reportingRateLimit} from '../../private/node/constants.js' import {CLI_KIT_VERSION} from '../common/version.js' @@ -28,6 +29,14 @@ import {realpath} from 'fs/promises' // Hardcoded list per product slices to keep analytics consistent. const ALLOWED_SLICE_NAMES = new Set(['app', 'theme', 'hydrogen', 'store']) +// Derives the product slice from the command that was running (e.g. `theme dev` -> `theme`), +// defaulting to `cli` when the command is unknown or not in the allow-list. +function sliceNameFromStartCommand(startCommand: string | undefined): string { + if (!startCommand) return 'cli' + const firstWord = startCommand.trim().split(/\s+/)[0] ?? 'cli' + return ALLOWED_SLICE_NAMES.has(firstWord) ? firstWord : 'cli' +} + export async function errorHandler( error: Error & {exitCode?: number | undefined}, config?: Interfaces.Config, @@ -147,11 +156,29 @@ export async function sendErrorToBugsnag( // Attach command metadata so we know which CLI command triggered the error const {commandStartOptions} = metadata.getAllSensitiveMetadata() const {startCommand} = commandStartOptions ?? {} + const sliceName = sliceNameFromStartCommand(startCommand) if (startCommand) { - const firstWord = startCommand.trim().split(/\s+/)[0] ?? 'cli' - const sliceName = ALLOWED_SLICE_NAMES.has(firstWord) ? firstWord : 'cli' event.addMetadata('custom', {slice_name: sliceName}) } + + // Group on structured signals from the *original* error (not the flattened + // `reportableError`), and emit those signals as metadata so backend dashboards and + // routing work without depending on the grouping hash or on CLI version adoption. + // A single resolution drives both the hash and the metadata so they can never disagree: + // the metadata carries the resolved category and the same (message-merged) signals. + // When no meaningful category resolves we leave `groupingHash` unset so Bugsnag's + // stack-trace grouping applies and distinct unknown bugs are not merged. + const {hash: groupingHash, category, signals} = resolveErrorGrouping(error, sliceName) + if (groupingHash) { + event.groupingHash = groupingHash + } + event.addMetadata('error_grouping', { + slice_name: sliceName, + category, + http_status: signals.httpStatus, + error_code: signals.code, + error_class: signals.errorClass, + }) } const errorHandler = (error: unknown) => { if (error) { diff --git a/packages/cli-kit/src/public/node/error.test.ts b/packages/cli-kit/src/public/node/error.test.ts index 1bba30c0633..b80d0054526 100644 --- a/packages/cli-kit/src/public/node/error.test.ts +++ b/packages/cli-kit/src/public/node/error.test.ts @@ -1,7 +1,13 @@ import {AbortError, BugError, handler, cleanSingleStackTracePath, shouldReportErrorAsUnexpected} from './error.js' import {renderFatalError} from './ui.js' +import {ClientError} from 'graphql-request' import {describe, expect, test, vi} from 'vitest' +function clientError(status: number, code?: string): ClientError { + const errors = code ? [{message: 'boom', extensions: {code}}] : undefined + return new ClientError({status, errors, headers: {}} as any, {query: 'q'} as any) +} + vi.mock('./ui.js') describe('handler', () => { @@ -82,4 +88,33 @@ describe('shouldReportErrorAsUnexpected helper', () => { test('returns false for unsupported platform errors', () => { expect(shouldReportErrorAsUnexpected(new Error('Unsupported platform: win32 arm64 LE'))).toBe(false) }) + + test('returns false for a raw ClientError that is rate limited (HTTP 429)', () => { + expect(shouldReportErrorAsUnexpected(clientError(429))).toBe(false) + }) + + test('returns false for a raw ClientError that is unauthenticated (HTTP 401)', () => { + expect(shouldReportErrorAsUnexpected(clientError(401))).toBe(false) + }) + + test('returns false for a raw ClientError with a THROTTLED code', () => { + expect(shouldReportErrorAsUnexpected(clientError(400, 'THROTTLED'))).toBe(false) + }) + + test('returns false for a raw ClientError with a GraphQL "429" code at HTTP 200', () => { + // Matches errorsIncludeStatus429 in private/node/api.ts. + expect(shouldReportErrorAsUnexpected(clientError(200, '429'))).toBe(false) + }) + + test('returns false for a rate-limit code on a later error entry, not just the first', () => { + const error = new ClientError( + {status: 200, errors: [{message: 'noise'}, {extensions: {code: 'THROTTLED'}}], headers: {}} as any, + {query: 'q'} as any, + ) + expect(shouldReportErrorAsUnexpected(error)).toBe(false) + }) + + test('returns true for a raw ClientError that is a genuine failure (HTTP 500)', () => { + expect(shouldReportErrorAsUnexpected(clientError(500))).toBe(true) + }) }) diff --git a/packages/cli-kit/src/public/node/error.ts b/packages/cli-kit/src/public/node/error.ts index c2a7852c65c..9f84774cef9 100644 --- a/packages/cli-kit/src/public/node/error.ts +++ b/packages/cli-kit/src/public/node/error.ts @@ -1,8 +1,10 @@ import {normalizePath} from './path.js' import {OutputMessage, stringifyMessage, TokenizedString} from './output.js' import {InlineToken, TokenItem, tokenItemToString} from '../../private/node/ui/components/TokenizedText.js' +import {hasRateLimitCode} from '../../private/node/analytics/graphql-error-codes.js' import {Errors} from '@oclif/core' +import {ClientError} from 'graphql-request' import type {AlertCustomSection} from './ui.js' @@ -193,6 +195,12 @@ export function shouldReportErrorAsUnexpected(error: unknown): boolean { if (!isFatal(error)) { // this means its not one of the CLI wrapped errors if (error instanceof Error) { + // Raw API errors that slip through unwrapped (e.g. the handleErrors:false path) are expected + // environmental conditions, not CLI bugs. Treat them as expected so they don't pollute crash + // reporting. + if (isExpectedApiError(error)) { + return false + } const message = error.message return !errorMessageImpliesEnvironmentIssue(message) } @@ -204,6 +212,33 @@ export function shouldReportErrorAsUnexpected(error: unknown): boolean { return false } +/** + * Detects raw graphql-request `ClientError`s that are expected environmental conditions rather than + * CLI bugs. These reach the reporter as plain `Error`s (not `FatalError`s) and would otherwise be + * reported as unexpected. Two distinct cases, both kept out of crash reporting: + * + * HTTP 401 (unauthenticated) is not "transient" in the retry sense — it means the user's session + * token is expired or invalid, a credential/environment condition (see issue #7891). Rate limiting + * (HTTP 429, or a `THROTTLED`/`429` GraphQL code on any error in the response) matches the shape + * detected by `errorsIncludeStatus429` in `private/node/api.ts`. + * + * Scoped to the external `ClientError` type only — importing the cli-kit `GraphQLClientError` + * wrapper here would create an `error.ts → headers.ts → error.ts` import cycle. + * + * @param error - Error to be checked. + * @returns A boolean indicating if the error is a known expected API error. + */ +function isExpectedApiError(error: Error): boolean { + if (!(error instanceof ClientError)) { + return false + } + const status = error.response?.status + if (status === 401 || status === 429) { + return true + } + return hasRateLimitCode(error.response?.errors) +} + /** * Stack traces usually have file:// - we strip that and also remove the Windows drive designation. *