diff --git a/content/apps/oauth-apps/using-oauth-apps/privileged-oauth-apps.md b/content/apps/oauth-apps/using-oauth-apps/privileged-oauth-apps.md index 96dc74a22e58..3a256486ee27 100644 --- a/content/apps/oauth-apps/using-oauth-apps/privileged-oauth-apps.md +++ b/content/apps/oauth-apps/using-oauth-apps/privileged-oauth-apps.md @@ -23,6 +23,7 @@ These {% data variables.product.prodname_oauth_apps %} are : * {% data variables.product.prodname_android %} * {% data variables.product.prodname_cli %} * {% data variables.product.prodname_github_codespaces %} for JetBrains +* {% data variables.copilot.copilot_cli %} * {% data variables.product.prodname_desktop %} * {% data variables.product.prodname_education %} * github-importer-production diff --git a/content/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent.md b/content/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent.md index 28b21356834a..96b7bb79e6a6 100644 --- a/content/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent.md +++ b/content/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent.md @@ -162,6 +162,7 @@ Before adding a new SSH key to the ssh-agent to manage your keys, you should hav ```powershell ssh-add c:/Users/YOU/.ssh/id_ed25519 ``` + {% data reusables.ssh.add-public-key-to-github %} > ### Troubleshooting SSH agent conflicts in Windows diff --git a/content/copilot/how-tos/chat-with-copilot/chat-in-ide.md b/content/copilot/how-tos/chat-with-copilot/chat-in-ide.md index 5613f14cc9ed..703b65307c5d 100644 --- a/content/copilot/how-tos/chat-with-copilot/chat-in-ide.md +++ b/content/copilot/how-tos/chat-with-copilot/chat-in-ide.md @@ -185,7 +185,13 @@ When the {% data variables.copilot.subagent_short %} completes its task, its res ### Plan mode -{% data reusables.copilot.plan-agent-intro %} +Plan mode helps you 
to create detailed implementation plans before executing them. This ensures that all requirements are considered and addressed before any code changes are made. The plan agent does not make any code changes until the plan is reviewed and approved by you. Once approved, you can hand off the plan to the default agent or save it for further refinement, review, or team discussions. + +The plan agent is designed to: + +* Research the task comprehensively using read-only tools and codebase analysis to identify requirements and constraints. +* Break down the task into manageable, actionable steps and include open questions about ambiguous requirements. +* Present a concise plan draft, based on a standardized plan format, for user review and iteration. #### Using plan mode diff --git a/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/authenticate-copilot-cli.md b/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/authenticate-copilot-cli.md new file mode 100644 index 000000000000..54f40821b7d5 --- /dev/null +++ b/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/authenticate-copilot-cli.md @@ -0,0 +1,170 @@ +--- +title: Authenticating GitHub Copilot CLI +shortTitle: Authenticate Copilot CLI +intro: Authenticate {% data variables.copilot.copilot_cli_short %} so that you can use {% data variables.product.prodname_copilot_short %} directly from the command line. +product: '{% data reusables.gated-features.copilot-cli %}' +versions: + feature: copilot +topics: + - Copilot + - CLI +contentType: how-tos +category: + - Configure Copilot CLI +--- + +## About authentication + +{% data variables.copilot.copilot_cli %} supports three authentication methods. The method you use depends on whether you are working interactively or in an automated environment. + +* **OAuth device flow**: The default and recommended method for interactive use. 
When you run `/login` in {% data variables.copilot.copilot_cli_short %}, the CLI generates a one-time code and directs you to authenticate in your browser. This is the simplest way to authenticate. +* **Environment variables**: Recommended for CI/CD pipelines, containers, and non-interactive environments. You set a supported token as an environment variable (`COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, or `GITHUB_TOKEN`), and the CLI uses it automatically without prompting. +* **{% data variables.product.prodname_cli %} fallback**: If you have {% data variables.product.prodname_cli %} (the `gh` command, not `copilot`) installed and authenticated, {% data variables.copilot.copilot_cli_short %} can use its token automatically. This is the lowest priority method and activates only when no other credentials are found. + +Once authenticated, {% data variables.copilot.copilot_cli_short %} remembers your login and automatically uses the token for all {% data variables.product.prodname_copilot_short %} API requests. You can log in with multiple accounts, and the CLI will remember the last-used account. Token lifetime and expiration depend on how the token was created and on your account or organization settings. 
+ +### Supported token types + +| Token type | Prefix | Supported | Notes | +|----------------------------|---------------|-----------|--------------------------------------------------------| +| OAuth token (device flow) | `gho_` | Yes | Default method via `copilot login` | +| Fine-grained PAT | `github_pat_` | Yes | Must include required permissions **Copilot Requests** | +| GitHub App user-to-server | `ghu_` | Yes | Via environment variable | +| Classic PAT | `ghp_` | No | Not supported by {% data variables.copilot.copilot_cli_short %} | + +### How Copilot CLI stores credentials + +By default, the CLI stores your OAuth token in your operating system's keychain under the service name `copilot-cli`: + +| Platform | Keychain | +|---|---| +| macOS | Keychain Access | +| Windows | Credential Manager | +| Linux | libsecret (GNOME Keyring, KWallet) | + +If the system keychain is unavailable—for example, on a headless Linux server without `libsecret` installed—the CLI prompts you to store the token in a plaintext configuration file at `~/.copilot/config.json`. + +When you run a command, {% data variables.copilot.copilot_cli_short %} checks for credentials in the following order: + +1. `COPILOT_GITHUB_TOKEN` environment variable +1. `GH_TOKEN` environment variable +1. `GITHUB_TOKEN` environment variable +1. OAuth token from the system keychain +1. GitHub CLI (`gh auth token`) fallback + +> [!NOTE] +> An environment variable silently overrides a stored OAuth token. If you set `GH_TOKEN` for another tool, the CLI uses that token instead of the OAuth token from `copilot login`. To avoid unexpected behavior, unset environment variables you do not intend the CLI to use. + +## Authenticating with OAuth + +The OAuth device flow is the default authentication method for interactive use. You can authenticate by running `/login` from {% data variables.copilot.copilot_cli_short %} or `copilot login` from your terminal. + +### Authenticate with `/login` + +1. 
From {% data variables.copilot.copilot_cli_short %}, run `/login`. + + ```bash copy + /login + ``` + +1. Select the account you want to authenticate with. For {% data variables.product.prodname_ghe_cloud %} with data residency, enter the hostname of your instance. + + ```text + What account do you want to log into? + 1. {% data variables.product.prodname_dotcom_the_website %} + 2. {% data variables.product.prodname_ghe_cloud %} with data residency (*.ghe.com) + ``` + +1. The CLI displays a one-time user code. Press any key to copy the code to your clipboard and open your browser. + + ```text + Waiting for authorization... + Enter one-time code: 1234-5678 at https://github.com/login/device + Press any key to copy to clipboard and open browser... + ``` + +1. Navigate to the verification URL at `https://github.com/login/device` if your browser did not open automatically. +1. Paste the one-time code in the field on the page. +1. If your organization uses SAML SSO, click **Authorize** next to each organization you want to grant access to. +1. Review the requested permissions and click **Authorize GitHub Copilot CLI**. +1. Return to your terminal. The CLI displays a success message when authentication is complete. + + ```text + Signed in successfully as Octocat. You can now use {% data variables.product.prodname_copilot_short %}. + ``` + +### Authenticate with `copilot login` + +1. From the terminal, run `copilot login`. If you are using {% data variables.product.prodname_ghe_cloud %} with data residency, pass the hostname of your instance. + + ```bash copy + copilot login + ``` + + For {% data variables.product.prodname_ghe_cloud %} with data residency: + + ```bash copy + copilot login --host HOSTNAME + ``` + + The CLI displays a one-time user code and automatically copies it to your clipboard and opens your browser. + + ```text + To authenticate, visit https://github.com/login/device and enter code 1234-5678. + ``` + +1. 
Navigate to the verification URL at `https://github.com/login/device` if your browser did not open automatically. +1. Paste the one-time code in the field on the page. +1. If your organization uses SAML SSO, click **Authorize** next to each organization you want to grant access to. +1. Review the requested permissions and click **Authorize GitHub Copilot CLI**. +1. Return to your terminal. The CLI displays a success message when authentication is complete. + + ```text + Signed in successfully as Octocat. + ``` + +## Authenticating with environment variables + +For non-interactive environments, you can authenticate by setting an environment variable with a supported token. This is ideal for CI/CD pipelines, containers, or headless servers. + +{% data reusables.copilot.copilot-cli-pat-steps %} + +## Authenticating with {% data variables.product.prodname_cli %} + +If you have {% data variables.product.prodname_cli %} installed and authenticated, {% data variables.copilot.copilot_cli_short %} can use its token as a fallback. This method has the lowest priority and activates only when no environment variables are set and no stored token is found. + +1. Verify that {% data variables.product.prodname_cli %} is authenticated. + + ```bash copy + gh auth status + ``` + + If you use {% data variables.product.prodname_ghe_cloud %} with data residency, verify the correct hostname is authenticated. + + ```bash copy + gh auth status --hostname HOSTNAME + ``` + +1. Run `copilot`. The Copilot CLI uses the {% data variables.product.prodname_cli %} token automatically. +1. Run `/user` to verify your authenticated account in the CLI. + +## Switching between accounts + +{% data variables.copilot.copilot_cli_short %} supports multiple accounts. You can list available accounts and switch between them from within the CLI. +To list available accounts, run `/user list` from the {% data variables.copilot.copilot_cli_short %} prompt. 
+To switch to a different account, type `/user switch` at the prompt. + +To add another account, run `copilot login` from a new terminal session, or run the login command from within the CLI and authorize with the other account. + +## Signing out and removing credentials + +To sign out, type `/logout` at the {% data variables.copilot.copilot_cli_short %} prompt. This removes the locally stored token but does not revoke it on {% data variables.product.github %}. + +To revoke the OAuth app authorization on {% data variables.product.github %} and prevent it from being used elsewhere, follow these steps. +The authorization is listed under **Settings** > **Applications** > **Authorized OAuth Apps**. + +1. Navigate to your settings page: + 1. In the upper-right corner of any page on {% data variables.product.prodname_dotcom %}, click your profile picture. + 1. Click **Settings**. +1. In the left sidebar, click **Applications**. +1. Under **Authorized OAuth Apps**, click {% octicon "kebab-horizontal" aria-label="The horizontal kebab icon" %} next to **GitHub CLI** to expand the menu and select **Revoke**. 
diff --git a/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/index.md b/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/index.md index 978e659e7e6b..29179bd7b888 100644 --- a/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/index.md +++ b/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/index.md @@ -6,6 +6,9 @@ versions: feature: copilot children: - /install-copilot-cli + - /authenticate-copilot-cli - /configure-copilot-cli + - /troubleshoot-copilot-cli-auth + contentType: how-tos ---- \ No newline at end of file +--- diff --git a/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/install-copilot-cli.md b/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/install-copilot-cli.md index 19e60c91dbca..7adc1594511d 100644 --- a/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/install-copilot-cli.md +++ b/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/install-copilot-cli.md @@ -116,16 +116,13 @@ Download the executable for your platform, unpack it, and run. ## Authenticating with {% data variables.copilot.copilot_cli_short %} -On first launch, if you're not currently logged in to {% data variables.product.github %}, you'll be prompted to use the `/login` slash command. Enter this command and follow the on-screen instructions to authenticate. +On first launch, if you're not currently logged in to {% data variables.product.github %}, you'll be prompted to use the `/login` slash command. Enter this command and follow the on-screen instructions to authenticate. For more information on the authentication process, see [AUTOTITLE](/copilot/how-tos/copilot-cli/set-up-copilot-cli/authenticate-copilot-cli). ### Authenticating with a {% data variables.product.pat_generic %} You can also authenticate using a {% data variables.product.pat_v2 %} with the "{% data variables.product.prodname_copilot_short %} Requests" permission enabled. -1. 
Visit [{% data variables.product.pat_v2_caps_plural %}](https://github.com/settings/personal-access-tokens/new). -1. Under "Permissions," click **Add permissions** and select **{% data variables.product.prodname_copilot_short %} Requests**. -1. Click **Generate token**. -1. Add the token to your environment using the `GH_TOKEN` or `GITHUB_TOKEN` environment variable (in order of precedence). +{% data reusables.copilot.copilot-cli-pat-steps %} ## Next steps diff --git a/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/troubleshoot-copilot-cli-auth.md b/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/troubleshoot-copilot-cli-auth.md new file mode 100644 index 000000000000..c46c6e804234 --- /dev/null +++ b/content/copilot/how-tos/copilot-cli/set-up-copilot-cli/troubleshoot-copilot-cli-auth.md @@ -0,0 +1,199 @@ +--- +title: Troubleshooting GitHub Copilot CLI authentication +shortTitle: Troubleshoot Copilot CLI auth +intro: Diagnose authentication failures when signing in to {% data variables.copilot.copilot_cli_short %}. +versions: + feature: copilot +topics: + - Copilot + - Troubleshooting +contentType: how-tos +category: + - Configure Copilot CLI +--- + +## Authentication errors + +If you encounter authentication errors, use the table below to identify the cause and resolution. 
+ +| Issue | Cause | Fix | More information | +|---------------------------------------------------------|--------------------------------------------------------------------------------------|-----------------------------------------------------------|-----------------------------------------------------------------------------------| +| No authentication information found | No credentials stored | Run `copilot login` | [No authentication information found](#no-authentication-information-found) | +| 401 Unauthorized | Token revoked or insufficient permissions | Generate token with permissions | [Token expired or revoked](#token-expired-or-revoked) | +| {% data variables.product.pat_classic_caps %} rejected | {% data variables.product.pat_classic_caps %} (`ghp_`) | Use fine-grained {% data variables.product.pat_generic %} | [{% data variables.product.pat_classic_caps %} rejected](#personal-access-token-classic-rejected) | +| 403 Forbidden or policy denied | {% data variables.product.prodname_copilot_short %} license or enterprise/org policy | Check subscription and org settings | [Access denied](#access-denied) | +| Keychain unavailable | Missing system keychain | Install `libsecret` or accept plaintext | [Keychain access failure](#keychain-access-failure) | +| Wrong account | Multiple accounts or env var override | Check env vars, use `/user switch` | [Wrong account](#wrong-account) | + +## No authentication information found + +{% data variables.copilot.copilot_cli_short %} displays the following error: + + +```text +Error: No authentication information found +Copilot can be authenticated with GitHub using an OAuth Token or a Fine-Grained Personal Access Token +``` + + +### Cause + +No credentials exist in any of the checked locations. + +### Fix + +Use the following steps to find where authentication is missing and restore access. 
+ +#### Check your authentication status + + ```bash copy + gh auth status + ``` + +If you see a message indicating that you’re not logged in, log in with `gh auth login` or use the OAuth flow with `copilot login`. + +#### Check whether an authentication environment variable is set + +If you are using an environment variable, check whether the `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, or `GITHUB_TOKEN` environment variable is set: + + ```bash copy + echo $COPILOT_GITHUB_TOKEN + ``` + +If the command prints nothing, the variable is not set. Set the variable to a valid token. To generate a token, see [AUTOTITLE](/copilot/how-tos/copilot-cli/set-up-copilot-cli/authenticate-copilot-cli#authenticating-with-environment-variables). + + ```bash copy + export COPILOT_GITHUB_TOKEN=PERSONAL_ACCESS_TOKEN + ``` + +#### macOS keychain + + ```bash copy + security find-generic-password -s copilot-cli + ``` + +If no item is found, authenticate again with `/login` or `copilot login` to create one. +If an item is found but authentication still fails, remove the saved credential then authenticate again with `/login` or `copilot login`: + + ```bash copy + security delete-generic-password -s copilot-cli + ``` + +## Token expired or revoked + +{% data variables.copilot.copilot_cli_short %} displays the following error: + + +```text +Error: Authentication failed + +Your GitHub token may be invalid, expired, or lacking the required permissions. + +To resolve this, try the following: + • Start 'copilot' and run the '/login' command to re-authenticate + • If using a Fine-Grained PAT, ensure it has the 'Copilot Requests' permission enabled + • If using COPILOT_GITHUB_TOKEN, GH_TOKEN or GITHUB_TOKEN environment variable, verify the token is valid and not expired + • Run 'gh auth status' to check your current authentication status +``` + + +### Cause + +The token was revoked, has expired, or was created without the required permissions. 
+ +### Fix + +Review the token's status and permissions on {% data variables.product.prodname_dotcom %}. The token must have the **Copilot Requests** permission. Generate a new token with the required permissions if necessary. + +## {% data variables.product.pat_classic_caps %} rejected + +A token starting with `ghp_` is silently ignored and the CLI behaves as if no token is set. + +### Cause + +Classic {% data variables.product.pat_generic_plural %} are not supported by {% data variables.copilot.copilot_cli_short %}. + +### Fix +Generate a fine-grained {% data variables.product.pat_generic %} with the **Copilot Requests** permission. + +## Access denied + +{% data variables.copilot.copilot_cli_short %} displays the following error: + +```text +Error: Access denied by policy settings + +Your Copilot CLI policy setting may be preventing access. This can happen when: + • Your organization has restricted Copilot access + • Your Copilot subscription does not include this feature + • Required policies have not been enabled by your administrator + +To resolve this, visit your Copilot settings: +``` + +### Cause + +An organization policy blocks {% data variables.copilot.copilot_cli %}, or the user account lacks a {% data variables.product.prodname_copilot %} license. + +### Fix + +* Check that your account has an active {% data variables.product.prodname_copilot %} license. +* Ask your organization admin to enable {% data variables.copilot.copilot_cli %} in the organization policy. + +## Keychain access failure + +During login, the CLI reports that the system keychain is unavailable and asks whether to store credentials in plaintext. + +```text +System keychain unavailable. Store token in plaintext config file? (y/N) +``` + +### Cause + +The system keychain is not accessible. This may occur on Linux systems without `libsecret`, headless servers, or systems with a permission issue. 
+ +### Fix + +Follow the steps for your operating system to restore secure credential storage. + +#### macOS or Windows + +On macOS, confirm that the Keychain Access app is available and that you can unlock your login keychain. +On Windows, confirm that Credential Manager is available and that you can access the Windows Vault. +If you can’t access the system credential manager, use plaintext storage (if prompted) or authenticate using an environment variable token, then rerun `/login` or `copilot login`. + +#### Linux + +On Linux, use the system keyring or store credentials in plaintext. + +1. Check whether `secret-tool` is installed: + + ```bash copy + command -v secret-tool + ``` + +1. If `secret-tool` is not found (the command prints nothing), install `libsecret` and its dependencies. + + ```bash copy + sudo apt install libsecret-1-0 libsecret-1-dev libsecret-common gnome-keyring gnome-keyring-pkcs11 seahorse + ``` + +1. Once `secret-tool` is installed, search the keyring for a saved credential: + + ```bash copy + secret-tool search copilot-cli + ``` + + If the command returns one or more results, credentials exist in the keyring. Run `copilot login` in the terminal or `/login` in {% data variables.copilot.copilot_cli_short %} again. + +## Wrong account + +The wrong user is authenticated, or the token belongs to the wrong organization. + +### Cause + +Multiple accounts are stored, or an environment variable is overriding the stored token. + +### Fix + +To switch accounts, use `/user switch` at the CLI prompt, or sign out with `/logout` and run `/login` with the correct account. 
diff --git a/content/copilot/reference/ai-models/model-hosting.md b/content/copilot/reference/ai-models/model-hosting.md index fd8f106d88a1..f5b0294be840 100644 --- a/content/copilot/reference/ai-models/model-hosting.md +++ b/content/copilot/reference/ai-models/model-hosting.md @@ -62,7 +62,9 @@ Used for: These models are hosted by Amazon Web Services, Anthropic PBC, and Google Cloud Platform. {% data variables.product.github %} has provider agreements in place to ensure data is not used for training. Additional details for each provider are included below: * Amazon Bedrock: Amazon makes the [following data commitments](https://docs.aws.amazon.com/bedrock/latest/userguide/data-protection.html): _Amazon Bedrock doesn't store or log your prompts and completions. Amazon Bedrock doesn't use your prompts and completions to train any AWS models and doesn't distribute them to third parties_. -* Anthropic PBC: {% data variables.product.github %} maintains a [zero data retention agreement](https://privacy.anthropic.com/en/articles/8956058-i-have-a-zero-retention-agreement-with-anthropic-what-products-does-it-apply-to) with Anthropic. + +* Anthropic PBC: {% data variables.product.github %} maintains a [zero data retention agreement](https://privacy.anthropic.com/en/articles/8956058-i-have-a-zero-retention-agreement-with-anthropic-what-products-does-it-apply-to) with Anthropic for generally available Anthropic features in {% data variables.product.prodname_copilot %}. Some Anthropic features in beta or {% data variables.release-phases.public_preview %}—including tool search via the Messages API—are not covered by this agreement. For these features, data may be retained by Anthropic in accordance with [Anthropic's ZDR documentation](https://platform.claude.com/docs/en/build-with-claude/zero-data-retention). {% data variables.product.github %} will update this page as ZDR coverage changes. 
+ * Google Cloud: [Google commits to not training on {% data variables.product.github %} data as part of their service terms](https://cloud.google.com/vertex-ai/generative-ai/docs/data-governance). {% data variables.product.github %} is additionally not subject to prompt logging for abuse monitoring. To provide better service quality and reduce latency, {% data variables.product.github %} uses [prompt caching](https://platform.claude.com/docs/en/build-with-claude/prompt-caching). You can read more about prompt caching on [Anthropic PBC](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching), [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html), and [Google Cloud](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude-prompt-caching). diff --git a/data/reusables/copilot/copilot-cli-pat-steps.md b/data/reusables/copilot/copilot-cli-pat-steps.md new file mode 100644 index 000000000000..52b935d35e3a --- /dev/null +++ b/data/reusables/copilot/copilot-cli-pat-steps.md @@ -0,0 +1,4 @@ +1. Visit [{% data variables.product.pat_v2_caps_plural %}](https://github.com/settings/personal-access-tokens/new). +1. Under "Permissions," click **Add permissions** and select **{% data variables.product.prodname_copilot_short %} Requests**. +1. Click **Generate token**. +1. Export the token in your terminal or environment configuration. Use the `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, or `GITHUB_TOKEN` environment variable (in order of precedence). 
\ No newline at end of file diff --git a/data/reusables/enterprise_installation/replication-command.md b/data/reusables/enterprise_installation/replication-command.md index 9f91871be081..27c96e21cc62 100644 --- a/data/reusables/enterprise_installation/replication-command.md +++ b/data/reusables/enterprise_installation/replication-command.md @@ -10,7 +10,6 @@ > If you are running version 3.14.20, 3.15.15, 3.16.11, 3.17.8, 3.18.2, or a later version including future releases like 3.19, you no longer need to put the primary in maintenance mode prior to running `ghe-repl-start`. This command is no longer expected to cause an outage on the primary server. However, when setting up a new replica, `ghe-repl-start` won't cause an outage as long as you run `ghe-config-apply` between `ghe-repl-setup` and `ghe-repl-start`. Skipping `ghe-config-apply` and going straight from `ghe-repl-setup` to `ghe-repl-start` will still result in an outage. {% else %} - > [!WARNING] > [!WARNING] > To ensure uninterrupted service when setting up a new replica, run `ghe-config-apply` between `ghe-repl-setup` and `ghe-repl-start`. This allows the primary server to remain available throughout the replication setup process. 
diff --git a/src/search/scripts/scrape/lib/build-records-from-api.ts b/src/search/scripts/scrape/lib/build-records-from-api.ts index 94b84a1b8bfe..bf2572f17171 100644 --- a/src/search/scripts/scrape/lib/build-records-from-api.ts +++ b/src/search/scripts/scrape/lib/build-records-from-api.ts @@ -416,6 +416,18 @@ export default async function buildRecordsFromApi( } if (result.record) { + // Validate required fields before adding to records + if (!result.record.title) { + failedPages.push({ + url: permalink.href, + relativePath: permalink.relativePath, + error: 'Record has empty title', + errorType: 'Validation Error', + }) + if (!noMarkers) process.stdout.write(chalk.red('✗')) + return null + } + // Apply popularity const pathArticle = permalink.relativePath.replace('/index.md', '').replace('.md', '') let popularity = (hasPopularPages && popularPages[pathArticle]) || 0.0 diff --git a/src/search/scripts/scrape/lib/search-index-records.ts b/src/search/scripts/scrape/lib/search-index-records.ts index b2ddb01b1c45..78a6cac75739 100644 --- a/src/search/scripts/scrape/lib/search-index-records.ts +++ b/src/search/scripts/scrape/lib/search-index-records.ts @@ -1,7 +1,7 @@ import path from 'path' import fsSync from 'fs' import fs from 'fs/promises' -import assert from 'assert' +import chalk from 'chalk' import { isArray, isString } from 'lodash-es' import type { Record } from '@/search/scripts/scrape/types' @@ -11,9 +11,9 @@ export async function writeIndexRecords( records: Record[], outDirectory: string, ): Promise { - validateRecords(name, records) + const validRecords = validateRecords(name, records) - const recordsObject = Object.fromEntries(records.map((record) => [record.objectID, record])) + const recordsObject = Object.fromEntries(validRecords.map((record) => [record.objectID, record])) const content = JSON.stringify(recordsObject, undefined, 0) // If the outDirectory doesn't exist, create it @@ -27,30 +27,52 @@ export async function writeIndexRecords( return filePath 
} -function validateRecords(name: string, records: Record[]): true { - assert(isString(name) && name.length, '`name` is required') - assert(isArray(records) && records.length, '`records` must be a non-empty array') +function validateRecords(name: string, records: Record[]): Record[] { + if (!isString(name) || !name.length) { + throw new Error('`name` is required') + } + if (!isArray(records) || !records.length) { + throw new Error('`records` must be a non-empty array') + } - // each ID is unique + // each ID is unique — deduplicate rather than crash const objectIDs = records.map((record) => record.objectID) const dupes = countArrayValues(objectIDs) .filter(({ count }) => count > 1) .map(({ value }) => value) - assert(!dupes.length, `every objectID must be unique. dupes: ${dupes.join('; ')}`) + if (dupes.length) { + console.warn( + chalk.yellow(`⚠ Duplicate objectIDs found and will be deduplicated: ${dupes.join('; ')}`), + ) + } + + const seen = new Set() + const validRecords: Record[] = [] for (const record of records) { - assert( - isString(record.objectID) && record.objectID.length, - `objectID must be a string. received: ${record.objectID}, ${JSON.stringify(record)}`, - ) + if (!isString(record.objectID) || !record.objectID.length) { + console.warn( + chalk.yellow( + `⚠ Skipping record with invalid objectID: ${JSON.stringify({ objectID: record.objectID, title: record.title })}`, + ), + ) + continue + } - assert( - isString(record.title) && record.title.length, - `title must be a string. 
received: ${record.title}, ${JSON.stringify(record)}`, - ) + if (!isString(record.title) || !record.title.length) { + console.warn(chalk.yellow(`⚠ Skipping record with empty title: ${record.objectID}`)) + continue + } + + if (seen.has(record.objectID)) { + continue + } + seen.add(record.objectID) + + validRecords.push(record) } - return true + return validRecords } function countArrayValues(arr: string[]) { diff --git a/src/search/scripts/scrape/tests/build-records-from-api.ts b/src/search/scripts/scrape/tests/build-records-from-api.ts new file mode 100644 index 000000000000..ef3f409c9b3b --- /dev/null +++ b/src/search/scripts/scrape/tests/build-records-from-api.ts @@ -0,0 +1,131 @@ +import { describe, test, expect } from 'vitest' + +import { + articleApiResponseToRecord, + extractFromMarkdown, + type ArticleApiResponse, +} from '@/search/scripts/scrape/lib/build-records-from-api' + +describe('articleApiResponseToRecord', () => { + function makeApiResponse(overrides: Partial = {}): ArticleApiResponse { + return { + meta: { + title: 'Test Page', + intro: 'An intro paragraph.', + product: 'test-product', + breadcrumbs: [ + { href: '/en', title: 'Home' }, + { href: '/en/test', title: 'Test' }, + { href: '/en/test/page', title: 'Test Page' }, + ], + ...overrides.meta, + }, + body: overrides.body ?? 
'## Getting started\n\nSome content here.', + } + } + + test('converts API response to a search record', () => { + const record = articleApiResponseToRecord('/en/test/page', makeApiResponse()) + + expect(record.objectID).toBe('/en/test/page') + expect(record.title).toBe('Test Page') + expect(record.intro).toBe('An intro paragraph.') + expect(record.breadcrumbs).toBe('Home / Test') + expect(record.toplevel).toBe('Home') + expect(record.headings).toContain('Getting started') + expect(record.content).toContain('Some content here.') + }) + + test('returns empty title when API response has empty title', () => { + const record = articleApiResponseToRecord( + '/en/test/page', + makeApiResponse({ meta: { title: '', intro: '', product: 'test' } }), + ) + + expect(record.title).toBe('') + }) + + test('excludes navigational headings', () => { + const record = articleApiResponseToRecord( + '/en/test/page', + makeApiResponse({ + body: '## In this article\n\n## Real heading\n\n## Further reading\n\nContent.', + }), + ) + + expect(record.headings).toBe('Real heading') + }) + + test('handles missing breadcrumbs', () => { + const response = makeApiResponse() + response.meta.breadcrumbs = undefined + const record = articleApiResponseToRecord('/en/test/page', response) + + expect(record.breadcrumbs).toBe('') + expect(record.toplevel).toBe('') + }) + + test('prepends intro to content when not already present', () => { + const record = articleApiResponseToRecord( + '/en/test/page', + makeApiResponse({ + meta: { title: 'T', intro: 'Unique intro.', product: 'p' }, + body: 'Body text only.', + }), + ) + + expect(record.content).toMatch(/^Unique intro\.\nBody text only\.$/) + }) + + test('does not duplicate intro when body already contains it', () => { + const record = articleApiResponseToRecord( + '/en/test/page', + makeApiResponse({ + meta: { title: 'T', intro: 'Body text', product: 'p' }, + body: 'Body text only.', + }), + ) + + const occurrences = record.content.split('Body 
text').length - 1 + expect(occurrences).toBe(1) + }) +}) + +describe('extractFromMarkdown', () => { + test('extracts h2 headings and plain text content', () => { + const md = '## Heading One\n\nParagraph text.\n\n## Heading Two\n\nMore text.' + const result = extractFromMarkdown(md) + + expect(result.headings).toBe('Heading One\nHeading Two') + expect(result.content).toContain('Paragraph text.') + expect(result.content).toContain('More text.') + }) + + test('skips ignored heading slugs', () => { + const md = '## In this article\n\n## Prerequisites\n\n## Real heading' + const result = extractFromMarkdown(md) + + expect(result.headings).toBe('Real heading') + }) + + test('skips ignored heading texts in non-English', () => { + const md = '## 本文内容\n\n## 先决条件\n\n## 真正的标题' + const result = extractFromMarkdown(md) + + expect(result.headings).toBe('真正的标题') + }) + + test('ignores h1 and h3+ headings', () => { + const md = '# H1 Title\n\n## H2 Heading\n\n### H3 Subheading' + const result = extractFromMarkdown(md) + + expect(result.headings).toBe('H2 Heading') + }) + + test('preserves code block text in content', () => { + const md = '## Setup\n\n```bash\nssh_url=git@github.com\n```' + const result = extractFromMarkdown(md) + + expect(result.content).toContain('ssh_url=git@github.com') + }) +}) diff --git a/src/search/scripts/scrape/tests/search-index-records.ts b/src/search/scripts/scrape/tests/search-index-records.ts new file mode 100644 index 000000000000..742f2e2f7149 --- /dev/null +++ b/src/search/scripts/scrape/tests/search-index-records.ts @@ -0,0 +1,123 @@ +import { describe, test, expect, vi, beforeEach } from 'vitest' +import fs from 'fs/promises' +import fsSync from 'fs' + +import { writeIndexRecords } from '@/search/scripts/scrape/lib/search-index-records' +import type { Record } from '@/search/scripts/scrape/types' + +vi.mock('fs/promises') +vi.mock('fs') + +function makeRecord(overrides: Partial = {}): Record { + return { + objectID: '/en/test-page', + 
breadcrumbs: 'Test', + title: 'Test Page', + headings: '', + content: 'Some content', + intro: '', + toplevel: 'Test', + ...overrides, + } +} + +describe('writeIndexRecords', () => { + beforeEach(() => { + vi.clearAllMocks() + vi.mocked(fsSync.existsSync).mockReturnValue(true) + vi.mocked(fs.writeFile).mockResolvedValue() + }) + + test('writes valid records to JSON file', async () => { + const records = [makeRecord(), makeRecord({ objectID: '/en/other-page', title: 'Other Page' })] + + const result = await writeIndexRecords('test-index', records, '/tmp/out') + + expect(result).toBe('/tmp/out/test-index-records.json') + expect(fs.writeFile).toHaveBeenCalledOnce() + const writtenJson = vi.mocked(fs.writeFile).mock.calls[0][1] as string + const parsed = JSON.parse(writtenJson) + expect(Object.keys(parsed)).toEqual(['/en/test-page', '/en/other-page']) + }) + + test('filters out records with empty titles', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}) + const records = [makeRecord(), makeRecord({ objectID: '/en/bad-page', title: '' })] + + await writeIndexRecords('test-index', records, '/tmp/out') + + const writtenJson = vi.mocked(fs.writeFile).mock.calls[0][1] as string + const parsed = JSON.parse(writtenJson) + expect(Object.keys(parsed)).toEqual(['/en/test-page']) + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('empty title')) + warnSpy.mockRestore() + }) + + test('filters out records with missing objectID', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}) + const records = [makeRecord(), makeRecord({ objectID: '', title: 'No ID' })] + + await writeIndexRecords('test-index', records, '/tmp/out') + + const writtenJson = vi.mocked(fs.writeFile).mock.calls[0][1] as string + const parsed = JSON.parse(writtenJson) + expect(Object.keys(parsed)).toEqual(['/en/test-page']) + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('invalid objectID')) + warnSpy.mockRestore() + 
}) + + test('deduplicates records with the same objectID', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}) + const records = [ + makeRecord({ title: 'First' }), + makeRecord({ title: 'Duplicate' }), // same objectID + ] + + await writeIndexRecords('test-index', records, '/tmp/out') + + const writtenJson = vi.mocked(fs.writeFile).mock.calls[0][1] as string + const parsed = JSON.parse(writtenJson) + expect(parsed['/en/test-page'].title).toBe('First') + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('Duplicate objectIDs')) + warnSpy.mockRestore() + }) + + test('does not log full record content for invalid objectID', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}) + const records = [ + makeRecord(), + makeRecord({ + objectID: '', + title: 'No ID', + content: 'A very long content string that should not appear in logs', + }), + ] + + await writeIndexRecords('test-index', records, '/tmp/out') + + const warnMessage = warnSpy.mock.calls[0][0] as string + expect(warnMessage).not.toContain('very long content string') + warnSpy.mockRestore() + }) + + test('throws when name is empty', async () => { + await expect(writeIndexRecords('', [makeRecord()], '/tmp/out')).rejects.toThrow( + '`name` is required', + ) + }) + + test('throws when records array is empty', async () => { + await expect(writeIndexRecords('test-index', [], '/tmp/out')).rejects.toThrow( + '`records` must be a non-empty array', + ) + }) + + test('creates output directory if it does not exist', async () => { + vi.mocked(fsSync.existsSync).mockReturnValue(false) + vi.mocked(fs.mkdir).mockResolvedValue(undefined) + + await writeIndexRecords('test-index', [makeRecord()], '/tmp/new-dir') + + expect(fs.mkdir).toHaveBeenCalledWith('/tmp/new-dir', { recursive: true }) + }) +})