Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@
"escape-string-regexp": "~5.0.0",
"execa": "^9.5.2",
"express": "~5.2.0",
"globby": "~15.0.0",
"handlebars": "~4.7.8",
"ignore": "^5.3.2",
"indent-string": "^5.0.0",
"is-ci": "~4.1.0",
"istextorbinary": "~9.5.0",
Expand All @@ -107,6 +107,7 @@
"string-width": "^8.0.0",
"strip-ansi": "^7.1.0",
"tiged": "~2.12.7",
"tinyglobby": "^0.2.15",
"which": "^6.0.0",
"widest-line": "^6.0.0",
"wrap-ansi": "^10.0.0"
Expand Down
132 changes: 120 additions & 12 deletions src/lib/utils.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { execSync } from 'node:child_process';
import { createWriteStream, existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync } from 'node:fs';
import { mkdir, readFile } from 'node:fs/promises';
import type { IncomingMessage } from 'node:http';
import { get } from 'node:https';
import { homedir } from 'node:os';
import { dirname, join } from 'node:path';
import { dirname, join, relative } from 'node:path';
import process from 'node:process';
import { finished } from 'node:stream/promises';

Expand All @@ -15,12 +16,13 @@ import { type ActorRun, ApifyClient, type ApifyClientOptions, type Build } from
import archiver from 'archiver';
import { AxiosHeaders } from 'axios';
import escapeStringRegexp from 'escape-string-regexp';
import { globby } from 'globby';
import ignoreModule, { type Ignore } from 'ignore';
import { getEncoding } from 'istextorbinary';
import { Mime } from 'mime';
import otherMimes from 'mime/types/other.js';
import standardMimes from 'mime/types/standard.js';
import { gte, minVersion, satisfies } from 'semver';
import { escapePath, glob } from 'tinyglobby';

import {
ACTOR_ENV_VARS,
Expand Down Expand Up @@ -134,8 +136,7 @@ const getTokenWithAuthFileFallback = (existingToken?: string) => {
return existingToken;
};

/**
 * Type of the `headers` field of axios' `AxiosRequestConfig`, taken from the type declarations
 * that are resolved when the package is loaded in `require` (CommonJS) resolution mode.
 */
// biome-ignore format: off
type CJSAxiosHeaders = import('axios', { with: { 'resolution-mode': 'require' } }).AxiosRequestConfig['headers'];

/**
* Returns options for ApifyClient
Expand Down Expand Up @@ -231,9 +232,13 @@ export const setLocalEnv = async (actDir: string) => {
if (gitignoreAdditions.length > 0) {
if (gitignoreContents.length > 0) {
gitignoreAdditions.unshift('# Added by Apify CLI');
writeFileSync(gitignorePath, `\n${gitignoreAdditions.join('\n')}\n`, { flag: 'a' });
writeFileSync(gitignorePath, `\n${gitignoreAdditions.join('\n')}\n`, {
flag: 'a',
});
} else {
writeFileSync(gitignorePath, `${gitignoreAdditions.join('\n')}\n`, { flag: 'w' });
writeFileSync(gitignorePath, `${gitignoreAdditions.join('\n')}\n`, {
flag: 'w',
});
}
}
};
Expand Down Expand Up @@ -285,18 +290,121 @@ export const createSourceFiles = async (paths: string[], cwd: string) => {
});
};

/**
 * Fallback for when `git ls-files` cannot be used: collects `.gitignore` files and builds a
 * path filter from them with the `ignore` package.
 *
 * Two sources of rules are collected:
 *  - every `.gitignore` found at or below `cwd` (the `.git` folder is not searched); each one is
 *    only applied to paths inside the directory that contains it,
 *  - `.gitignore` files in ancestor directories of `cwd` (e.g. a monorepo root). The walk upwards
 *    stops at the first ancestor that contains a `.git` entry, and that ancestor's own
 *    `.gitignore` is NOT read. The walk is skipped entirely when `cwd` itself contains `.git`,
 *    because everything above a repository root lies outside of that repository.
 *
 * Limitations: every `.gitignore` is evaluated on its own, so a path is dropped as soon as any
 * single file ignores it. A `!negation` rule can therefore only re-include paths ignored by the
 * same file, not paths ignored by a different `.gitignore`.
 *
 * @param cwd Directory the filtered paths are relative to.
 * @returns A function that takes paths relative to `cwd` (using `/` separators) and returns the
 *          ones that are not ignored. When no `.gitignore` was found the input is returned as is.
 */
const getGitignoreFallbackFilter = async (cwd: string): Promise<(paths: string[]) => string[]> => {
	const gitignoreFiles = await glob('**/.gitignore', {
		dot: true,
		cwd,
		ignore: ['.git/**'],
		expandDirectories: false,
	});

	// `ignore` is a CJS package; TypeScript sees its default import as the module
	// object rather than the callable factory, so we cast through unknown.
	const makeIg = ignoreModule as unknown as () => Ignore;

	const filters: { dir: string; ig: Ignore; ancestorPrefix?: string }[] = [];

	for (const gitignoreFile of gitignoreFiles) {
		const gitignoreDir = dirname(gitignoreFile); // e.g. 'src' or '.'
		const content = await readFile(join(cwd, gitignoreFile), 'utf-8');
		// An empty `dir` marks a filter that applies to every path below cwd.
		filters.push({ dir: gitignoreDir === '.' ? '' : gitignoreDir, ig: makeIg().add(content) });
	}

	// When cwd is itself a repository root, its ancestors are outside of the repository and
	// their .gitignore files must not leak into the result.
	const cwdIsRepositoryRoot = existsSync(join(cwd, '.git'));

	// Walk ancestor directories to pick up parent .gitignore files (e.g. monorepo root).
	// The loop ends before the filesystem root, where dirname() returns its own input.
	let parentDir = dirname(cwd);
	while (!cwdIsRepositoryRoot && parentDir !== dirname(parentDir)) {
		// The .git check comes first, so the walk stops BEFORE reading the .gitignore that sits
		// next to it. This avoids applying rules from an unrelated outer repository, at the cost
		// of also skipping the enclosing repository's root .gitignore.
		if (existsSync(join(parentDir, '.git'))) {
			break;
		}

		const parentGitignorePath = join(parentDir, '.gitignore');
		if (existsSync(parentGitignorePath)) {
			try {
				const content = await readFile(parentGitignorePath, 'utf-8');
				// Paths passed to the filter are relative to cwd. To test them against a
				// .gitignore that lives above cwd, they are prefixed with the path from that
				// ancestor down to cwd so the patterns are matched in the right scope.
				const ancestorPrefix = relative(parentDir, cwd);
				filters.push({ dir: '', ig: makeIg().add(content), ancestorPrefix });
			} catch {
				// Best effort: an unreadable ancestor .gitignore is skipped.
			}
		}

		parentDir = dirname(parentDir);
	}

	if (filters.length === 0) {
		return (paths) => paths;
	}

	/** True when at least one of the collected .gitignore files ignores `filePath`. */
	const isIgnored = (filePath: string): boolean =>
		filters.some(({ dir, ig, ancestorPrefix }) => {
			if (!dir) {
				// Root-level or ancestor .gitignore: applies to every path.
				return ig.ignores(ancestorPrefix ? `${ancestorPrefix}/${filePath}` : filePath);
			}

			// Nested .gitignore: only applies to paths inside its own directory, and the
			// patterns are matched against the path relative to that directory.
			if (!filePath.startsWith(`${dir}/`)) {
				return false;
			}

			return ig.ignores(filePath.slice(dir.length + 1));
		});

	return (paths) => paths.filter((filePath) => !isIgnored(filePath));
};

/**
 * Get Actor local files, omit files defined in .gitignore and .git folder
 * All dot files(.file) and folders(.folder/) are included.
 *
 * Ignored paths are taken from `git ls-files` when it can be run in the directory. When the
 * command fails (git is unavailable or the directory is not a git repository), the `.gitignore`
 * files are parsed directly instead.
 *
 * @param cwd Directory to list; defaults to the current working directory of the process.
 * @returns File paths relative to `cwd`.
 */
export const getActorLocalFilePaths = async (cwd?: string): Promise<string[]> => {
	const resolvedCwd = cwd ?? process.cwd();

	let ignore = ['.git/**', 'apify_storage', 'node_modules', 'storage', 'crawlee_storage'];

	let fallbackFilter: ((paths: string[]) => string[]) | null = null;

	// Use git ls-files to get gitignored paths — this correctly handles ancestor .gitignore files,
	// nested .gitignore files, .git/info/exclude, and global gitignore config
	try {
		// `-z` makes git emit NUL-terminated, unquoted paths. Without it, paths containing
		// non-ASCII or special characters are printed C-quoted and would never match a real file.
		const gitIgnored = execSync('git ls-files --others --ignored --exclude-standard --directory -z', {
			cwd: resolvedCwd,
			encoding: 'utf-8',
			stdio: ['ignore', 'pipe', 'ignore'],
			// The default limit is 1 MiB; exceeding it throws and would silently
			// push large projects onto the less precise fallback below.
			maxBuffer: 64 * 1024 * 1024,
		})
			.split('\0')
			.filter(Boolean)
			// The entries are literal paths, so glob special characters have to be escaped.
			.map((p) => escapePath(p));

		// concat instead of push(...spread): the list may be long enough to hit the argument limit.
		ignore = ignore.concat(gitIgnored);
	} catch {
		// git is unavailable or directory is not a git repo — fall back to parsing .gitignore files
		fallbackFilter = await getGitignoreFallbackFilter(resolvedCwd);
	}

	const paths = await glob(['*', '**/**'], {
		ignore,
		dot: true,
		expandDirectories: false,
		cwd: resolvedCwd,
	});

	return fallbackFilter ? fallbackFilter(paths) : paths;
};

/**
* Create zip file with all Actor files specified with pathsToZip
*/
Expand Down Expand Up @@ -444,7 +552,7 @@ export const getNpmCmd = (): string => {
* Returns true if apify storage is empty (expect INPUT.*)
*/
export const checkIfStorageIsEmpty = async () => {
const filesWithoutInput = await globby([
const filesWithoutInput = await glob([
`${getLocalStorageDir()}/**`,
// Omit INPUT.* file
`!${getLocalKeyValueStorePath()}/${KEY_VALUE_STORE_KEYS.INPUT}.*`,
Expand Down
162 changes: 162 additions & 0 deletions test/local/lib/utils-gitignore-fallback.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import { mkdirSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';

import { ensureFolderExistsSync } from '../../../src/lib/files.js';
import { getActorLocalFilePaths } from '../../../src/lib/utils.js';
import { useTempPath } from '../../__setup__/hooks/useTempPath.js';

// Replace `execSync` from node:child_process with a stub that always throws, which simulates
// git not being available (or the directory not being a git repository). Every other export
// of the module is passed through unchanged.
// vi.mock is hoisted before imports, so utils.ts gets the mocked version.
vi.mock('node:child_process', async (importOriginal) => {
	const original = await importOriginal<typeof import('node:child_process')>();
	return {
		...original,
		// Any call fails, so the `git ls-files` lookup in getActorLocalFilePaths ends up in its catch branch.
		execSync: () => {
			throw new Error('not a git repository');
		},
	};
});

// Fixture layout for the root-level .gitignore scenario.
const TEST_DIR = 'gitignore-fallback-test-dir';
const FOLDERS = ['src', 'src/utils'];
const FOLDERS_TO_IGNORE = ['dist', 'src/generated'];
const FILES = ['main.js', 'src/index.js', 'src/utils/helper.js'];
const FILES_IN_IGNORED_DIR = ['dist/bundle.js', 'src/generated/types.js'];
const FILES_TO_IGNORE = ['debug.log'];

describe('Utils - gitignore fallback (no git)', () => {
	const { tmpPath, joinPath, beforeAllCalls, afterAllCalls } = useTempPath(TEST_DIR, {
		create: true,
		remove: true,
		cwd: false,
		cwdParent: false,
	});

	beforeAll(async () => {
		await beforeAllCalls();

		// No `git init` on purpose: execSync is mocked to throw, so the .gitignore fallback is used.

		for (const folder of [...FOLDERS, ...FOLDERS_TO_IGNORE]) {
			ensureFolderExistsSync(tmpPath, folder);
		}

		for (const file of [...FILES, ...FILES_TO_IGNORE, ...FILES_IN_IGNORED_DIR]) {
			writeFileSync(joinPath(file), 'content', { flag: 'w' });
		}

		// The root .gitignore lists the ignored folders followed by the ignored files, one per line.
		const gitignoreBody = [...FOLDERS_TO_IGNORE, ...FILES_TO_IGNORE].join('\n');
		writeFileSync(joinPath('.gitignore'), gitignoreBody, { flag: 'w' });
	});

	afterAll(async () => {
		await afterAllCalls();
	});

	it('should exclude files listed in .gitignore when git is unavailable', async () => {
		const paths = await getActorLocalFilePaths(tmpPath);

		for (const keptFile of FILES) {
			expect(paths).toContain(keptFile);
		}

		for (const droppedFile of [...FILES_IN_IGNORED_DIR, ...FILES_TO_IGNORE]) {
			expect(paths).not.toContain(droppedFile);
		}
	});
});

const NESTED_TEST_DIR = 'gitignore-nested-test-dir';

describe('Utils - nested .gitignore scoping (no git)', () => {
	const { tmpPath, joinPath, beforeAllCalls, afterAllCalls } = useTempPath(NESTED_TEST_DIR, {
		create: true,
		remove: true,
		cwd: false,
		cwdParent: false,
	});

	beforeAll(async () => {
		await beforeAllCalls();

		// Directory tree: src/ and src/internal/
		for (const folder of ['src', 'src/internal']) {
			ensureFolderExistsSync(tmpPath, folder);
		}

		// One file that must be kept and one that only src/.gitignore excludes.
		for (const file of ['src/public.js', 'src/internal/secret.js']) {
			writeFileSync(joinPath(file), 'content', { flag: 'w' });
		}

		// There is no root .gitignore; the only rule lives in the nested file.
		writeFileSync(joinPath('src/.gitignore'), 'internal/', { flag: 'w' });
	});

	afterAll(async () => {
		await afterAllCalls();
	});

	it('should exclude files matched by a nested .gitignore scoped to its own directory', async () => {
		const listedPaths = await getActorLocalFilePaths(tmpPath);

		// Kept: nothing ignores src/public.js.
		expect(listedPaths).toContain('src/public.js');

		// Dropped: the `internal/` rule of src/.gitignore applies relative to src/.
		expect(listedPaths).not.toContain('src/internal/secret.js');
	});
});

const PARENT_TEST_DIR = 'gitignore-parent-test-dir';

describe('Utils - parent .gitignore applied to subproject (no git)', () => {
	// tmpPath plays the role of the "project root" holding the parent .gitignore, while
	// getActorLocalFilePaths is pointed at tmpPath/subproject/.
	const { tmpPath, beforeAllCalls, afterAllCalls } = useTempPath(PARENT_TEST_DIR, {
		create: true,
		remove: true,
		cwd: false,
		cwdParent: false,
	});

	let subprojectPath: string;

	/** Writes a placeholder file at the given absolute path. */
	const writePlaceholder = (absolutePath: string) => {
		writeFileSync(absolutePath, 'content', { flag: 'w' });
	};

	beforeAll(async () => {
		await beforeAllCalls();

		subprojectPath = join(tmpPath, 'subproject');

		// Rules in the parent .gitignore are expected to apply to everything inside subproject/.
		// A fake .git is not required: the ancestor walk stops at the apify-cli repo root
		// (where .git lives) before that root's own .gitignore is read.
		writeFileSync(join(tmpPath, '.gitignore'), '*.secret\nbuild/\n', { flag: 'w' });

		// Subproject directory tree
		mkdirSync(subprojectPath, { recursive: true });
		for (const folder of ['src', 'build']) {
			ensureFolderExistsSync(subprojectPath, folder);
		}

		// Expected to be kept
		writePlaceholder(join(subprojectPath, 'main.js'));
		writePlaceholder(join(subprojectPath, 'src', 'utils.js'));

		// Expected to be excluded by the parent .gitignore
		writePlaceholder(join(subprojectPath, 'config.secret'));
		writePlaceholder(join(subprojectPath, 'src', 'db.secret'));
		writePlaceholder(join(subprojectPath, 'build', 'output.js'));
	});

	afterAll(async () => {
		await afterAllCalls();
	});

	it('should exclude files matched by *.secret pattern in parent .gitignore', async () => {
		const listedPaths = await getActorLocalFilePaths(subprojectPath);

		for (const kept of ['main.js', 'src/utils.js']) {
			expect(listedPaths).toContain(kept);
		}

		for (const dropped of ['config.secret', 'src/db.secret']) {
			expect(listedPaths).not.toContain(dropped);
		}
	});

	it('should exclude directory matched by build/ pattern in parent .gitignore', async () => {
		const listedPaths = await getActorLocalFilePaths(subprojectPath);

		expect(listedPaths).not.toContain('build/output.js');
	});
});
8 changes: 7 additions & 1 deletion test/local/lib/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,18 @@ describe('Utils', () => {
beforeAll(async () => {
await beforeAllCalls();

// Initialize a fresh git repo so the local .gitignore is parsed independently
// from the parent repo (which gitignores test/tmp entirely)
await execWithLog({ cmd: 'git', args: ['init'], opts: { cwd: tmpPath } });

FOLDERS.concat(FOLDERS_TO_IGNORE).forEach((folder) => {
ensureFolderExistsSync(tmpPath, folder);
});

FILES.concat(FILES_TO_IGNORE, FILES_IN_IGNORED_DIR).forEach((file) =>
writeFileSync(joinPath(file), Math.random().toString(36).substring(7), { flag: 'w' }),
writeFileSync(joinPath(file), Math.random().toString(36).substring(7), {
flag: 'w',
}),
);

const toIgnore = FOLDERS_TO_IGNORE.concat(FILES_TO_IGNORE).join('\n');
Expand Down
Loading