From bf24828f06488558debd9bcbb7d36c1871ea8324 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Tue, 10 Feb 2026 12:24:41 +0100 Subject: [PATCH 01/11] swapt globby for tinyglobby --- package.json | 3 +- src/lib/utils.ts | 49 ++++++++++++--- test/local/lib/utils.test.ts | 116 ++++++++++++++++++++++++++++++++++- yarn.lock | 33 +--------- 4 files changed, 159 insertions(+), 42 deletions(-) diff --git a/package.json b/package.json index ac148caff..f03bf0783 100644 --- a/package.json +++ b/package.json @@ -92,8 +92,8 @@ "escape-string-regexp": "~5.0.0", "execa": "^9.5.2", "express": "~5.2.0", - "globby": "~15.0.0", "handlebars": "~4.7.8", + "ignore": "^7.0.5", "indent-string": "^5.0.0", "is-ci": "~4.1.0", "istextorbinary": "~9.5.0", @@ -107,6 +107,7 @@ "string-width": "^8.0.0", "strip-ansi": "^7.1.0", "tiged": "~2.12.7", + "tinyglobby": "^0.2.15", "which": "^6.0.0", "widest-line": "^6.0.0", "wrap-ansi": "^9.0.0" diff --git a/src/lib/utils.ts b/src/lib/utils.ts index 1a3f0790e..6199dd305 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -15,12 +15,13 @@ import { type ActorRun, ApifyClient, type ApifyClientOptions, type Build } from import archiver from 'archiver'; import { AxiosHeaders } from 'axios'; import escapeStringRegexp from 'escape-string-regexp'; -import { globby } from 'globby'; +import ignore from 'ignore'; import { getEncoding } from 'istextorbinary'; import { Mime } from 'mime'; import otherMimes from 'mime/types/other.js'; import standardMimes from 'mime/types/standard.js'; import { gte, minVersion, satisfies } from 'semver'; +import { glob } from 'tinyglobby'; import { ACTOR_ENV_VARS, @@ -134,8 +135,7 @@ const getTokenWithAuthFileFallback = (existingToken?: string) => { return existingToken; }; -// biome-ignore format: off -type CJSAxiosHeaders = import('axios', { with: { 'resolution-mode': 'require' } }).AxiosRequestConfig['headers']; +type CJSAxiosHeaders = import('axios', { with: { 'resolution-mode': 'require' }}).AxiosRequestConfig['headers']; /** * Returns options for ApifyClient @@ -231,9 +231,13 @@ export const setLocalEnv = async (actDir: string) => { if (gitignoreAdditions.length > 0) { if (gitignoreContents.length > 0) { gitignoreAdditions.unshift('# Added by Apify CLI'); - writeFileSync(gitignorePath, `\n${gitignoreAdditions.join('\n')}\n`, { flag: 'a' }); + writeFileSync(gitignorePath, `\n${gitignoreAdditions.join('\n')}\n`, { + flag: 'a', + }); } else { - writeFileSync(gitignorePath, `${gitignoreAdditions.join('\n')}\n`, { flag: 'w' }); + writeFileSync(gitignorePath, `${gitignoreAdditions.join('\n')}\n`, { + flag: 'w', + }); } } }; @@ -285,18 +289,43 @@ export const createSourceFiles = async (paths: string[], cwd: string) => { }); }; +/** + * Reads .gitignore from the given directory and returns an ignore filter. + * Returns undefined if no .gitignore file exists. + */ +const getGitignoreFilter = (dir: string) => { + const gitignorePath = join(dir, '.gitignore'); + + if (!existsSync(gitignorePath)) { + return undefined; + } + + const content = readFileSync(gitignorePath, { encoding: 'utf-8' }); + return ignore().add(content); +}; + /** * Get Actor local files, omit files defined in .gitignore and .git folder * All dot files(.file) and folders(.folder/) are included. */ -export const getActorLocalFilePaths = async (cwd?: string) => - globby(['*', '**/**'], { +export const getActorLocalFilePaths = async (cwd?: string) => { + const resolvedCwd = cwd ?? process.cwd(); + const ig = getGitignoreFilter(resolvedCwd); + + const paths = await glob(['*', '**/**'], { ignore: ['.git/**', 'apify_storage', 'node_modules', 'storage', 'crawlee_storage'], - gitignore: true, dot: true, - cwd, + expandDirectories: false, + cwd: resolvedCwd, }); + if (!ig) { + return paths; + } + + return ig.filter(paths); +}; + /** * Create zip file with all Actor files specified with pathsToZip */ @@ -441,7 +470,7 @@ export const getNpmCmd = (): string => { * Returns true if apify storage is empty (expect INPUT.*) */ export const checkIfStorageIsEmpty = async () => { - const filesWithoutInput = await globby([ + const filesWithoutInput = await glob([ `${getLocalStorageDir()}/**`, // Omit INPUT.* file `!${getLocalKeyValueStorePath()}/${KEY_VALUE_STORE_KEYS.INPUT}.*`, diff --git a/test/local/lib/utils.test.ts b/test/local/lib/utils.test.ts index 4d8c5f548..3f2fa391a 100644 --- a/test/local/lib/utils.test.ts +++ b/test/local/lib/utils.test.ts @@ -40,7 +40,9 @@ describe('Utils', () => { }); FILES.concat(FILES_TO_IGNORE, FILES_IN_IGNORED_DIR).forEach((file) => - writeFileSync(joinPath(file), Math.random().toString(36).substring(7), { flag: 'w' }), + writeFileSync(joinPath(file), Math.random().toString(36).substring(7), { + flag: 'w', + }), ); const toIgnore = FOLDERS_TO_IGNORE.concat(FILES_TO_IGNORE).join('\n'); @@ -80,6 +82,118 @@ describe('Utils', () => { }); }); + describe('getActorLocalFilePaths()', () => { + describe('without .gitignore', () => { + const { tmpPath, joinPath, beforeAllCalls, afterAllCalls } = useTempPath('no-gitignore-dir', { + create: true, + remove: true, + cwd: false, + cwdParent: false, + }); + + beforeAll(async () => { + await beforeAllCalls(); + + ensureFolderExistsSync(tmpPath, 'src'); + writeFileSync(joinPath('main.js'), 'console.log("hi")', { flag: 'w' }); + writeFileSync(joinPath('src/index.js'), 'export default {}', { + flag: 'w', + }); + writeFileSync(joinPath('.env'), 'SECRET=123', { flag: 'w' }); + }); + + afterAll(async () => { + await afterAllCalls(); + }); + + it('should return all files when no .gitignore exists', async () => { + const paths = await getActorLocalFilePaths(tmpPath); + + expect(paths).toContain('main.js'); + expect(paths).toContain('src/index.js'); + expect(paths).toContain('.env'); + }); + }); + + describe('with .gitignore patterns', () => { + const { tmpPath, joinPath, beforeAllCalls, afterAllCalls } = useTempPath('gitignore-patterns-dir', { + create: true, + remove: true, + cwd: false, + cwdParent: false, + }); + + beforeAll(async () => { + await beforeAllCalls(); + + ensureFolderExistsSync(tmpPath, 'src'); + ensureFolderExistsSync(tmpPath, 'dist'); + ensureFolderExistsSync(tmpPath, '.actor'); + writeFileSync(joinPath('src/index.js'), 'export default {}', { + flag: 'w', + }); + writeFileSync(joinPath('dist/bundle.js'), 'compiled', { flag: 'w' }); + writeFileSync(joinPath('.env'), 'SECRET=123', { flag: 'w' }); + writeFileSync(joinPath('.actor/actor.json'), '{}', { flag: 'w' }); + writeFileSync(joinPath('package.json'), '{}', { flag: 'w' }); + }); + + afterAll(async () => { + await afterAllCalls(); + }); + + it('should exclude files matching .gitignore wildcard patterns', async () => { + writeFileSync(joinPath('.gitignore'), 'dist\n*.env\n', { flag: 'w' }); + const paths = await getActorLocalFilePaths(tmpPath); + + expect(paths).not.toContain('dist/bundle.js'); + expect(paths).not.toContain('.env'); + expect(paths).toContain('src/index.js'); + expect(paths).toContain('package.json'); + }); + + it('should include dot folders not in .gitignore', async () => { + writeFileSync(joinPath('.gitignore'), 'dist\n', { flag: 'w' }); + const paths = await getActorLocalFilePaths(tmpPath); + + expect(paths).toContain('.actor/actor.json'); + expect(paths).toContain('.env'); + }); + + it('should handle negation patterns in .gitignore', async () => { + writeFileSync(joinPath('.gitignore'), '*.env\n!.env\n', { + flag: 'w', + }); + const paths = await getActorLocalFilePaths(tmpPath); + + expect(paths).toContain('.env'); + expect(paths).toContain('src/index.js'); + expect(paths).toContain('dist/bundle.js'); + }); + + it('should handle comments and empty lines in .gitignore', async () => { + writeFileSync(joinPath('.gitignore'), '# This is a comment\n\ndist\n\n# Another comment\n', { + flag: 'w', + }); + const paths = await getActorLocalFilePaths(tmpPath); + + expect(paths).not.toContain('dist/bundle.js'); + expect(paths).toContain('src/index.js'); + expect(paths).toContain('.env'); + }); + + it('should handle empty .gitignore', async () => { + writeFileSync(joinPath('.gitignore'), '', { flag: 'w' }); + const paths = await getActorLocalFilePaths(tmpPath); + + expect(paths).toContain('src/index.js'); + expect(paths).toContain('dist/bundle.js'); + expect(paths).toContain('.env'); + expect(paths).toContain('.actor/actor.json'); + }); + }); + }); + describe('input file regex', () => { const validFiles = ['INPUT', 'INPUT.json', 'INPUT.bin']; diff --git a/yarn.lock b/yarn.lock index dfeac11ff..c84f872e1 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2315,8 +2315,8 @@ __metadata: eslint-config-prettier: "npm:^10.1.2" execa: "npm:^9.5.2" express: "npm:~5.2.0" - globby: "npm:~15.0.0" handlebars: "npm:~4.7.8" + ignore: "npm:^7.0.5" indent-string: "npm:^5.0.0" is-ci: "npm:~4.1.0" istextorbinary: "npm:~9.5.0" @@ -2333,6 +2333,7 @@ __metadata: string-width: "npm:^8.0.0" strip-ansi: "npm:^7.1.0" tiged: "npm:~2.12.7" + tinyglobby: "npm:^0.2.15" tsup: "npm:^8.5.0" tsx: "npm:^4.16.5" typescript: "npm:^5.8.3" @@ -4407,7 +4408,7 @@ __metadata: languageName: node linkType: hard -"fast-glob@npm:^3.2.2, fast-glob@npm:^3.3.3": +"fast-glob@npm:^3.2.2": version: 3.3.3 resolution: "fast-glob@npm:3.3.3" dependencies: @@ -4949,20 +4950,6 @@ __metadata: languageName: node linkType: hard -"globby@npm:~15.0.0": - version: 15.0.0 - resolution: "globby@npm:15.0.0" - dependencies: - "@sindresorhus/merge-streams": "npm:^4.0.0" - fast-glob: "npm:^3.3.3" - ignore: "npm:^7.0.5" - path-type: "npm:^6.0.0" - slash: "npm:^5.1.0" - unicorn-magic: "npm:^0.3.0" - checksum: 10c0/e4107be0579bcdd9642b8dff86aeafeaf62b2b9dd116669ab6e02e0e0c07ada0d972c2db182dee7588b460fe8c8919ddcc6b1cc4db405ca3a2adc9d35fa6eb21 - languageName: node - linkType: hard - "globrex@npm:^0.1.2": version: 0.1.2 resolution: "globrex@npm:0.1.2" @@ -7007,13 +6994,6 @@ __metadata: languageName: node linkType: hard -"path-type@npm:^6.0.0": - version: 6.0.0 - resolution: "path-type@npm:6.0.0" - checksum: 10c0/55baa8b1187d6dc683d5a9cfcc866168d6adff58e5db91126795376d818eee46391e00b2a4d53e44d844c7524a7d96aa68cc68f4f3e500d3d069a39e6535481c - languageName: node - linkType: hard - "pathe@npm:^2.0.1, pathe@npm:^2.0.3": version: 2.0.3 resolution: "pathe@npm:2.0.3" @@ -7956,13 +7936,6 @@ __metadata: languageName: node linkType: hard -"slash@npm:^5.1.0": - version: 5.1.0 - resolution: "slash@npm:5.1.0" - checksum: 10c0/eb48b815caf0bdc390d0519d41b9e0556a14380f6799c72ba35caf03544d501d18befdeeef074bc9c052acf69654bc9e0d79d7f1de0866284137a40805299eb3 - languageName: node - linkType: hard - "slice-ansi@npm:^7.1.0": version: 7.1.2 resolution: "slice-ansi@npm:7.1.2" From 2747b91e9877511a1be7ee56ce0ecf3f7d196b6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Tue, 10 Feb 2026 12:45:27 +0100 Subject: [PATCH 02/11] support nested gitignores --- src/lib/utils.ts | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/lib/utils.ts b/src/lib/utils.ts index 6199dd305..9bd93e537 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -289,41 +289,40 @@ export const createSourceFiles = async (paths: string[], cwd: string) => { }); }; -/** - * Reads .gitignore from the given directory and returns an ignore filter. - * Returns undefined if no .gitignore file exists. - */ -const getGitignoreFilter = (dir: string) => { - const gitignorePath = join(dir, '.gitignore'); - - if (!existsSync(gitignorePath)) { - return undefined; - } - - const content = readFileSync(gitignorePath, { encoding: 'utf-8' }); - return ignore().add(content); -}; - /** * Get Actor local files, omit files defined in .gitignore and .git folder * All dot files(.file) and folders(.folder/) are included. */ export const getActorLocalFilePaths = async (cwd?: string) => { const resolvedCwd = cwd ?? process.cwd(); - const ig = getGitignoreFilter(resolvedCwd); - const paths = await glob(['*', '**/**'], { + let paths = await glob(['*', '**/**'], { ignore: ['.git/**', 'apify_storage', 'node_modules', 'storage', 'crawlee_storage'], dot: true, expandDirectories: false, cwd: resolvedCwd, }); - if (!ig) { - return paths; + // Collect all .gitignore files and apply each one scoped to its directory + const gitignoreFiles = paths.filter((p) => p === '.gitignore' || p.endsWith('/.gitignore')); + + for (const giPath of gitignoreFiles) { + const dir = dirname(giPath); + const content = readFileSync(join(resolvedCwd, giPath), { encoding: 'utf-8' }); + const ig = ignore().add(content); + + paths = paths.filter((p) => { + // Only apply this .gitignore to paths under its directory + if (dir !== '.') { + if (!p.startsWith(`${dir}/`)) return true; + return !ig.ignores(p.slice(dir.length + 1)); + } + + return !ig.ignores(p); + }); } - return ig.filter(paths); + return paths; }; /** From b9c816cd7a13379ccd235896ad90ee172a58a97a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Tue, 10 Feb 2026 12:46:13 +0100 Subject: [PATCH 03/11] update test --- test/local/lib/utils.test.ts | 63 ++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/test/local/lib/utils.test.ts b/test/local/lib/utils.test.ts index 3f2fa391a..6206e7cf8 100644 --- a/test/local/lib/utils.test.ts +++ b/test/local/lib/utils.test.ts @@ -192,6 +192,69 @@ describe('Utils', () => { expect(paths).toContain('.actor/actor.json'); }); }); + + describe('with nested .gitignore files', () => { + const { tmpPath, joinPath, beforeAllCalls, afterAllCalls } = useTempPath('nested-gitignore-dir', { + create: true, + remove: true, + cwd: false, + cwdParent: false, + }); + + beforeAll(async () => { + await beforeAllCalls(); + + ensureFolderExistsSync(tmpPath, 'src'); + ensureFolderExistsSync(tmpPath, 'src/generated'); + ensureFolderExistsSync(tmpPath, 'lib'); + writeFileSync(joinPath('main.js'), 'entry', { flag: 'w' }); + writeFileSync(joinPath('src/app.js'), 'app', { flag: 'w' }); + writeFileSync(joinPath('src/generated/types.js'), 'generated', { flag: 'w' }); + writeFileSync(joinPath('src/generated/keep.js'), 'keep', { flag: 'w' }); + writeFileSync(joinPath('lib/helper.js'), 'helper', { flag: 'w' }); + writeFileSync(joinPath('lib/temp.log'), 'log', { flag: 'w' }); + }); + + afterAll(async () => { + await afterAllCalls(); + }); + + it('should apply nested .gitignore scoped to its directory', async () => { + writeFileSync(joinPath('.gitignore'), '', { flag: 'w' }); + writeFileSync(joinPath('src/.gitignore'), 'generated\n', { flag: 'w' }); + const paths = await getActorLocalFilePaths(tmpPath); + + expect(paths).toContain('main.js'); + expect(paths).toContain('src/app.js'); + expect(paths).not.toContain('src/generated/types.js'); + expect(paths).not.toContain('src/generated/keep.js'); + expect(paths).toContain('lib/helper.js'); + expect(paths).toContain('lib/temp.log'); + }); + + it('should not apply nested .gitignore patterns to sibling directories', async () => { + writeFileSync(joinPath('.gitignore'), '', { flag: 'w' }); + writeFileSync(joinPath('lib/.gitignore'), '*.log\n', { flag: 'w' }); + const paths = await getActorLocalFilePaths(tmpPath); + + expect(paths).not.toContain('lib/temp.log'); + expect(paths).toContain('lib/helper.js'); + expect(paths).toContain('src/app.js'); + expect(paths).toContain('main.js'); + }); + + it('should combine root and nested .gitignore rules', async () => { + writeFileSync(joinPath('.gitignore'), '*.log\n', { flag: 'w' }); + writeFileSync(joinPath('src/.gitignore'), 'generated\n', { flag: 'w' }); + const paths = await getActorLocalFilePaths(tmpPath); + + expect(paths).toContain('main.js'); + expect(paths).toContain('src/app.js'); + expect(paths).not.toContain('src/generated/types.js'); + expect(paths).not.toContain('lib/temp.log'); + expect(paths).toContain('lib/helper.js'); + }); + }); }); describe('input file regex', () => { From 6db1f1144e1ab1e96f662a0e0b428ce5f6a4f6eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Tue, 10 Feb 2026 21:46:05 +0100 Subject: [PATCH 04/11] fix gitignore paths --- src/lib/utils.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lib/utils.ts b/src/lib/utils.ts index 9bd93e537..a56f6f4d1 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -303,8 +303,10 @@ export const getActorLocalFilePaths = async (cwd?: string) => { cwd: resolvedCwd, }); - // Collect all .gitignore files and apply each one scoped to its directory - const gitignoreFiles = paths.filter((p) => p === '.gitignore' || p.endsWith('/.gitignore')); + // Collect all .gitignore files sorted by depth (root first) and apply each one scoped to its directory + const gitignoreFiles = paths + .filter((p) => p === '.gitignore' || p.endsWith('/.gitignore')) + .sort((a, b) => a.split('/').length - b.split('/').length); for (const giPath of gitignoreFiles) { const dir = dirname(giPath); From 039076ebf19a7e2e035cae2eec837c7d64a742c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 12 Feb 2026 00:51:22 +0100 Subject: [PATCH 05/11] remove ignore and use git ls-files --- package.json | 1 - src/lib/utils.ts | 61 +++++++++++++++++++++++++----------------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/package.json b/package.json index f03bf0783..f1569a2aa 100644 --- a/package.json +++ b/package.json @@ -93,7 +93,6 @@ "execa": "^9.5.2", "express": "~5.2.0", "handlebars": "~4.7.8", - "ignore": "^7.0.5", "indent-string": "^5.0.0", "is-ci": "~4.1.0", "istextorbinary": "~9.5.0", diff --git a/src/lib/utils.ts b/src/lib/utils.ts index a56f6f4d1..1c5a33f3c 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -1,3 +1,4 @@ +import { execSync } from 'node:child_process'; import { createWriteStream, existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync } from 'node:fs'; import { mkdir, readFile } from 'node:fs/promises'; import type { IncomingMessage } from 'node:http'; @@ -15,13 +16,12 @@ import { type ActorRun, ApifyClient, type ApifyClientOptions, type Build } from import archiver from 'archiver'; import { AxiosHeaders } from 'axios'; import escapeStringRegexp from 'escape-string-regexp'; -import ignore from 'ignore'; import { getEncoding } from 'istextorbinary'; import { Mime } from 'mime'; import otherMimes from 'mime/types/other.js'; import standardMimes from 'mime/types/standard.js'; import { gte, minVersion, satisfies } from 'semver'; -import { glob } from 'tinyglobby'; +import { escapePath, glob } from 'tinyglobby'; import { ACTOR_ENV_VARS, @@ -46,6 +46,7 @@ import { SUPPORTED_NODEJS_VERSION, } from './consts.js'; import { deleteFile, ensureFolderExistsSync, rimrafPromised } from './files.js'; +import { warning } from './outputs.js'; import type { AuthJSON } from './types.js'; // Export AJV properly: https://github.com/ajv-validator/ajv/issues/2132 @@ -296,35 +297,34 @@ export const createSourceFiles = async (paths: string[], cwd: string) => { export const getActorLocalFilePaths = async (cwd?: string) => { const resolvedCwd = cwd ?? process.cwd(); - let paths = await glob(['*', '**/**'], { - ignore: ['.git/**', 'apify_storage', 'node_modules', 'storage', 'crawlee_storage'], - dot: true, - expandDirectories: false, - cwd: resolvedCwd, - }); + const ignore = ['.git/**', 'apify_storage', 'node_modules', 'storage', 'crawlee_storage']; - // Collect all .gitignore files sorted by depth (root first) and apply each one scoped to its directory - const gitignoreFiles = paths - .filter((p) => p === '.gitignore' || p.endsWith('/.gitignore')) - .sort((a, b) => a.split('/').length - b.split('/').length); - - for (const giPath of gitignoreFiles) { - const dir = dirname(giPath); - const content = readFileSync(join(resolvedCwd, giPath), { encoding: 'utf-8' }); - const ig = ignore().add(content); - - paths = paths.filter((p) => { - // Only apply this .gitignore to paths under its directory - if (dir !== '.') { - if (!p.startsWith(`${dir}/`)) return true; - return !ig.ignores(p.slice(dir.length + 1)); - } - - return !ig.ignores(p); + // Use git ls-files to get gitignored paths — this correctly handles ancestor .gitignore files, + // nested .gitignore files, .git/info/exclude, and global gitignore config + try { + const gitIgnored = execSync('git ls-files --others --ignored --exclude-standard --directory', { + cwd: resolvedCwd, + encoding: 'utf-8', + stdio: ['ignore', 'pipe', 'ignore'], + }) + .split('\n') + .filter(Boolean) + .map((p) => escapePath(p)); + + ignore.push(...gitIgnored); + } catch { + warning({ + message: + 'Unable to read .gitignore rules — git is not installed or the directory is not in a git repository.', }); } - return paths; + return glob(['*', '**/**'], { + ignore, + dot: true, + expandDirectories: false, + cwd: resolvedCwd, + }); }; /** @@ -337,10 +337,13 @@ export const createActZip = async (zipName: string, pathsToZip: string[], cwd: s } const writeStream = createWriteStream(zipName); - const archive = archiver('zip'); + // Use compression level 6 for better balance between speed and compression ratio (default is 9) + const archive = archiver('zip', { + zlib: { level: 6 }, + }); archive.pipe(writeStream); - pathsToZip.forEach((globPath) => archive.glob(globPath, { cwd })); + pathsToZip.forEach((filePath) => archive.file(join(cwd, filePath), { name: filePath })); await archive.finalize(); }; From 45fa8bcbbf66efde68401767a90bca596c653fd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 12 Feb 2026 00:55:41 +0100 Subject: [PATCH 06/11] remove unused tests --- test/local/lib/utils.test.ts | 175 ----------------------------------- 1 file changed, 175 deletions(-) diff --git a/test/local/lib/utils.test.ts b/test/local/lib/utils.test.ts index 6206e7cf8..dd5385d16 100644 --- a/test/local/lib/utils.test.ts +++ b/test/local/lib/utils.test.ts @@ -82,181 +82,6 @@ describe('Utils', () => { }); }); - describe('getActorLocalFilePaths()', () => { - describe('without .gitignore', () => { - const { tmpPath, joinPath, beforeAllCalls, afterAllCalls } = useTempPath('no-gitignore-dir', { - create: true, - remove: true, - cwd: false, - cwdParent: false, - }); - - beforeAll(async () => { - await beforeAllCalls(); - - ensureFolderExistsSync(tmpPath, 'src'); - writeFileSync(joinPath('main.js'), 'console.log("hi")', { flag: 'w' }); - writeFileSync(joinPath('src/index.js'), 'export default {}', { - flag: 'w', - }); - writeFileSync(joinPath('.env'), 'SECRET=123', { flag: 'w' }); - }); - - afterAll(async () => { - await afterAllCalls(); - }); - - it('should return all files when no .gitignore exists', async () => { - const paths = await getActorLocalFilePaths(tmpPath); - - expect(paths).toContain('main.js'); - expect(paths).toContain('src/index.js'); - expect(paths).toContain('.env'); - }); - }); - - describe('with .gitignore patterns', () => { - const { tmpPath, joinPath, beforeAllCalls, afterAllCalls } = useTempPath('gitignore-patterns-dir', { - create: true, - remove: true, - cwd: false, - cwdParent: false, - }); - - beforeAll(async () => { - await beforeAllCalls(); - - ensureFolderExistsSync(tmpPath, 'src'); - ensureFolderExistsSync(tmpPath, 'dist'); - ensureFolderExistsSync(tmpPath, '.actor'); - writeFileSync(joinPath('src/index.js'), 'export default {}', { - flag: 'w', - }); - writeFileSync(joinPath('dist/bundle.js'), 'compiled', { flag: 'w' }); - writeFileSync(joinPath('.env'), 'SECRET=123', { flag: 'w' }); - writeFileSync(joinPath('.actor/actor.json'), '{}', { flag: 'w' }); - writeFileSync(joinPath('package.json'), '{}', { flag: 'w' }); - }); - - afterAll(async () => { - await afterAllCalls(); - }); - - it('should exclude files matching .gitignore wildcard patterns', async () => { - writeFileSync(joinPath('.gitignore'), 'dist\n*.env\n', { flag: 'w' }); - const paths = await getActorLocalFilePaths(tmpPath); - - expect(paths).not.toContain('dist/bundle.js'); - expect(paths).not.toContain('.env'); - expect(paths).toContain('src/index.js'); - expect(paths).toContain('package.json'); - }); - - it('should include dot folders not in .gitignore', async () => { - writeFileSync(joinPath('.gitignore'), 'dist\n', { flag: 'w' }); - const paths = await getActorLocalFilePaths(tmpPath); - - expect(paths).toContain('.actor/actor.json'); - expect(paths).toContain('.env'); - }); - - it('should handle negation patterns in .gitignore', async () => { - writeFileSync(joinPath('.gitignore'), '*.env\n!.env\n', { - flag: 'w', - }); - const paths = await getActorLocalFilePaths(tmpPath); - - expect(paths).toContain('.env'); - expect(paths).toContain('src/index.js'); - expect(paths).toContain('dist/bundle.js'); - }); - - it('should handle comments and empty lines in .gitignore', async () => { - writeFileSync(joinPath('.gitignore'), '# This is a comment\n\ndist\n\n# Another comment\n', { - flag: 'w', - }); - const paths = await getActorLocalFilePaths(tmpPath); - - expect(paths).not.toContain('dist/bundle.js'); - expect(paths).toContain('src/index.js'); - expect(paths).toContain('.env'); - }); - - it('should handle empty .gitignore', async () => { - writeFileSync(joinPath('.gitignore'), '', { flag: 'w' }); - const paths = await getActorLocalFilePaths(tmpPath); - - expect(paths).toContain('src/index.js'); - expect(paths).toContain('dist/bundle.js'); - expect(paths).toContain('.env'); - expect(paths).toContain('.actor/actor.json'); - }); - }); - - describe('with nested .gitignore files', () => { - const { tmpPath, joinPath, beforeAllCalls, afterAllCalls } = useTempPath('nested-gitignore-dir', { - create: true, - remove: true, - cwd: false, - cwdParent: false, - }); - - beforeAll(async () => { - await beforeAllCalls(); - - ensureFolderExistsSync(tmpPath, 'src'); - ensureFolderExistsSync(tmpPath, 'src/generated'); - ensureFolderExistsSync(tmpPath, 'lib'); - writeFileSync(joinPath('main.js'), 'entry', { flag: 'w' }); - writeFileSync(joinPath('src/app.js'), 'app', { flag: 'w' }); - writeFileSync(joinPath('src/generated/types.js'), 'generated', { flag: 'w' }); - writeFileSync(joinPath('src/generated/keep.js'), 'keep', { flag: 'w' }); - writeFileSync(joinPath('lib/helper.js'), 'helper', { flag: 'w' }); - writeFileSync(joinPath('lib/temp.log'), 'log', { flag: 'w' }); - }); - - afterAll(async () => { - await afterAllCalls(); - }); - - it('should apply nested .gitignore scoped to its directory', async () => { - writeFileSync(joinPath('.gitignore'), '', { flag: 'w' }); - writeFileSync(joinPath('src/.gitignore'), 'generated\n', { flag: 'w' }); - const paths = await getActorLocalFilePaths(tmpPath); - - expect(paths).toContain('main.js'); - expect(paths).toContain('src/app.js'); - expect(paths).not.toContain('src/generated/types.js'); - expect(paths).not.toContain('src/generated/keep.js'); - expect(paths).toContain('lib/helper.js'); - expect(paths).toContain('lib/temp.log'); - }); - - it('should not apply nested .gitignore patterns to sibling directories', async () => { - writeFileSync(joinPath('.gitignore'), '', { flag: 'w' }); - writeFileSync(joinPath('lib/.gitignore'), '*.log\n', { flag: 'w' }); - const paths = await getActorLocalFilePaths(tmpPath); - - expect(paths).not.toContain('lib/temp.log'); - expect(paths).toContain('lib/helper.js'); - expect(paths).toContain('src/app.js'); - expect(paths).toContain('main.js'); - }); - - it('should combine root and nested .gitignore rules', async () => { - writeFileSync(joinPath('.gitignore'), '*.log\n', { flag: 'w' }); - writeFileSync(joinPath('src/.gitignore'), 'generated\n', { flag: 'w' }); - const paths = await getActorLocalFilePaths(tmpPath); - - expect(paths).toContain('main.js'); - expect(paths).toContain('src/app.js'); - expect(paths).not.toContain('src/generated/types.js'); - expect(paths).not.toContain('lib/temp.log'); - expect(paths).toContain('lib/helper.js'); - }); - }); - }); - describe('input file regex', () => { const validFiles = ['INPUT', 'INPUT.json', 'INPUT.bin']; From fca261d5d140130ed053cd46d6e0a92c1a3b3242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Thu, 12 Feb 2026 01:27:26 +0100 Subject: [PATCH 07/11] update package lock --- yarn.lock | 1 - 1 file changed, 1 deletion(-) diff --git a/yarn.lock b/yarn.lock index 86e179bf0..1af6f57d9 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2316,7 +2316,6 @@ __metadata: execa: "npm:^9.5.2" express: "npm:~5.2.0" handlebars: "npm:~4.7.8" - ignore: "npm:^7.0.5" indent-string: "npm:^5.0.0" is-ci: "npm:~4.1.0" istextorbinary: "npm:~9.5.0" From 1d6434d71fe20120b2f63219ccda029c05fd1077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Fri, 13 Feb 2026 10:41:11 +0100 Subject: [PATCH 08/11] fix test --- test/local/lib/utils.test.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/local/lib/utils.test.ts b/test/local/lib/utils.test.ts index dd5385d16..ab43f9d7b 100644 --- a/test/local/lib/utils.test.ts +++ b/test/local/lib/utils.test.ts @@ -35,6 +35,10 @@ describe('Utils', () => { beforeAll(async () => { await beforeAllCalls(); + // Initialize a fresh git repo so the local .gitignore is parsed independently + // from the parent repo (which gitignores test/tmp entirely) + await execWithLog({ cmd: 'git', args: ['init'], opts: { cwd: tmpPath } }); + FOLDERS.concat(FOLDERS_TO_IGNORE).forEach((folder) => { ensureFolderExistsSync(tmpPath, folder); }); From e3a8cf2624be8ce6e7b7ca9698b5309cf5c9182d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Fri, 20 Feb 2026 16:11:20 +0100 Subject: [PATCH 09/11] feat: fall back to ignore package for gitignore rules when git is unavailable --- package.json | 1 + src/lib/utils.ts | 58 ++++++++++++++++-- .../lib/utils-gitignore-fallback.test.ts | 61 +++++++++++++++++++ yarn.lock | 3 +- 4 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 test/local/lib/utils-gitignore-fallback.test.ts diff --git a/package.json b/package.json index f1569a2aa..1ef0e5e4e 100644 --- a/package.json +++ b/package.json @@ -93,6 +93,7 @@ "execa": "^9.5.2", "express": "~5.2.0", "handlebars": "~4.7.8", + "ignore": "^5.3.2", "indent-string": "^5.0.0", "is-ci": "~4.1.0", "istextorbinary": "~9.5.0", diff --git a/src/lib/utils.ts b/src/lib/utils.ts index 1c5a33f3c..5983e6ca1 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -16,6 +16,7 @@ import { type ActorRun, ApifyClient, type ApifyClientOptions, type Build } from import archiver from 'archiver'; import { AxiosHeaders } from 'axios'; import escapeStringRegexp from 'escape-string-regexp'; +import ignoreModule, { type Ignore } from 'ignore'; import { getEncoding } from 'istextorbinary'; import { Mime } from 'mime'; import otherMimes from 'mime/types/other.js'; @@ -46,7 +47,6 @@ import { SUPPORTED_NODEJS_VERSION, } from './consts.js'; import { deleteFile, ensureFolderExistsSync, rimrafPromised } from './files.js'; -import { warning } from './outputs.js'; import type { AuthJSON } from './types.js'; // Export AJV properly: https://github.com/ajv-validator/ajv/issues/2132 @@ -290,6 +290,50 @@ export const createSourceFiles = async (paths: string[], cwd: string) => { }); }; +/** + * Fallback for when git is unavailable: find all .gitignore files and build a filter + * using the `ignore` package, scoped to each file's directory. + */ +const getGitignoreFallbackFilter = async (cwd: string): Promise<(paths: string[]) => string[]> => { + const gitignoreFiles = await glob('**/.gitignore', { + dot: true, + cwd, + ignore: ['.git/**'], + expandDirectories: false, + }); + + if (gitignoreFiles.length === 0) { + return (paths) => paths; + } + + const filters: { dir: string; ig: Ignore }[] = []; + + for (const gitignoreFile of gitignoreFiles) { + const gitignoreDir = dirname(gitignoreFile); // e.g. 'src' or '.' + const content = readFileSync(join(cwd, gitignoreFile), 'utf-8'); + const ig = (ignoreModule as unknown as () => Ignore)().add(content); + filters.push({ dir: gitignoreDir === '.' ? '' : gitignoreDir, ig }); + } + + return (paths) => + paths.filter((filePath) => { + for (const { dir, ig } of filters) { + let relativePath: string | null; + if (!dir) { + relativePath = filePath; + } else if (filePath.startsWith(`${dir}/`)) { + relativePath = filePath.slice(dir.length + 1); + } else { + relativePath = null; + } + if (relativePath !== null && ig.ignores(relativePath)) { + return false; + } + } + return true; + }); +}; + /** * Get Actor local files, omit files defined in .gitignore and .git folder * All dot files(.file) and folders(.folder/) are included. @@ -299,6 +343,8 @@ export const getActorLocalFilePaths = async (cwd?: string) => { const ignore = ['.git/**', 'apify_storage', 'node_modules', 'storage', 'crawlee_storage']; + let fallbackFilter: ((paths: string[]) => string[]) | null = null; + // Use git ls-files to get gitignored paths — this correctly handles ancestor .gitignore files, // nested .gitignore files, .git/info/exclude, and global gitignore config try { @@ -313,18 +359,18 @@ export const getActorLocalFilePaths = async (cwd?: string) => { ignore.push(...gitIgnored); } catch { - warning({ - message: - 'Unable to read .gitignore rules — git is not installed or the directory is not in a git repository.', - }); + // git is unavailable or directory is not a git repo — fall back to parsing .gitignore files + fallbackFilter = await getGitignoreFallbackFilter(resolvedCwd); } - return glob(['*', '**/**'], { + const paths = await glob(['*', '**/**'], { ignore, dot: true, expandDirectories: false, cwd: resolvedCwd, }); + + return fallbackFilter ? fallbackFilter(paths) : paths; }; /** diff --git a/test/local/lib/utils-gitignore-fallback.test.ts b/test/local/lib/utils-gitignore-fallback.test.ts new file mode 100644 index 000000000..c861de50c --- /dev/null +++ b/test/local/lib/utils-gitignore-fallback.test.ts @@ -0,0 +1,61 @@ +import { writeFileSync } from 'node:fs'; + +import { ensureFolderExistsSync } from '../../../src/lib/files.js'; +import { getActorLocalFilePaths } from '../../../src/lib/utils.js'; +import { useTempPath } from '../../__setup__/hooks/useTempPath.js'; + +// Mock execSync to simulate git not being available. +// vi.mock is hoisted before imports, so utils.ts gets the mocked version. +vi.mock('node:child_process', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + execSync: () => { + throw new Error('not a git repository'); + }, + }; +}); + +const TEST_DIR = 'gitignore-fallback-test-dir'; +const FOLDERS = ['src', 'src/utils']; +const FOLDERS_TO_IGNORE = ['dist', 'src/generated']; +const FILES = ['main.js', 'src/index.js', 'src/utils/helper.js']; +const FILES_IN_IGNORED_DIR = ['dist/bundle.js', 'src/generated/types.js']; +const FILES_TO_IGNORE = ['debug.log']; + +describe('Utils - gitignore fallback (no git)', () => { + const { tmpPath, joinPath, beforeAllCalls, afterAllCalls } = useTempPath(TEST_DIR, { + create: true, + remove: true, + cwd: false, + cwdParent: false, + }); + + beforeAll(async () => { + await beforeAllCalls(); + + // NOTE: No git init here — execSync is mocked to throw, triggering the fallback path. + + FOLDERS.concat(FOLDERS_TO_IGNORE).forEach((folder) => { + ensureFolderExistsSync(tmpPath, folder); + }); + + FILES.concat(FILES_TO_IGNORE, FILES_IN_IGNORED_DIR).forEach((file) => + writeFileSync(joinPath(file), 'content', { flag: 'w' }), + ); + + const toIgnore = FOLDERS_TO_IGNORE.concat(FILES_TO_IGNORE).join('\n'); + writeFileSync(joinPath('.gitignore'), toIgnore, { flag: 'w' }); + }); + + afterAll(async () => { + await afterAllCalls(); + }); + + it('should exclude files listed in .gitignore when git is unavailable', async () => { + const paths = await getActorLocalFilePaths(tmpPath); + + FILES.forEach((file) => expect(paths).toContain(file)); + FILES_IN_IGNORED_DIR.concat(FILES_TO_IGNORE).forEach((file) => expect(paths).not.toContain(file)); + }); +}); diff --git a/yarn.lock b/yarn.lock index 1af6f57d9..8975e5c1f 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2316,6 +2316,7 @@ __metadata: execa: "npm:^9.5.2" express: "npm:~5.2.0" handlebars: "npm:~4.7.8" + ignore: "npm:^5.3.2" indent-string: "npm:^5.0.0" is-ci: "npm:~4.1.0" istextorbinary: "npm:~9.5.0" @@ -5257,7 +5258,7 @@ __metadata: languageName: node linkType: hard -"ignore@npm:^5.2.0": +"ignore@npm:^5.2.0, ignore@npm:^5.3.2": version: 5.3.2 resolution: "ignore@npm:5.3.2" checksum: 10c0/f9f652c957983634ded1e7f02da3b559a0d4cc210fca3792cb67f1b153623c9c42efdc1c4121af171e295444459fc4a9201101fb041b1104a3c000bccb188337 From fc73b3db93e0ba6ff73532e53127baed1d658a21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Fri, 20 Feb 2026 16:18:46 +0100 Subject: [PATCH 10/11] refactor: use async readFile, add comment on CJS cast, add nested gitignore test --- src/lib/utils.ts | 4 +- .../lib/utils-gitignore-fallback.test.ts | 40 +++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/lib/utils.ts b/src/lib/utils.ts index 5983e6ca1..208dcbbe6 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -310,7 +310,9 @@ const getGitignoreFallbackFilter = async (cwd: string): Promise<(paths: string[] for (const gitignoreFile of gitignoreFiles) { const gitignoreDir = dirname(gitignoreFile); // e.g. 'src' or '.' - const content = readFileSync(join(cwd, gitignoreFile), 'utf-8'); + const content = await readFile(join(cwd, gitignoreFile), 'utf-8'); + // `ignore` is a CJS package; TypeScript sees its default import as the module + // object rather than the callable factory, so we cast through unknown. const ig = (ignoreModule as unknown as () => Ignore)().add(content); filters.push({ dir: gitignoreDir === '.' ? '' : gitignoreDir, ig }); } diff --git a/test/local/lib/utils-gitignore-fallback.test.ts b/test/local/lib/utils-gitignore-fallback.test.ts index c861de50c..a92af796d 100644 --- a/test/local/lib/utils-gitignore-fallback.test.ts +++ b/test/local/lib/utils-gitignore-fallback.test.ts @@ -59,3 +59,43 @@ describe('Utils - gitignore fallback (no git)', () => { FILES_IN_IGNORED_DIR.concat(FILES_TO_IGNORE).forEach((file) => expect(paths).not.toContain(file)); }); }); + +const NESTED_TEST_DIR = 'gitignore-nested-test-dir'; + +describe('Utils - nested .gitignore scoping (no git)', () => { + const { tmpPath, joinPath, beforeAllCalls, afterAllCalls } = useTempPath(NESTED_TEST_DIR, { + create: true, + remove: true, + cwd: false, + cwdParent: false, + }); + + beforeAll(async () => { + await beforeAllCalls(); + + // Create directory structure + ensureFolderExistsSync(tmpPath, 'src'); + ensureFolderExistsSync(tmpPath, 'src/internal'); + + // Create files: one public, one that should be scoped-ignored by src/.gitignore + writeFileSync(joinPath('src/public.js'), 'content', { flag: 'w' }); + writeFileSync(joinPath('src/internal/secret.js'), 'content', { flag: 'w' }); + + // Only a nested .gitignore — the root has no entry for src/internal + writeFileSync(joinPath('src/.gitignore'), 'internal/', { flag: 'w' }); + }); + + afterAll(async () => { + await afterAllCalls(); + }); + + it('should exclude files matched by a nested .gitignore scoped to its own directory', async () => { + const paths = await getActorLocalFilePaths(tmpPath); + + // src/public.js should be present + expect(paths).toContain('src/public.js'); + + // src/internal/secret.js should be excluded by src/.gitignore's `internal/` rule + expect(paths).not.toContain('src/internal/secret.js'); + }); +}); From b1657f6f4056fa1d45a7acbd7cd6e492baeef178 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Sol=C3=A1r?= Date: Wed, 25 Feb 2026 14:23:59 +0100 Subject: [PATCH 11/11] feat(gitignore-fallback): add parent .gitignore support --- src/lib/utils.ts | 52 +++++++++++---- .../lib/utils-gitignore-fallback.test.ts | 63 ++++++++++++++++++- 2 files changed, 103 insertions(+), 12 deletions(-) diff --git a/src/lib/utils.ts b/src/lib/utils.ts index 208dcbbe6..e5425426e 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -4,7 +4,7 @@ import { mkdir, readFile } from 'node:fs/promises'; import type { IncomingMessage } from 'node:http'; import { get } from 'node:https'; import { homedir } from 'node:os'; -import { dirname, join } from 'node:path'; +import { dirname, join, relative } from 'node:path'; import process from 'node:process'; import { finished } from 'node:stream/promises'; @@ -293,6 +293,8 @@ export const createSourceFiles = async (paths: string[], cwd: string) => { /** * Fallback for when git is unavailable: find all .gitignore files and build a filter * using the `ignore` package, scoped to each file's directory. + * Also walks ancestor directories to pick up parent .gitignore files (e.g. monorepo root), + * stopping at the first .git boundary found. */ const getGitignoreFallbackFilter = async (cwd: string): Promise<(paths: string[]) => string[]> => { const gitignoreFiles = await glob('**/.gitignore', { @@ -302,27 +304,55 @@ const getGitignoreFallbackFilter = async (cwd: string): Promise<(paths: string[] expandDirectories: false, }); - if (gitignoreFiles.length === 0) { - return (paths) => paths; - } + // `ignore` is a CJS package; TypeScript sees its default import as the module + // object rather than the callable factory, so we cast through unknown. + const makeIg = ignoreModule as unknown as () => Ignore; - const filters: { dir: string; ig: Ignore }[] = []; + const filters: { dir: string; ig: Ignore; ancestorPrefix?: string }[] = []; for (const gitignoreFile of gitignoreFiles) { const gitignoreDir = dirname(gitignoreFile); // e.g. 'src' or '.' const content = await readFile(join(cwd, gitignoreFile), 'utf-8'); - // `ignore` is a CJS package; TypeScript sees its default import as the module - // object rather than the callable factory, so we cast through unknown. - const ig = (ignoreModule as unknown as () => Ignore)().add(content); - filters.push({ dir: gitignoreDir === '.' ? '' : gitignoreDir, ig }); + filters.push({ dir: gitignoreDir === '.' ? '' : gitignoreDir, ig: makeIg().add(content) }); + } + + // Walk ancestor directories to pick up parent .gitignore files (e.g. monorepo root). + // Check for a .git boundary FIRST so we stop before processing the git root's own + // .gitignore — that file is handled by `git ls-files` when git is available, and + // avoids accidentally applying rules from an unrelated outer repository. + let parentDir = dirname(cwd); + while (parentDir !== dirname(parentDir)) { + if (existsSync(join(parentDir, '.git'))) { + break; + } + + const parentGitignorePath = join(parentDir, '.gitignore'); + if (existsSync(parentGitignorePath)) { + try { + const content = await readFile(parentGitignorePath, 'utf-8'); + // Paths passed to this filter are relative to cwd. To test them against + // a .gitignore that lives above cwd we need to prepend the relative path + // from the ancestor dir to cwd so the ignore patterns see the right scope. + const ancestorPrefix = relative(parentDir, cwd); + filters.push({ dir: '', ig: makeIg().add(content), ancestorPrefix }); + } catch { + // Ignore read errors + } + } + + parentDir = dirname(parentDir); + } + + if (filters.length === 0) { + return (paths) => paths; } return (paths) => paths.filter((filePath) => { - for (const { dir, ig } of filters) { + for (const { dir, ig, ancestorPrefix } of filters) { let relativePath: string | null; if (!dir) { - relativePath = filePath; + relativePath = ancestorPrefix ? `${ancestorPrefix}/${filePath}` : filePath; } else if (filePath.startsWith(`${dir}/`)) { relativePath = filePath.slice(dir.length + 1); } else { diff --git a/test/local/lib/utils-gitignore-fallback.test.ts b/test/local/lib/utils-gitignore-fallback.test.ts index a92af796d..aa12d8aef 100644 --- a/test/local/lib/utils-gitignore-fallback.test.ts +++ b/test/local/lib/utils-gitignore-fallback.test.ts @@ -1,4 +1,5 @@ -import { writeFileSync } from 'node:fs'; +import { mkdirSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; import { ensureFolderExistsSync } from '../../../src/lib/files.js'; import { getActorLocalFilePaths } from '../../../src/lib/utils.js'; @@ -99,3 +100,63 @@ describe('Utils - nested .gitignore scoping (no git)', () => { expect(paths).not.toContain('src/internal/secret.js'); }); }); + +const PARENT_TEST_DIR = 'gitignore-parent-test-dir'; + +describe('Utils - parent .gitignore applied to subproject (no git)', () => { + // tmpPath is the "project root" that holds the parent .gitignore. + // The actual cwd passed to getActorLocalFilePaths is tmpPath/subproject/. + const { tmpPath, beforeAllCalls, afterAllCalls } = useTempPath(PARENT_TEST_DIR, { + create: true, + remove: true, + cwd: false, + cwdParent: false, + }); + + let subprojectPath: string; + + beforeAll(async () => { + await beforeAllCalls(); + + subprojectPath = join(tmpPath, 'subproject'); + + // Parent .gitignore — rules that should apply to everything inside subproject/. + // No fake .git is needed: the ancestor-walker already stops at the apify-cli + // repo root (.git lives there) before touching its own .gitignore. + writeFileSync(join(tmpPath, '.gitignore'), '*.secret\nbuild/\n', { flag: 'w' }); + + // Subproject directory structure + mkdirSync(subprojectPath, { recursive: true }); + ensureFolderExistsSync(subprojectPath, 'src'); + ensureFolderExistsSync(subprojectPath, 'build'); + + // Files that should be kept + writeFileSync(join(subprojectPath, 'main.js'), 'content', { flag: 'w' }); + writeFileSync(join(subprojectPath, 'src', 'utils.js'), 'content', { flag: 'w' }); + + // Files/dirs that should be excluded by parent .gitignore + writeFileSync(join(subprojectPath, 'config.secret'), 'content', { flag: 'w' }); + writeFileSync(join(subprojectPath, 'src', 'db.secret'), 'content', { flag: 'w' }); + writeFileSync(join(subprojectPath, 'build', 'output.js'), 'content', { flag: 'w' }); + }); + + afterAll(async () => { + await afterAllCalls(); + }); + + it('should exclude files matched by *.secret pattern in parent .gitignore', async () => { + const paths = await getActorLocalFilePaths(subprojectPath); + + expect(paths).toContain('main.js'); + expect(paths).toContain('src/utils.js'); + + expect(paths).not.toContain('config.secret'); + expect(paths).not.toContain('src/db.secret'); + }); + + it('should exclude directory matched by build/ pattern in parent .gitignore', async () => { + const paths = await getActorLocalFilePaths(subprojectPath); + + expect(paths).not.toContain('build/output.js'); + }); +});