Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion doc/api/fs.md
Original file line number Diff line number Diff line change
Expand Up @@ -1354,6 +1354,9 @@ changes:
- version: REPLACEME
pr-url: https://github.com/nodejs/node/pull/62695
description: Add support for the `followSymlinks` option.
- version: REPLACEME
pr-url: https://github.com/nodejs/node/issues/59202
description: Add support for the `encoding` option.
- version:
- v24.1.0
- v22.17.0
Expand All @@ -1377,6 +1380,13 @@ changes:
* `pattern` {string|string\[]}
* `options` {Object}
* `cwd` {string|URL} current working directory. **Default:** `process.cwd()`
* `encoding` {string} The character encoding to use for the yielded paths.
If set to `'buffer'`, paths are yielded as {Buffer} objects (and, when
`withFileTypes` is `true`, the `name` and `parentPath` of each yielded
{fs.Dirent} are {Buffer}s). This is useful when matching file names that
contain byte sequences which are not valid UTF-8, since otherwise such
bytes are silently replaced when decoded into a string. **Default:**
`'utf8'`.
* `exclude` {Function|string\[]} Function to filter out files/directories or a
list of glob patterns to be excluded. If a function is provided, return
`true` to exclude the item, `false` to include it. **Default:** `undefined`.
Expand Down Expand Up @@ -3475,6 +3485,9 @@ changes:
- version: REPLACEME
pr-url: https://github.com/nodejs/node/pull/62695
description: Add support for the `followSymlinks` option.
- version: REPLACEME
pr-url: https://github.com/nodejs/node/issues/59202
description: Add support for the `encoding` option.
- version:
- v24.1.0
- v22.17.0
Expand All @@ -3499,6 +3512,13 @@ changes:

* `options` {Object}
* `cwd` {string|URL} current working directory. **Default:** `process.cwd()`
* `encoding` {string} The character encoding to use for the returned paths.
If set to `'buffer'`, paths are returned as {Buffer} objects (and, when
`withFileTypes` is `true`, the `name` and `parentPath` of each returned
{fs.Dirent} are {Buffer}s). This is useful when matching file names that
contain byte sequences which are not valid UTF-8, since otherwise such
bytes are silently replaced when decoded into a string. **Default:**
`'utf8'`.
* `exclude` {Function|string\[]} Function to filter out files/directories or a
list of glob patterns to be excluded. If a function is provided, return
`true` to exclude the item, `false` to include it. **Default:** `undefined`.
Expand Down Expand Up @@ -6057,6 +6077,9 @@ changes:
- version: REPLACEME
pr-url: https://github.com/nodejs/node/pull/62695
description: Add support for the `followSymlinks` option.
- version: REPLACEME
pr-url: https://github.com/nodejs/node/issues/59202
description: Add support for the `encoding` option.
- version:
- v24.1.0
- v22.17.0
Expand All @@ -6080,14 +6103,22 @@ changes:
* `pattern` {string|string\[]}
* `options` {Object}
* `cwd` {string|URL} current working directory. **Default:** `process.cwd()`
* `encoding` {string} The character encoding to use for the returned paths.
If set to `'buffer'`, paths are returned as {Buffer} objects (and, when
`withFileTypes` is `true`, the `name` and `parentPath` of each returned
{fs.Dirent} are {Buffer}s). This is useful when matching file names that
contain byte sequences which are not valid UTF-8, since otherwise such
bytes are silently replaced when decoded into a string. **Default:**
`'utf8'`.
* `exclude` {Function|string\[]} Function to filter out files/directories or a
list of glob patterns to be excluded. If a function is provided, return
`true` to exclude the item, `false` to include it. **Default:** `undefined`.
* `followSymlinks` {boolean} When `true`, symbolic links to directories are
followed while expanding `**` patterns. **Default:** `false`.
* `withFileTypes` {boolean} `true` if the glob should return paths as Dirents,
`false` otherwise. **Default:** `false`.
* Returns: {string\[]} paths of files that match the pattern.
* Returns: {string\[]|Buffer\[]|Dirent\[]} paths (or Dirents, when
`withFileTypes` is `true`) of files that match the pattern.

When `followSymlinks` is enabled, detected symbolic link cycles are not
traversed recursively.
Expand Down
155 changes: 132 additions & 23 deletions lib/internal/fs/glob.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const {
realpath,
stat,
} = require('fs/promises');
const { Buffer } = require('buffer');
const { join, resolve, basename, isAbsolute, dirname } = require('path');

const {
Expand All @@ -41,7 +42,7 @@ const {
validateString,
validateStringArray,
} = require('internal/validators');
const { DirentFromStats } = require('internal/fs/utils');
const { assertEncoding, DirentFromStats } = require('internal/fs/utils');
const {
codes: {
ERR_INVALID_ARG_TYPE,
Expand All @@ -58,13 +59,60 @@ function lazyMinimatch() {
}

/**
* Convert a latin1-encoded string path into a Buffer that preserves the
* original byte sequence, suitable for passing to the fs bindings when the
* caller requested `encoding: 'buffer'`.
* @param {string} path
* @returns {Buffer}
*/
function toRawBuffer(path) {
return Buffer.from(path, 'latin1');
}

/**
* When `readdir` is called with `encoding: 'buffer'`, each Dirent's `name`
* (and `parentPath`) is a Buffer. The matching machinery in this module
* operates on strings, so we decode them losslessly via latin1 (so each byte
* maps to a single 0..255 code unit). Names can be re-encoded to Buffer at
* the result-emission boundary via `toRawBuffer`.
* @param {Dirent} dirent
* @returns {Dirent}
*/
function decodeDirentName(dirent) {
if (Buffer.isBuffer(dirent.name)) {
dirent.name = dirent.name.toString('latin1');
}
if (Buffer.isBuffer(dirent.parentPath)) {
dirent.parentPath = dirent.parentPath.toString('latin1');
}
return dirent;
}

/**
* Convert a Dirent whose name/parentPath are latin1-encoded strings into a
* Dirent whose name/parentPath are the original Buffer byte sequences.
* @param {Dirent} dirent
* @returns {Dirent}
*/
function bufferifyDirent(dirent) {
if (typeof dirent.name === 'string') {
dirent.name = toRawBuffer(dirent.name);
}
if (typeof dirent.parentPath === 'string') {
dirent.parentPath = toRawBuffer(dirent.parentPath);
}
return dirent;
}

/**
* @param {string} path
* @param {boolean} useBuffer
* @returns {Promise<DirentFromStats|null>}
*/
async function getDirent(path) {
async function getDirent(path, useBuffer) {
let stat;
try {
stat = await lstat(path);
stat = await lstat(useBuffer ? toRawBuffer(path) : path);
} catch {
return null;
}
Expand All @@ -73,12 +121,13 @@ async function getDirent(path) {

/**
* @param {string} path
* @param {boolean} useBuffer
* @returns {DirentFromStats|null}
*/
function getDirentSync(path) {
function getDirentSync(path, useBuffer) {
let stat;
try {
stat = lstatSync(path);
stat = lstatSync(useBuffer ? toRawBuffer(path) : path);
} catch {
return null;
}
Expand Down Expand Up @@ -138,13 +187,26 @@ class Cache {
#followStatsCache = new SafeMap();
#readdirCache = new SafeMap();
#realpathCache = new SafeMap();
#useBuffer = false;

setUseBuffer(useBuffer) {
this.#useBuffer = useBuffer;
}

// When the caller requested `encoding: 'buffer'`, paths are tracked
// internally as latin1 strings (so each code unit corresponds to a single
// byte), but every fs binding call must receive a Buffer with the original
// byte sequence so that non-UTF-8 file names round-trip correctly.
#fsPath(path) {
return this.#useBuffer ? toRawBuffer(path) : path;
}

stat(path) {
const cached = this.#statsCache.get(path);
if (cached) {
return cached;
}
const promise = getDirent(path);
const promise = getDirent(path, this.#useBuffer);
this.#statsCache.set(path, promise);
return promise;
}
Expand All @@ -154,7 +216,7 @@ class Cache {
if (cached && !(cached instanceof Promise)) {
return cached;
}
const val = getDirentSync(path);
const val = getDirentSync(path, this.#useBuffer);
this.#statsCache.set(path, val);
return val;
}
Expand All @@ -163,7 +225,7 @@ class Cache {
if (cached) {
return cached;
}
const promise = PromisePrototypeThen(stat(path), null, () => null);
const promise = PromisePrototypeThen(stat(this.#fsPath(path)), null, () => null);
this.#followStatsCache.set(path, promise);
return promise;
}
Expand All @@ -174,7 +236,7 @@ class Cache {
}
let val;
try {
val = statSync(path);
val = statSync(this.#fsPath(path));
} catch {
val = null;
}
Expand All @@ -186,7 +248,12 @@ class Cache {
if (cached) {
return cached;
}
const promise = PromisePrototypeThen(realpath(path), null, () => null);
const useBuffer = this.#useBuffer;
const promise = PromisePrototypeThen(
realpath(this.#fsPath(path), useBuffer ? { __proto__: null, encoding: 'buffer' } : undefined),
(val) => (useBuffer && Buffer.isBuffer(val) ? val.toString('latin1') : val),
() => null,
);
this.#realpathCache.set(path, promise);
return promise;
}
Expand All @@ -197,7 +264,10 @@ class Cache {
}
let val;
try {
val = realpathSync(path);
val = realpathSync(this.#fsPath(path), this.#useBuffer ? { __proto__: null, encoding: 'buffer' } : undefined);
if (this.#useBuffer && Buffer.isBuffer(val)) {
val = val.toString('latin1');
}
} catch {
val = null;
}
Expand All @@ -212,7 +282,15 @@ class Cache {
if (cached) {
return cached;
}
const promise = PromisePrototypeThen(readdir(path, { __proto__: null, withFileTypes: true }), null, () => []);
const useBuffer = this.#useBuffer;
const opts = useBuffer ?
{ __proto__: null, withFileTypes: true, encoding: 'buffer' } :
{ __proto__: null, withFileTypes: true };
const promise = PromisePrototypeThen(
readdir(this.#fsPath(path), opts),
(entries) => (useBuffer ? ArrayPrototypeMap(entries, decodeDirentName) : entries),
() => [],
);
this.#readdirCache.set(path, promise);
return promise;
}
Expand All @@ -223,7 +301,14 @@ class Cache {
}
let val;
try {
val = readdirSync(path, { __proto__: null, withFileTypes: true });
const useBuffer = this.#useBuffer;
const opts = useBuffer ?
{ __proto__: null, withFileTypes: true, encoding: 'buffer' } :
{ __proto__: null, withFileTypes: true };
val = readdirSync(this.#fsPath(path), opts);
if (useBuffer) {
val = ArrayPrototypeMap(val, decodeDirentName);
}
} catch {
val = [];
}
Expand Down Expand Up @@ -336,15 +421,23 @@ class Glob {
#patterns;
#withFileTypes;
#followSymlinks = false;
#useBuffer = false;
#isExcluded = () => false;
constructor(pattern, options = kEmptyObject) {
validateObject(options, 'options');
const { exclude, cwd, followSymlinks, withFileTypes } = options;
const { encoding, exclude, cwd, followSymlinks, withFileTypes } = options;
this.#root = toPathIfFileURL(cwd) ?? '.';
if (followSymlinks != null) {
validateBoolean(followSymlinks, 'options.followSymlinks');
this.#followSymlinks = followSymlinks;
}
if (encoding !== undefined && encoding !== null) {
if (encoding !== 'buffer') {
assertEncoding(encoding);
}
this.#useBuffer = encoding === 'buffer';
this.#cache.setUseBuffer(this.#useBuffer);
}
this.#withFileTypes = !!withFileTypes;
if (exclude != null) {
validateStringArrayOrFunction(exclude, 'options.exclude');
Expand Down Expand Up @@ -391,14 +484,21 @@ class Glob {
.forEach((patterns, path) => ArrayPrototypePush(this.#queue, { __proto__: null, path, patterns }));
this.#subpatterns.clear();
}
return ArrayFrom(
this.#results,
this.#withFileTypes ? (path) => this.#cache.statSync(
isAbsolute(path) ?
path :
join(this.#root, path),
) : undefined,
);
const useBuffer = this.#useBuffer;
let mapper;
if (this.#withFileTypes) {
mapper = (path) => {
const dirent = this.#cache.statSync(
isAbsolute(path) ?
path :
join(this.#root, path),
);
return useBuffer && dirent ? bufferifyDirent(dirent) : dirent;
};
} else if (useBuffer) {
mapper = toRawBuffer;
}
return ArrayFrom(this.#results, mapper);
}
#isDirectorySync(path, stat, pattern) {
if (stat?.isDirectory()) {
Expand Down Expand Up @@ -686,11 +786,20 @@ class Glob {


async* glob() {
const useBuffer = this.#useBuffer;
const withFileTypes = this.#withFileTypes;
ArrayPrototypePush(this.#queue, { __proto__: null, path: '.', patterns: this.#patterns });
while (this.#queue.length > 0) {
const item = ArrayPrototypePop(this.#queue);
for (let i = 0; i < item.patterns.length; i++) {
yield* this.#iterateSubpatterns(item.path, item.patterns[i]);
const iter = this.#iterateSubpatterns(item.path, item.patterns[i]);
if (useBuffer) {
for await (const value of iter) {
yield withFileTypes ? bufferifyDirent(value) : toRawBuffer(value);
}
} else {
yield* iter;
}
}
this.#subpatterns
.forEach((patterns, path) => ArrayPrototypePush(this.#queue, { __proto__: null, path, patterns }));
Expand Down
Loading
Loading