Any ideas on an async directory search using fs.readdir? I realize that we could introduce recursion and call the read directory function with the next directory to read, but I'm a little worried about it not being async...
Any ideas? I've looked at node-walk which is great, but doesn't give me just the files in an array, like readdir does. Although
Looking for output like...
['file1.txt', 'file2.txt', 'dir/file3.txt']
There are basically two ways of accomplishing this. In an async environment you'll notice that there are two kinds of loops: serial and parallel. A serial loop waits for one iteration to complete before it moves onto the next iteration - this guarantees that every iteration of the loop completes in order. In a parallel loop, all the iterations are started at the same time, and one may complete before another, however, it is much faster than a serial loop. So in this case, it's probably better to use a parallel loop because it doesn't matter what order the walk completes in, just as long as it completes and returns the results (unless you want them in order).
A parallel loop would look like this:
var fs = require('fs');
var path = require('path');
var walk = function(dir, done) {
var results = [];
fs.readdir(dir, function(err, list) {
if (err) return done(err);
var pending = list.length;
if (!pending) return done(null, results);
list.forEach(function(file) {
file = path.resolve(dir, file);
fs.stat(file, function(err, stat) {
if (stat && stat.isDirectory()) {
walk(file, function(err, res) {
results = results.concat(res);
if (!--pending) done(null, results);
});
} else {
results.push(file);
if (!--pending) done(null, results);
}
});
});
});
};
A serial loop would look like this:
var fs = require('fs');
var path = require('path');
var walk = function(dir, done) {
var results = [];
fs.readdir(dir, function(err, list) {
if (err) return done(err);
var i = 0;
(function next() {
var file = list[i++];
if (!file) return done(null, results);
file = path.resolve(dir, file);
fs.stat(file, function(err, stat) {
if (stat && stat.isDirectory()) {
walk(file, function(err, res) {
results = results.concat(res);
next();
});
} else {
results.push(file);
next();
}
});
})();
});
};
And to test it out on your home directory (WARNING: the results list will be huge if you have a lot of stuff in your home directory):
walk(process.env.HOME, function(err, results) {
if (err) throw err;
console.log(results);
});
EDIT: Improved examples.
This one uses the maximum amount of new, buzzwordy features available in node 8, including Promises, util/promisify, destructuring, async-await, map+reduce and more, making your co-workers scratch their heads as they try to figure out what is going on.
Node 8+
No external dependencies.
const { promisify } = require('util');
const { resolve } = require('path');
const fs = require('fs');
const readdir = promisify(fs.readdir);
const stat = promisify(fs.stat);
async function getFiles(dir) {
const subdirs = await readdir(dir);
const files = await Promise.all(subdirs.map(async (subdir) => {
const res = resolve(dir, subdir);
return (await stat(res)).isDirectory() ? getFiles(res) : res;
}));
return files.reduce((a, f) => a.concat(f), []);
}
Usage
getFiles(__dirname)
.then(files => console.log(files))
.catch(e => console.error(e));
Node 10.10+
Updated for node 10+ with even more whizbang:
const { resolve } = require('path');
const { readdir } = require('fs').promises;
async function getFiles(dir) {
const dirents = await readdir(dir, { withFileTypes: true });
const files = await Promise.all(dirents.map((dirent) => {
const res = resolve(dir, dirent.name);
return dirent.isDirectory() ? getFiles(res) : res;
}));
return Array.prototype.concat(...files);
}
Note that starting with node 11.15.0 you can use files.flat()
instead of Array.prototype.concat(...files)
to flatten the files array.
Node 11+
If you want to blow everybody's head up completely, you can use the following version using async iterators. In addition to being really cool, it also allows consumers to pull results one-at-a-time, making it better suited for really large directories.
const { resolve } = require('path');
const { readdir } = require('fs').promises;
async function* getFiles(dir) {
const dirents = await readdir(dir, { withFileTypes: true });
for (const dirent of dirents) {
const res = resolve(dir, dirent.name);
if (dirent.isDirectory()) {
yield* getFiles(res);
} else {
yield res;
}
}
}
Usage has changed because the return type is now an async iterator instead of a promise
;(async () => {
for await (const f of getFiles('.')) {
console.log(f);
}
})()
In case somebody is interested, I've written more about async iterators here: https://qwtel.com/posts/software/async-generators-in-the-wild/
subdir
and subdirs
is misleading, as those may be actually files (I suggest something like itemInDir
or item_in_dir
or even simply item
instead.), but this solution feels cleaner than the accepted one and is much less code. I also don't find it much more complicated than the code in the accepted answer. +1
require(fs).promises
and just drop util.promisify
completely. Personally I alias fs to fs.promises.
readdir
AKA the options object like so readdir(dir, {withFileTypes: true})
this will return all the items with their type information, SO we won't need to call stat
at all to obtain the information that readdir
now gives us back. This saves us from needing to make additional sys calls. Details here
Just in case anyone finds it useful, I also put together a synchronous version.
var walk = function(dir) {
var results = [];
var list = fs.readdirSync(dir);
list.forEach(function(file) {
file = dir + '/' + file;
var stat = fs.statSync(file);
if (stat && stat.isDirectory()) {
/* Recurse into a subdirectory */
results = results.concat(walk(file));
} else {
/* Is a file */
results.push(file);
}
});
return results;
}
Tip: To use less resources when filtering. Filter within this function itself. E.g. Replace results.push(file);
with below code. Adjust as required:
file_type = file.split(".").pop();
file_name = file.split(/(\\|\/)/g).pop();
if (file_type == "json") results.push(file);
lstat
instead? Or else add a recursiveness check to limit the recursivity level.
A. Have a look at the file module. It has a function called walk:
file.walk(start, callback) Navigates a file tree, calling callback for each directory, passing in (null, dirPath, dirs, files).
This may be for you! And yes, it is async. However, I think you would have to aggregate the full path's yourself, if you needed them.
B. An alternative, and even one of my favourites: use the unix find
for that. Why do something again, that has already been programmed? Maybe not exactly what you need, but still worth checking out:
var execFile = require('child_process').execFile;
execFile('find', [ 'somepath/' ], function(err, stdout, stderr) {
var file_list = stdout.split('\n');
/* now you've got a list with full path file names */
});
Find has a nice build-in caching mechanism that makes subsequent searches very fast, as long as only few folder have changed.
I recommend using node-glob to accomplish that task.
var glob = require( 'glob' );
glob( 'dirname/**/*.js', function( err, files ) {
console.log( files );
});
Another nice npm package is glob.
npm install glob
It is very powerful and should cover all your recursing needs.
Edit:
I actually wasn't perfectly happy with glob, so I created readdirp.
I'm very confident that its API makes finding files and directories recursively and applying specific filters very easy.
Read through its documentation to get a better idea of what it does and install via:
npm install readdirp
If you want to use an npm package, wrench is pretty good.
var wrench = require("wrench");
var files = wrench.readdirSyncRecursive("directory");
wrench.readdirRecursive("directory", function (error, files) {
// live your dreams
});
EDIT (2018): Anyone reading through in recent time: The author deprecated this package in 2015:
wrench.js is deprecated, and hasn't been updated in quite some time. I heavily recommend using fs-extra to do any extra filesystem operations.
denodify
this? Callback is fired multiple times (recursively). So using Q.denodify(wrench.readdirRecursive)
returns only the first result.
Async
const fs = require('fs')
const path = require('path')
const readdir = (p, done, a = [], i = 0) => fs.readdir(p, (e, d = []) =>
d.map(f => readdir(a[a.push(path.join(p, f)) - 1], () =>
++i == d.length && done(a), a)).length || done(a))
readdir(__dirname, console.log)
Sync
const fs = require('fs')
const path = require('path')
const readdirSync = (p, a = []) => {
if (fs.statSync(p).isDirectory())
fs.readdirSync(p).map(f => readdirSync(a[a.push(path.join(p, f)) - 1], a))
return a
}
console.log(readdirSync(__dirname))
Async readable
function readdir (currentPath, done, allFiles = [], i = 0) {
fs.readdir(currentPath, function (e, directoryFiles = []) {
if (!directoryFiles.length)
return done(allFiles)
directoryFiles.map(function (file) {
var joinedPath = path.join(currentPath, file)
allFiles.push(joinedPath)
readdir(joinedPath, function () {
i = i + 1
if (i == directoryFiles.length)
done(allFiles)}
, allFiles)
})
})
}
readdir(__dirname, console.log)
Note: both versions will follow symlinks (same as the original fs.readdir
)
I loved the answer from chjj above and would not have been able to create my version of the parallel loop without that start.
var fs = require("fs");
var tree = function(dir, done) {
var results = {
"path": dir
,"children": []
};
fs.readdir(dir, function(err, list) {
if (err) { return done(err); }
var pending = list.length;
if (!pending) { return done(null, results); }
list.forEach(function(file) {
fs.stat(dir + '/' + file, function(err, stat) {
if (stat && stat.isDirectory()) {
tree(dir + '/' + file, function(err, res) {
results.children.push(res);
if (!--pending){ done(null, results); }
});
} else {
results.children.push({"path": dir + "/" + file});
if (!--pending) { done(null, results); }
}
});
});
});
};
module.exports = tree;
I created a Gist as well. Comments welcome. I am still starting out in the NodeJS realm so that is one way I hope to learn more.
With Recursion
var fs = require('fs')
var path = process.cwd()
var files = []
var getFiles = function(path, files){
fs.readdirSync(path).forEach(function(file){
var subpath = path + '/' + file;
if(fs.lstatSync(subpath).isDirectory()){
getFiles(subpath, files);
} else {
files.push(path + '/' + file);
}
});
}
Calling
getFiles(path, files)
console.log(files) // will log all files in directory
/
but using the path
module: path.join(searchPath, file)
. That way, you will get correct paths independent of the OS.
Use node-dir to produce exactly the output you like
var dir = require('node-dir');
dir.files(__dirname, function(err, files) {
if (err) throw err;
console.log(files);
//we have an array of files now, so now we can iterate that array
files.forEach(function(path) {
action(null, path);
})
});
Short and Efficient:
import {lstat, readdir} from 'node:fs/promises'
import {join} from 'node:path'
const deepReadDir = async (dirPath) => await Promise.all(
(await readdir(dirPath)).map(async (entity) => {
const path = join(dirPath, entity)
return (await lstat(path)).isDirectory() ? await deepReadDir(path) : path
}),
)
This auto folds each nested path in a new (nested) array. For example if:
const files = await deepReadDir('src')
files
would be like:
[
[
'src/client/api.js',
'src/client/http-constants.js',
'src/client/index.html',
'src/client/index.js',
[ 'src/client/res/favicon.ico' ],
'src/client/storage.js'
],
[ 'src/crypto/keygen.js' ],
'src/discover.js',
[
'src/mutations/createNewMutation.js',
'src/mutations/newAccount.js',
'src/mutations/transferCredit.js',
'src/mutations/updateApp.js'
],
[
'src/server/authentication.js',
'src/server/handlers.js',
'src/server/quick-response.js',
'src/server/server.js',
'src/server/static-resources.js'
],
[ 'src/util/prompt.js', 'src/util/safeWriteFile.js' ],
'src/util.js'
]
But you can easily flat it:
files.flat(Number.POSITIVE_INFINITY)
[
'src/client/api.js',
'src/client/http-constants.js',
'src/client/index.html',
'src/client/index.js',
'src/client/res/favicon.ico',
'src/client/storage.js',
'src/crypto/keygen.js',
'src/discover.js',
'src/mutations/createNewMutation.js',
'src/mutations/newAccount.js',
'src/mutations/transferCredit.js',
'src/mutations/updateApp.js',
'src/server/authentication.js',
'src/server/handlers.js',
'src/server/quick-response.js',
'src/server/server.js',
'src/server/static-resources.js',
'src/util/prompt.js',
'src/util/safeWriteFile.js',
'src/util.js'
]
qwtel's answer variant, in TypeScript
import { resolve } from 'path';
import { readdir } from 'fs/promises';
async function* getFiles(dir: string): AsyncGenerator<string> {
const entries = await readdir(dir, { withFileTypes: true });
for (const entry of entries) {
const res = resolve(dir, entry.name);
if (entry.isDirectory()) {
yield* getFiles(res);
} else {
yield res;
}
}
}
Modern promise based read dir recursive version:
const fs = require('fs');
const path = require('path');
const readDirRecursive = async (filePath) => {
const dir = await fs.promises.readdir(filePath);
const files = await Promise.all(dir.map(async relativePath => {
const absolutePath = path.join(filePath, relativePath);
const stat = await fs.promises.lstat(absolutePath);
return stat.isDirectory() ? readDirRecursive(absolutePath) : absolutePath;
}));
return files.flat();
}
Here is a simple synchronous recursive solution
const fs = require('fs')
const getFiles = path => {
const files = []
for (const file of fs.readdirSync(path)) {
const fullPath = path + '/' + file
if(fs.lstatSync(fullPath).isDirectory())
getFiles(fullPath).forEach(x => files.push(file + '/' + x))
else files.push(file)
}
return files
}
Usage:
const files = getFiles(process.cwd())
console.log(files)
You could write it asynchronously, but there is no need. Just make sure that the input directory exists and is accessible.
fs.readFileSync(file)
so x => files.push(fullPath + '/' + x))
fs.readFileSync
can understand relative paths though, so depends on where you're running it from but its likely the changes are not even required.
I've coded this recently, and thought it would make sense to share this here. The code makes use of the async library.
var fs = require('fs');
var async = require('async');
var scan = function(dir, suffix, callback) {
fs.readdir(dir, function(err, files) {
var returnFiles = [];
async.each(files, function(file, next) {
var filePath = dir + '/' + file;
fs.stat(filePath, function(err, stat) {
if (err) {
return next(err);
}
if (stat.isDirectory()) {
scan(filePath, suffix, function(err, results) {
if (err) {
return next(err);
}
returnFiles = returnFiles.concat(results);
next();
})
}
else if (stat.isFile()) {
if (file.indexOf(suffix, file.length - suffix.length) !== -1) {
returnFiles.push(filePath);
}
next();
}
});
}, function(err) {
callback(err, returnFiles);
});
});
};
You can use it like this:
scan('/some/dir', '.ext', function(err, files) {
// Do something with files that ends in '.ext'.
console.log(files);
});
A library called Filehound is another option. It will recursively search a given directory (working directory by default). It supports various filters, callbacks, promises and sync searches.
For example, search the current working directory for all files (using callbacks):
const Filehound = require('filehound');
Filehound.create()
.find((err, files) => {
if (err) {
return console.error(`error: ${err}`);
}
console.log(files); // array of files
});
Or promises and specifying a specific directory:
const Filehound = require('filehound');
Filehound.create()
.paths("/tmp")
.find()
.each(console.log);
Consult the docs for further use cases and examples of usage: https://github.com/nspragg/filehound
Disclaimer: I'm the author.
Using async/await, this should work:
const FS = require('fs');
const readDir = promisify(FS.readdir);
const fileStat = promisify(FS.stat);
async function getFiles(dir) {
let files = await readDir(dir);
let result = files.map(file => {
let path = Path.join(dir,file);
return fileStat(path).then(stat => stat.isDirectory() ? getFiles(path) : path);
});
return flatten(await Promise.all(result));
}
function flatten(arr) {
return Array.prototype.concat(...arr);
}
You can use bluebird.Promisify or this:
/**
* Returns a function that will wrap the given `nodeFunction`. Instead of taking a callback, the returned function will return a promise whose fate is decided by the callback behavior of the given node function. The node function should conform to node.js convention of accepting a callback as last argument and calling that callback with error as the first argument and success value on the second argument.
*
* @param {Function} nodeFunction
* @returns {Function}
*/
module.exports = function promisify(nodeFunction) {
return function(...args) {
return new Promise((resolve, reject) => {
nodeFunction.call(this, ...args, (err, data) => {
if(err) {
reject(err);
} else {
resolve(data);
}
})
});
};
};
Node 8+ has Promisify built-in
See my other answer for a generator approach that can give results even faster.
Check out the final-fs library. It provides a readdirRecursive
function:
ffs.readdirRecursive(dirPath, true, 'my/initial/path')
.then(function (files) {
// in the `files` variable you've got all the files
})
.otherwise(function (err) {
// something went wrong
});
Simple, Async Promise Based
const fs = require('fs/promises');
const getDirRecursive = async (dir) => {
try {
const items = await fs.readdir(dir);
let files = [];
for (const item of items) {
if ((await fs.lstat(`${dir}/${item}`)).isDirectory()) files = [...files, ...(await getDirRecursive(`${dir}/${item}`))];
else files.push({file: item, path: `${dir}/${item}`, parents: dir.split("/")});
}
return files;
} catch (e) {
return e
}
};
Usage: await getDirRecursive("./public");
Standalone promise implementation
I am using the when.js promise library in this example.
var fs = require('fs')
, path = require('path')
, when = require('when')
, nodefn = require('when/node/function');
function walk (directory, includeDir) {
var results = [];
return when.map(nodefn.call(fs.readdir, directory), function(file) {
file = path.join(directory, file);
return nodefn.call(fs.stat, file).then(function(stat) {
if (stat.isFile()) { return results.push(file); }
if (includeDir) { results.push(file + path.sep); }
return walk(file, includeDir).then(function(filesInDir) {
results = results.concat(filesInDir);
});
});
}).then(function() {
return results;
});
};
walk(__dirname).then(function(files) {
console.log(files);
}).otherwise(function(error) {
console.error(error.stack || error);
});
I've included an optional parameter includeDir
which will include directories in the file listing if set to true
.
Here's yet another implementation. None of the above solutions have any limiters, and so if your directory structure is large, they're all going to thrash and eventually run out of resources.
var async = require('async');
var fs = require('fs');
var resolve = require('path').resolve;
var scan = function(path, concurrency, callback) {
var list = [];
var walker = async.queue(function(path, callback) {
fs.stat(path, function(err, stats) {
if (err) {
return callback(err);
} else {
if (stats.isDirectory()) {
fs.readdir(path, function(err, files) {
if (err) {
callback(err);
} else {
for (var i = 0; i < files.length; i++) {
walker.push(resolve(path, files[i]));
}
callback();
}
});
} else {
list.push(path);
callback();
}
}
});
}, concurrency);
walker.push(path);
walker.drain = function() {
callback(list);
}
};
Using a concurrency of 50 works pretty well, and is almost as fast as simpler implementations for small directory structures.
I modified Trevor Senior's Promise based answer to work with Bluebird
var fs = require('fs'),
path = require('path'),
Promise = require('bluebird');
var readdirAsync = Promise.promisify(fs.readdir);
var statAsync = Promise.promisify(fs.stat);
function walkFiles (directory) {
var results = [];
return readdirAsync(directory).map(function(file) {
file = path.join(directory, file);
return statAsync(file).then(function(stat) {
if (stat.isFile()) {
return results.push(file);
}
return walkFiles(file).then(function(filesInDir) {
results = results.concat(filesInDir);
});
});
}).then(function() {
return results;
});
}
//use
walkDir(__dirname).then(function(files) {
console.log(files);
}).catch(function(e) {
console.error(e); {
});
For fun, here is a flow based version that works with highland.js streams library. It was co-authored by Victor Vu.
###
directory >---m------> dirFilesStream >---------o----> out
| |
| |
+--------< returnPipe <-----------+
legend: (m)erge (o)bserve
+ directory has the initial file
+ dirListStream does a directory listing
+ out prints out the full path of the file
+ returnPipe runs stat and filters on directories
###
_ = require('highland')
fs = require('fs')
fsPath = require('path')
directory = _(['someDirectory'])
mergePoint = _()
dirFilesStream = mergePoint.merge().flatMap((parentPath) ->
_.wrapCallback(fs.readdir)(parentPath).sequence().map (path) ->
fsPath.join parentPath, path
)
out = dirFilesStream
# Create the return pipe
returnPipe = dirFilesStream.observe().flatFilter((path) ->
_.wrapCallback(fs.stat)(path).map (v) ->
v.isDirectory()
)
# Connect up the merge point now that we have all of our streams.
mergePoint.write directory
mergePoint.write returnPipe
mergePoint.end()
# Release backpressure. This will print files as they are discovered
out.each H.log
# Another way would be to queue them all up and then print them all out at once.
# out.toArray((files)-> console.log(files))
Using Promises (Q) to solve this in a Functional style:
var fs = require('fs'),
fsPath = require('path'),
Q = require('q');
var walk = function (dir) {
return Q.ninvoke(fs, 'readdir', dir).then(function (files) {
return Q.all(files.map(function (file) {
file = fsPath.join(dir, file);
return Q.ninvoke(fs, 'lstat', file).then(function (stat) {
if (stat.isDirectory()) {
return walk(file);
} else {
return [file];
}
});
}));
}).then(function (files) {
return files.reduce(function (pre, cur) {
return pre.concat(cur);
});
});
};
It returns a promise of an array, so you can use it as:
walk('/home/mypath').then(function (files) { console.log(files); });
I must add the Promise-based sander library to the list.
var sander = require('sander');
sander.lsr(directory).then( filenames => { console.log(filenames) } );
Using bluebird promise.coroutine:
let promise = require('bluebird'),
PC = promise.coroutine,
fs = promise.promisifyAll(require('fs'));
let getFiles = PC(function*(dir){
let files = [];
let contents = yield fs.readdirAsync(dir);
for (let i = 0, l = contents.length; i < l; i ++) {
//to remove dot(hidden) files on MAC
if (/^\..*/.test(contents[i])) contents.splice(i, 1);
}
for (let i = 0, l = contents.length; i < l; i ++) {
let content = path.resolve(dir, contents[i]);
let contentStat = yield fs.statAsync(content);
if (contentStat && contentStat.isDirectory()) {
let subFiles = yield getFiles(content);
files = files.concat(subFiles);
} else {
files.push(content);
}
}
return files;
});
//how to use
//easy error handling in one place
getFiles(your_dir).then(console.log).catch(err => console.log(err));
Yet another answer, but this time using TypeScript:
/** * Recursively walk a directory asynchronously and obtain all file names (with full path). * * @param dir Folder name you want to recursively process * @param done Callback function, returns all files with full path. * @param filter Optional filter to specify which files to include, * e.g. for json files: (f: string) => /.json$/.test(f) */ const walk = ( dir: string, done: (err: Error | null, results ? : string[]) => void, filter ? : (f: string) => boolean ) => { let results: string[] = []; fs.readdir(dir, (err: Error, list: string[]) => { if (err) { return done(err); } let pending = list.length; if (!pending) { return done(null, results); } list.forEach((file: string) => { file = path.resolve(dir, file); fs.stat(file, (err2, stat) => { if (stat && stat.isDirectory()) { walk(file, (err3, res) => { if (res) { results = results.concat(res); } if (!--pending) { done(null, results); } }, filter); } else { if (typeof filter === 'undefined' || (filter && filter(file))) { results.push(file); } if (!--pending) { done(null, results); } } }); }); }); };
Success story sharing
require('node-dir').files(__dirname, function(err, files) { console.log(files); });
!--
syntax, a question has been asked about itfs
instead offs.promises
? wouldntfs.promises
be even better?