node.js fs.readdir recursive directory search


Question

Any ideas on an async directory search using fs.readdir? I realise that we could introduce recursion and call the read directory function with the next directory to read, but am a little worried about it not being async...

Any ideas? I've looked at node-walk which is great, but doesn't give me just the files in an array, like readdir does. Although

Looking for output like...

['file1.txt', 'file2.txt', 'dir/file3.txt']
1
235
2/15/2017 10:17:47 AM

Accepted Answer

There are basically two ways of accomplishing this. In an async environment you'll notice that there are two kinds of loops: serial and parallel. A serial loop waits for one iteration to complete before it moves onto the next iteration - this guarantees that every iteration of the loop completes in order. In a parallel loop, all the iterations are started at the same time, and one may complete before another, however, it is much faster than a serial loop. So in this case, it's probably better to use a parallel loop because it doesn't matter what order the walk completes in, just as long as it completes and returns the results (unless you want them in order).

A parallel loop would look like this:

var fs = require('fs');
var path = require('path');
var walk = function(dir, done) {
  var results = [];
  fs.readdir(dir, function(err, list) {
    if (err) return done(err);
    var pending = list.length;
    if (!pending) return done(null, results);
    list.forEach(function(file) {
      file = path.resolve(dir, file);
      fs.stat(file, function(err, stat) {
        if (stat && stat.isDirectory()) {
          walk(file, function(err, res) {
            results = results.concat(res);
            if (!--pending) done(null, results);
          });
        } else {
          results.push(file);
          if (!--pending) done(null, results);
        }
      });
    });
  });
};

A serial loop would look like this:

var fs = require('fs');
var walk = function(dir, done) {
  var results = [];
  fs.readdir(dir, function(err, list) {
    if (err) return done(err);
    var i = 0;
    (function next() {
      var file = list[i++];
      if (!file) return done(null, results);
      file = dir + '/' + file;
      fs.stat(file, function(err, stat) {
        if (stat && stat.isDirectory()) {
          walk(file, function(err, res) {
            results = results.concat(res);
            next();
          });
        } else {
          results.push(file);
          next();
        }
      });
    })();
  });
};

And to test it out on your home directory (WARNING: the results list will be huge if you have a lot of stuff in your home directory):

walk(process.env.HOME, function(err, results) {
  if (err) throw err;
  console.log(results);
});

EDIT: Improved examples.

358
5/30/2015 11:24:20 AM

Just in case anyone finds it useful, I also put together a synchronous version.

var walk = function(dir) {
    var results = [];
    var list = fs.readdirSync(dir);
    list.forEach(function(file) {
        file = dir + '/' + file;
        var stat = fs.statSync(file);
        if (stat && stat.isDirectory()) { 
            /* Recurse into a subdirectory */
            results = results.concat(walk(file));
        } else { 
            /* Is a file */
            results.push(file);
        }
    });
    return results;
}

Tip: To use less resources when filtering. Filter within this function itself. E.g. Replace results.push(file); with below code. Adjust as required:

    file_type = file.split(".").pop();
    file_name = file.split(/(\\|\/)/g).pop();
    if (file_type == "json") results.push(file);

Licensed under: CC-BY-SA with attribution
Not affiliated with: Stack Overflow
Icon