Sampling a random subset from an array

What is a clean way of taking a random sample, without replacement from an array in javascript? So suppose there is an array

x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]

and I want to randomly sample 5 unique values; i.e. generate a random subset of length 5. To generate one random sample one could do something like:

x[Math.floor(Math.random()*x.length)];

But if this is done multiple times, there is a risk of a grabbing the same entry multiple times.

Answers:

Answer

I suggest shuffling a copy of the array using the Fisher-Yates shuffle and taking a slice:

function getRandomSubarray(arr, size) {
    var shuffled = arr.slice(0), i = arr.length, temp, index;
    while (i--) {
        index = Math.floor((i + 1) * Math.random());
        temp = shuffled[index];
        shuffled[index] = shuffled[i];
        shuffled[i] = temp;
    }
    return shuffled.slice(0, size);
}

var x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15];
var fiveRandomMembers = getRandomSubarray(x, 5);

Note that this will not be the most efficient method for getting a small random subset of a large array because it shuffles the whole array unnecessarily. For better performance you could do a partial shuffle instead:

function getRandomSubarray(arr, size) {
    var shuffled = arr.slice(0), i = arr.length, min = i - size, temp, index;
    while (i-- > min) {
        index = Math.floor((i + 1) * Math.random());
        temp = shuffled[index];
        shuffled[index] = shuffled[i];
        shuffled[i] = temp;
    }
    return shuffled.slice(min);
}
Answer

A little late to the party but this could be solved with underscore's new sample method (underscore 1.5.2 - Sept 2013):

var x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15];

var randomFiveNumbers = _.sample(x, 5);
Answer

Or... if you use underscore.js...

_und = require('underscore');

...

function sample(a, n) {
    return _und.take(_und.shuffle(a), n);
}

Simple enough.

Answer

In my opinion, I do not think shuffling the entire deck necessary. You just need to make sure your sample is random not your deck. What you can do, is select the size amount from the front then swap each one in the sampling array with another position in it. So, if you allow replacement you get more and more shuffled.

function getRandom(length) { return Math.floor(Math.random()*(length)); }

function getRandomSample(array, size) {
    var length = array.length;

    for(var i = size; i--;) {
        var index = getRandom(length);
        var temp = array[index];
        array[index] = array[i];
        array[i] = temp;
    }

    return array.slice(0, size);
}

This algorithm is only 2*size steps, if you include the slice method, to select the random sample.


More Random

To make the sample more random, we can randomly select the starting point of the sample. But it is a little more expensive to get the sample.

function getRandomSample(array, size) {
    var length = array.length, start = getRandom(length);

    for(var i = size; i--;) {
        var index = (start + i)%length, rindex = getRandom(length);
        var temp = array[rindex];
        array[rindex] = array[index];
        array[index] = temp;
    }
    var end = start + size, sample = array.slice(start, end);
    if(end > length)
        sample = sample.concat(array.slice(0, end - length));
    return sample;
}

What makes this more random is the fact that when you always just shuffling the front items you tend to not get them very often in the sample if the sampling array is large and the sample is small. This would not be a problem if the array was not supposed to always be the same. So, what this method does is change up this position where the shuffled region starts.


No Replacement

To not have to copy the sampling array and not worry about replacement, you can do the following but it does give you 3*size vs the 2*size.

function getRandomSample(array, size) {
    var length = array.length, swaps = [], i = size, temp;

    while(i--) {
        var rindex = getRandom(length);
        temp = array[rindex];
        array[rindex] = array[i];
        array[i] = temp;
        swaps.push({ from: i, to: rindex });
    }

    var sample = array.slice(0, size);

    // Put everything back.
    i = size;
    while(i--) {
         var pop = swaps.pop();
         temp = array[pop.from];
         array[pop.from] = array[pop.to];
         array[pop.to] = temp;
    }

    return sample;
}

No Replacement and More Random

To apply the algorithm that gave a little bit more random samples to the no replacement function:

function getRandomSample(array, size) {
    var length = array.length, start = getRandom(length),
        swaps = [], i = size, temp;

    while(i--) {
        var index = (start + i)%length, rindex = getRandom(length);
        temp = array[rindex];
        array[rindex] = array[index];
        array[index] = temp;
        swaps.push({ from: index, to: rindex });
    }

    var end = start + size, sample = array.slice(start, end);
    if(end > length)
        sample = sample.concat(array.slice(0, end - length));

    // Put everything back.
    i = size;
    while(i--) {
         var pop = swaps.pop();
         temp = array[pop.from];
         array[pop.from] = array[pop.to];
         array[pop.to] = temp;
    }

    return sample;
}

Faster...

Like all of these post, this uses the Fisher-Yates Shuffle. But, I removed the over head of copying the array.

function getRandomSample(array, size) {
    var r, i = array.length, end = i - size, temp, swaps = getRandomSample.swaps;

    while (i-- > end) {
        r = getRandom(i + 1);
        temp = array[r];
        array[r] = array[i];
        array[i] = temp;
        swaps.push(i);
        swaps.push(r);
    }

    var sample = array.slice(end);

    while(size--) {
        i = swaps.pop();
        r = swaps.pop();
        temp = array[i];
        array[i] = array[r];
        array[r] = temp;
    }

    return sample;
}
getRandomSample.swaps = [];
Answer

Here is another implementation based on Fisher-Yater Shuffle. But this one is optimized for the case where the sample size is significantly smaller than the array length. This implementation doesn't scan the entire array nor allocates arrays as large as the original array. It uses sparse arrays to reduce memory allocation.

function getRandomSample(array, count) {
    var indices = [];
    var result = new Array(count);
    for (let i = 0; i < count; i++ ) {
        let j = Math.floor(Math.random() * (array.length - i) + i);
        result[i] = array[indices[j] === undefined ? j : indices[j]];
        indices[j] = indices[i] === undefined ? i : indices[i];
    }
    return result;
}
Answer

You can get a 5 elements sample by this way:

var sample = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
.map(a => [a,Math.random()])
.sort((a,b) => {return a[1] < b[1] ? -1 : 1;})
.slice(0,5)
.map(a => a[0]);

You can define it as a function to use in your code:

var randomSample = function(arr,num){ return arr.map(a => [a,Math.random()]).sort((a,b) => {return a[1] < b[1] ? -1 : 1;}).slice(0,num).map(a => a[0]); }

Or add it to the Array object itself:

    Array.prototype.sample = function(num){ return this.map(a => [a,Math.random()]).sort((a,b) => {return a[1] < b[1] ? -1 : 1;}).slice(0,num).map(a => a[0]); };

if you want, you can separate the code for to have 2 functionalities (Shuffle and Sample):

    Array.prototype.shuffle = function(){ return this.map(a => [a,Math.random()]).sort((a,b) => {return a[1] < b[1] ? -1 : 1;}).map(a => a[0]); };
    Array.prototype.sample = function(num){ return this.shuffle().slice(0,num); };
Answer

While I strongly support using the Fisher-Yates Shuffle, as suggested by Tim Down, here's a very short method for achieving a random subset as requested, mathematically correct, including the empty set, and the given set itself.

Note solution depends on lodash / underscore:

const _ = require('loadsh')

function subset(arr) {
    return _.sample(arr, _.random(arr.length));
}
Answer

Perhaps I am missing something, but it seems there is a solution that does not require the complexity or potential overhead of a shuffle:

function sample(array,size) {
  const results = [],
    sampled = {};
  while(results.length<size && results.length<array.length) {
    const index = Math.trunc(Math.random() * array.length);
    if(!sampled[index]) {
      results.push(array[index]);
      sampled[index] = true;
    }
  }
  return results;
}
Answer

You can remove the elements from a copy of the array as you select them. Performance is probably not ideal, but it might be OK for what you need:

function getRandom(arr, size) {
  var copy = arr.slice(0), rand = [];
  for (var i = 0; i < size && i < copy.length; i++) {
    var index = Math.floor(Math.random() * copy.length);
    rand.push(copy.splice(index, 1)[0]);
  }
  return rand;
}
Answer

If you're using lodash the API changed in 4.x:

const oneItem = _.sample(arr);
const nItems = _.sampleSize(arr, n);

https://lodash.com/docs#sampleSize

Tags

Recent Questions

Top Questions

Home Tags Terms of Service Privacy Policy DMCA Contact Us Javascript

©2020 All rights reserved.