<p>您可以使用一种称为<a href="http://en.wikipedia.org/wiki/Reservoir_sampling" rel="nofollow">Reservoir Sampling</a>的技术。它只在输入上迭代一次,因此需要O(n)。这是你的样本代码,工作C++ 11储层采样:</p>
<pre><code>#include <iostream>
#include <string>
#include <vector>
#include <random>
#include <iterator>
#include <algorithm>
#include <cassert>
template<class InputIt, class Generator>
std::vector<typename std::iterator_traits<InputIt>::value_type> sample(InputIt first, InputIt last, size_t n, Generator& engine) {
assert(distance(first, last) >= n);
InputIt nth = next(first, n);
std::vector<typename std::iterator_traits<InputIt>::value_type> result{first, nth};
size_t k=n+1;
for (InputIt it = nth; it != last; ++it, ++k) {
size_t r = std::uniform_int_distribution<size_t>{0, k}(engine);
if (r<n)
result[r] = *it;
}
return result;
}
using namespace std;
int main()
{
string xs[] = {"Alberto", "Bruno", "Carlo", "Dario", "Elio", "Francesco", "Giovanni", "Luca", "Marco", "Nicola", "Oreste", "Pietro", "Rino", "Sandro", "Tonino", "Valerio", "Vittorio"};
mt19937 engine{random_device{}()};
vector<string> ys = sample(begin(xs), end(xs), 5, engine);
for (const string& s : ys)
cout << s << endl;
return 0;
}
</code></pre>