summaryrefslogtreecommitdiff
path: root/geom_bottleneck/include/dnn/parallel/utils.h
diff options
context:
space:
mode:
Diffstat (limited to 'geom_bottleneck/include/dnn/parallel/utils.h')
-rw-r--r--geom_bottleneck/include/dnn/parallel/utils.h100
1 files changed, 100 insertions, 0 deletions
diff --git a/geom_bottleneck/include/dnn/parallel/utils.h b/geom_bottleneck/include/dnn/parallel/utils.h
new file mode 100644
index 0000000..9809e77
--- /dev/null
+++ b/geom_bottleneck/include/dnn/parallel/utils.h
@@ -0,0 +1,100 @@
+#ifndef HERA_BT_PARALLEL_UTILS_H
+#define HERA_BT_PARALLEL_UTILS_H
+
+#include "../utils.h"
+
+namespace hera
+{
+namespace bt
+{
+namespace dnn
+{
+ // Assumes rng is synchronized across ranks
+ template<class DataVector, class RNGType, class SwapFunctor>
+ void shuffle(mpi::communicator& world, DataVector& data, RNGType& rng, const SwapFunctor& swap, DataVector empty = DataVector());
+
+ template<class DataVector, class RNGType>
+ void shuffle(mpi::communicator& world, DataVector& data, RNGType& rng)
+ {
+ typedef decltype(data[0]) T;
+ shuffle(world, data, rng, [](T& x, T& y) { std::swap(x,y); });
+ }
+}
+}
+}
+
+template<class DataVector, class RNGType, class SwapFunctor>
+void
+hera::bt::dnn::shuffle(mpi::communicator& world, DataVector& data, RNGType& rng, const SwapFunctor& swap, DataVector empty)
+{
+ // This is not a perfect shuffle: it dishes out data in chunks of 1/size.
+ // (It can be interpreted as generating a bistochastic matrix by taking the
+ // sum of size random permutation matrices.) Hopefully, it works for our purposes.
+
+ typedef typename RNGType::result_type RNGResult;
+
+ int size = world.size();
+ int rank = world.rank();
+
+ // Generate local seeds
+ boost::uniform_int<RNGResult> uniform;
+ RNGResult seed;
+ for (size_t i = 0; i < size; ++i)
+ {
+ RNGResult v = uniform(rng);
+ if (i == rank)
+ seed = v;
+ }
+ RNGType local_rng(seed);
+
+ // Shuffle local data
+ hera::bt::dnn::random_shuffle(data.begin(), data.end(), local_rng, swap);
+
+ // Decide how much of our data goes to i-th processor
+ std::vector<size_t> out_counts(size);
+ std::vector<int> ranks(boost::counting_iterator<int>(0),
+ boost::counting_iterator<int>(size));
+ for (size_t i = 0; i < size; ++i)
+ {
+ hera::bt::dnn::random_shuffle(ranks.begin(), ranks.end(), rng);
+ ++out_counts[ranks[rank]];
+ }
+
+ // Fill the outgoing array
+ size_t total = 0;
+ std::vector< DataVector > outgoing(size, empty);
+ for (size_t i = 0; i < size; ++i)
+ {
+ size_t count = data.size()*out_counts[i]/size;
+ if (total + count > data.size())
+ count = data.size() - total;
+
+ outgoing[i].reserve(count);
+ for (size_t j = total; j < total + count; ++j)
+ outgoing[i].push_back(data[j]);
+
+ total += count;
+ }
+
+ boost::uniform_int<size_t> uniform_outgoing(0,size-1); // in range [0,size-1]
+ while(total < data.size()) // send leftover to random processes
+ {
+ outgoing[uniform_outgoing(local_rng)].push_back(data[total]);
+ ++total;
+ }
+ data.clear();
+
+ // Exchange the data
+ std::vector< DataVector > incoming(size, empty);
+ mpi::all_to_all(world, outgoing, incoming);
+ outgoing.clear();
+
+ // Assemble our data
+ for(const DataVector& vec : incoming)
+ for (size_t i = 0; i < vec.size(); ++i)
+ data.push_back(vec[i]);
+ hera::bt::dnn::random_shuffle(data.begin(), data.end(), local_rng, swap);
+ // XXX: the final shuffle is irrelevant for our purposes. But it's also cheap.
+}
+
+#endif