summary refs log tree commit diff
path: root/ot/gpu/cudamat/examples/bench_cudamat.py
diff options
context:
space:
mode:
Diffstat (limited to 'ot/gpu/cudamat/examples/bench_cudamat.py')
-rw-r--r-- ot/gpu/cudamat/examples/bench_cudamat.py 97
1 files changed, 97 insertions, 0 deletions
diff --git a/ot/gpu/cudamat/examples/bench_cudamat.py b/ot/gpu/cudamat/examples/bench_cudamat.py
new file mode 100644
index 0000000..b3a5c19
--- /dev/null
+++ b/ot/gpu/cudamat/examples/bench_cudamat.py
@@ -0,0 +1,97 @@
+from __future__ import print_function, division
+import sys
+import numpy as np
+import cudamat as cmt
+import time
+import timeit
+from inspect import getmodule, getmembers, isfunction
+try: from itertools import ifilter as filter
+except: pass
+
+# Seconds main() spends in heatup() before benchmarking; a falsy value (0) skips the heat-up.
+HEATUP = 2
+
+# Shapes of the small and large test matrices that main() passes to setup().
+XS_SHAPE = (400, 256)
+XL_SHAPE = (4096, 4096)
+
+# Base `number` and `repeat` arguments for timeit.repeat(); main() scales both per benchmark.
+NUM_ITER = 100
+NUM_REPEATS = 5
+
+def setup(shape):
+    """Build the four operands handed to every ``bench_*`` function.
+
+    Returns a 4-tuple: two results of ``cmt.empty(shape).fill_with_randn()``,
+    then a ``(shape[0], 1)`` and a ``(1, shape[1])`` matrix, each created
+    with ``cmt.empty`` and passed through ``assign(0)``.
+    """
+    # Two independently created and filled matrices of the requested shape.
+    matrices = [cmt.empty(shape).fill_with_randn() for _ in range(2)]
+    column_vec = cmt.empty((shape[0], 1)).assign(0)
+    row_vec = cmt.empty((1, shape[1])).assign(0)
+    return matrices[0], matrices[1], column_vec, row_vec
+
+def bench_dot(X, Y, col, row):
+    """Benchmark body: ``cmt.dot`` applied to ``X.T`` and ``Y``.
+
+    ``col`` and ``row`` are unused; they are accepted so that the function
+    can be called with the full tuple returned by ``setup()``.
+    """
+    transposed = X.T
+    cmt.dot(transposed, Y)
+
+def bench_add(X, Y, col, row):
+    """Benchmark body: call ``X.add(Y)``; ``col`` and ``row`` are unused."""
+    other = Y
+    X.add(other)
+bench_add.repeats = 5  # main() multiplies NUM_REPEATS by this: 5x the usual repeats
+
+def bench_mult(X, Y, col, row):
+    """Benchmark body: call ``X.mult(Y)``; ``col`` and ``row`` are unused."""
+    other = Y
+    X.mult(other)
+
+def bench_sigm(X, Y, col, row):
+    """Benchmark body: call ``X.apply_sigmoid()``.
+
+    ``Y``, ``col`` and ``row`` are unused; they are accepted so that the
+    function can be called with the full tuple returned by ``setup()``.
+    """
+    X.apply_sigmoid()
+
+def bench_colsum(X, Y, col, row):
+    """Benchmark body: ``X.sum`` along axis 0 with ``row`` as the target.
+
+    ``Y`` and ``col`` are unused.
+    """
+    X.sum(target=row, axis=0)
+
+def bench_rowsum(X, Y, col, row):
+    """Benchmark body: ``X.sum`` along axis 1 with ``col`` as the target.
+
+    ``Y`` and ``row`` are unused.
+    """
+    X.sum(target=col, axis=1)
+
+def bench_addcolsum(X, Y, col, row):
+    """Benchmark body: ``row.add_sums`` over ``X`` along axis 0.
+
+    Uses the fixed coefficients ``mult=3.2`` and ``beta=0.2``; ``Y`` and
+    ``col`` are unused.
+    """
+    row.add_sums(X, beta=0.2, mult=3.2, axis=0)
+
+def bench_addrowsum(X, Y, col, row):
+    """Benchmark body: ``col.add_sums`` over ``X`` along axis 1.
+
+    Uses the fixed coefficients ``mult=3.2`` and ``beta=0.2``; ``Y`` and
+    ``row`` are unused.
+    """
+    col.add_sums(X, beta=0.2, mult=3.2, axis=1)
+
+def bench_colmax(X, Y, col, row):
+    """Benchmark body: ``X.max`` along axis 0 with ``row`` as the target.
+
+    ``Y`` and ``col`` are unused.
+    """
+    X.max(target=row, axis=0)
+
+def bench_rowmax(X, Y, col, row):
+    """Benchmark body: ``X.max`` along axis 1 with ``col`` as the target.
+
+    ``Y`` and ``row`` are unused.
+    """
+    X.max(target=col, axis=1)
+
+def bench_addcolmult(X, Y, col, row):
+    """Benchmark body: call ``X.add_col_mult(col, mult=3.2)``.
+
+    ``Y`` and ``row`` are unused.
+    """
+    factor = 3.2
+    X.add_col_mult(col, mult=factor)
+
+def heatup(duration):
+    """Keep the GPU busy for roughly ``duration`` seconds.
+
+    Repeatedly calls ``cmt.dot`` on two freshly created 200x200 matrices
+    until ``duration`` seconds of wall-clock time (``time.time()``) have
+    passed, so that the GPU can enter full-performance mode before timing.
+    """
+    warmup_shape = (200, 200)
+    started = time.time()
+    while time.time() - started < duration:
+        cmt.dot(cmt.empty(warmup_shape), cmt.empty(warmup_shape))
+
+def main():
+    """Time every ``bench_*`` function of this module and print the results.
+
+    Calls ``cmt.init()`` and ``cmt.CUDAMatrix.init_random()``, heats up the
+    GPU for ``HEATUP`` seconds (skipped when ``HEATUP`` is falsy), then runs
+    each benchmark through ``timeit.repeat`` once with ``XS_SHAPE`` and once
+    with ``XL_SHAPE`` operands. For each size it prints the smallest
+    measured total divided by the number of calls, i.e. the best per-call
+    time in seconds.
+
+    ``cmt.shutdown()`` runs on the way out even if a benchmark raises.
+    The timeit setup string imports from ``__main__``, so this only works
+    when the module is executed as a script.
+    """
+    cmt.init()
+    try:
+        cmt.CUDAMatrix.init_random()
+        if HEATUP:
+            print("heating up for %g seconds..." % HEATUP, end=' ')
+            sys.stdout.flush()
+            heatup(HEATUP)
+            print("done.")
+        print("small matrix shape:", XS_SHAPE)
+        print("large matrix shape:", XL_SHAPE)
+        # getmembers() yields (name, function) pairs sorted by name.
+        benchmarks = [(name, func)
+                      for name, func in getmembers(getmodule(main), isfunction)
+                      if name.startswith('bench_')]
+        for funcname, func in benchmarks:
+            print("%-15s" % funcname[len('bench_'):], end=' ')
+            sys.stdout.flush()
+            # A benchmark may request more repeats via a `repeats` attribute;
+            # this does not depend on the matrix size, so compute it once.
+            repeat = NUM_REPEATS * getattr(func, 'repeats', 1)
+            for size, shape, factor in (('small', XS_SHAPE, 10),
+                                        ('large', XL_SHAPE, 1)):
+                # The small case runs 10x more calls per measurement.
+                number = NUM_ITER * factor
+                # Named `seconds` so the imported `time` module is not
+                # shadowed inside this function.
+                seconds = min(timeit.repeat(
+                    setup="from __main__ import setup, %s\nmats = setup(%s)"
+                          % (funcname, shape),
+                    stmt="%s(*mats)" % funcname,
+                    repeat=repeat,
+                    number=number)) / number
+                print("%.3es (%s) " % (seconds, size), end=' ')
+                sys.stdout.flush()
+            print()
+    finally:
+        # Release cudamat even when a benchmark fails part-way through.
+        cmt.shutdown()
+
+if __name__=="__main__":  # script entry point; the timeit setup in main() imports from __main__
+ main()
+