Diffstat (limited to 'ot/gpu/cudamat/examples')
-rw-r--r--  ot/gpu/cudamat/examples/bench_cudamat.py   97
-rw-r--r--  ot/gpu/cudamat/examples/nn_cudamat.py     133
-rw-r--r--  ot/gpu/cudamat/examples/rbm_cudamat.py     98
-rw-r--r--  ot/gpu/cudamat/examples/rbm_numpy.py       72
-rw-r--r--  ot/gpu/cudamat/examples/util.py            22
5 files changed, 422 insertions, 0 deletions
diff --git a/ot/gpu/cudamat/examples/bench_cudamat.py b/ot/gpu/cudamat/examples/bench_cudamat.py
new file mode 100644
index 0000000..b3a5c19
--- /dev/null
+++ b/ot/gpu/cudamat/examples/bench_cudamat.py
@@ -0,0 +1,97 @@
+from __future__ import print_function, division
+import sys
+import numpy as np
+import cudamat as cmt
+import time
+import timeit
+from inspect import getmodule, getmembers, isfunction
+try: from itertools import ifilter as filter  # Python 2 compatibility
+except ImportError: pass
+
+# heat-up time in seconds before starting the benchmark
+HEATUP = 2
+
+# shapes used for the small and large test matrix
+XS_SHAPE = (400, 256)
+XL_SHAPE = (4096, 4096)
+
+# timeit number and repeat parameter
+NUM_ITER = 100
+NUM_REPEATS = 5
+
+def setup(shape):
+ """Creates two matrices and corresponding row/column vectors"""
+ mat = cmt.empty(shape).fill_with_randn()
+ mat2 = cmt.empty(shape).fill_with_randn()
+ col = cmt.empty((shape[0], 1)).assign(0)
+ row = cmt.empty((1, shape[1])).assign(0)
+ return mat, mat2, col, row
+
+def bench_dot(X, Y, col, row):
+ cmt.dot(X.T, Y)
+
+def bench_add(X, Y, col, row):
+ X.add(Y)
+bench_add.repeats = 5 # 5 times more repetitions than usual
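+# (per-benchmark 'repeats' attributes like the one above are picked up
+# in main() via getattr(func, 'repeats', 1))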
+
+def bench_mult(X, Y, col, row):
+ X.mult(Y)
+
+def bench_sigm(X, Y, col, row):
+ X.apply_sigmoid()
+
+def bench_colsum(X, Y, col, row):
+ X.sum(axis=0, target=row)
+
+def bench_rowsum(X, Y, col, row):
+ X.sum(axis=1, target=col)
+
+def bench_addcolsum(X, Y, col, row):
+ row.add_sums(X, axis=0, mult=3.2, beta=0.2)
+
+def bench_addrowsum(X, Y, col, row):
+ col.add_sums(X, axis=1, mult=3.2, beta=0.2)
+
+def bench_colmax(X, Y, col, row):
+ X.max(axis=0, target=row)
+
+def bench_rowmax(X, Y, col, row):
+ X.max(axis=1, target=col)
+
+def bench_addcolmult(X, Y, col, row):
+ X.add_col_mult(col, mult=3.2)
+
+def heatup(duration):
+ """Heat-up the GPU for a while so it enters full-performance mode"""
+ t1 = time.time()
+ while time.time() - t1 < duration:
+ cmt.dot(cmt.empty((200, 200)), cmt.empty((200, 200)))
+
+def main():
+ cmt.init()
+ cmt.CUDAMatrix.init_random()
+ if HEATUP:
+ print("heating up for %g seconds..." % HEATUP, end=' ')
+ sys.stdout.flush()
+ heatup(HEATUP)
+ print("done.")
+ print("small matrix shape:", XS_SHAPE)
+ print("large matrix shape:", XL_SHAPE)
+ for funcname, func in filter(lambda f: f[0].startswith('bench_'),
+ getmembers(getmodule(main), isfunction)):
+ print("%-15s" % funcname[len('bench_'):], end=' ')
+ sys.stdout.flush()
+        for size, shape, factor in ('small', XS_SHAPE, 10), ('large', XL_SHAPE, 1):
+            repeat = NUM_REPEATS * getattr(func, 'repeats', 1)
+            # report the best (minimum) mean time per call across all repeats
+            best_time = min(timeit.repeat(
+                setup="from __main__ import setup, %s\nmats = setup(%s)" % (funcname, shape),
+                stmt="%s(*mats)" % funcname, repeat=repeat,
+                number=NUM_ITER * factor)) / (NUM_ITER * factor)
+            print("%.3es (%s) " % (best_time, size), end=' ')
+            sys.stdout.flush()
+        print()
+ cmt.shutdown()
+
+if __name__ == "__main__":
+ main()
+
diff --git a/ot/gpu/cudamat/examples/nn_cudamat.py b/ot/gpu/cudamat/examples/nn_cudamat.py
new file mode 100644
index 0000000..7c56c7d
--- /dev/null
+++ b/ot/gpu/cudamat/examples/nn_cudamat.py
@@ -0,0 +1,133 @@
+# This file shows how to implement a single hidden layer neural network for
+# performing binary classification on the GPU using cudamat.
+
+from __future__ import division
+import time
+import numpy as np
+import cudamat as cm
+from cudamat import learn as cl
+import util
+
+# initialize CUDA
+cm.cublas_init()
+
+# load data
+util.load('mnist49.dat', globals())
+
+# Put training data onto the GPU.
+dat_train = dat_train/255.
+dat_train = dat_train - (np.mean(dat_train, 1)+10**-8)[:, np.newaxis]
+dev_train = cm.CUDAMatrix(dat_train)
+dev_lbl = cm.CUDAMatrix(lbl_train)
+
+# training parameters
+epsilon = 0.01
+momentum = 0.9
+
+num_epochs = 30
+batch_size = 128
+num_batches = dat_train.shape[1]//batch_size
+
+# model parameters
+dim_in = dat_train.shape[0]
+dim_out = 1
+num_hid = 1024
+
+# initialize weights
+w_w1 = cm.CUDAMatrix(dim_in ** -0.5 * np.random.randn(dim_in, num_hid))
+w_b1 = cm.CUDAMatrix(np.zeros((num_hid, 1)))
+w_w2 = cm.CUDAMatrix(num_hid ** -0.5 * np.random.randn(num_hid, dim_out))
+w_b2 = cm.CUDAMatrix(np.zeros((dim_out, 1)))
+
+# initialize weight update matrices
+wu_w1 = cm.empty(w_w1.shape).assign(0)
+wu_b1 = cm.empty(w_b1.shape).assign(0)
+wu_w2 = cm.empty(w_w2.shape).assign(0)
+wu_b2 = cm.empty(w_b2.shape).assign(0)
+
+# initialize temporary storage
+h = cm.empty((num_hid, batch_size))
+out = cm.empty((dim_out, batch_size))
+delta = cm.empty((num_hid, batch_size))
+
+# Train neural network.
+start_time = time.time()
+for epoch in range(num_epochs):
+ print("Epoch %i" % (epoch + 1))
+ err = []
+
+ for batch in range(num_batches):
+ # get current minibatch
+ inp = dev_train.slice(batch*batch_size,(batch + 1)*batch_size)
+ target = dev_lbl.slice(batch*batch_size,(batch + 1)*batch_size)
+
+ # forward pass
+ cm.dot(w_w1.T, inp, target = h)
+
+ h.add_col_vec(w_b1)
+ h.apply_sigmoid()
+
+ cm.dot(w_w2.T, h, target = out)
+
+ out.add_col_vec(w_b2)
+ out.apply_sigmoid()
+
+ # back prop errors
+ out.subtract(target) # compute error
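+        # (using prediction - target directly as the output delta matches the
+        # sigmoid-plus-cross-entropy gradient, where the derivative terms
+        # cancel, so no sigmoid derivative is applied at this layer)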
+
+ # gradients for w_w2 and w_b2
+ wu_w2.add_dot(h, out.T, beta = momentum)
+ wu_b2.add_sums(out, axis = 1, beta = momentum)
+
+ # compute delta
+ cm.dot(w_w2, out, target = delta)
+
+ # delta = delta * h * (1 - h)
+ cl.mult_by_sigmoid_deriv(delta, h)
+
+ # gradients for w_w1 and w_b1
+ wu_w1.add_dot(inp, delta.T, beta = momentum)
+ wu_b1.add_sums(delta, axis = 1, beta = momentum)
+
+ # update weights
+ w_w1.subtract_mult(wu_w1, epsilon/batch_size)
+ w_b1.subtract_mult(wu_b1, epsilon/batch_size)
+ w_w2.subtract_mult(wu_w2, epsilon/batch_size)
+ w_b2.subtract_mult(wu_b2, epsilon/batch_size)
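+        # (each wu_* holds momentum * previous update + current gradient, so
+        # these four lines implement SGD with momentum at learning rate epsilon)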
+
+ # calculate error on current minibatch
+ err.append(np.abs(out.asarray())>0.5)
+
+ print("Training misclassification rate: %f" % np.mean(err))
+ print("Time: %f" % (time.time() - start_time))
+
+# Evaluate neural network on test data.
+
+# Load test data onto the GPU.
+dat_test = dat_test/255.
+dat_test = dat_test - np.mean(dat_test, 1)[:, np.newaxis]
+dev_test = cm.CUDAMatrix(dat_test)
+dev_lbl = cm.CUDAMatrix(lbl_test)
+
+# Initialize temporary storage.
+h = cm.empty((num_hid, dat_test.shape[1]))
+out = cm.empty((dim_out, dat_test.shape[1]))
+
+# forward pass
+cm.dot(w_w1.T, dev_test, target = h)
+
+h.add_col_vec(w_b1)
+h.apply_sigmoid()
+
+cm.dot(w_w2.T, h, target = out)
+
+out.add_col_vec(w_b2)
+out.apply_sigmoid()
+
+# compute error
+out.subtract(dev_lbl)
+
+print("Testing misclassification rate: %f" % np.mean(np.abs(out.asarray())>0.5))
+
+cm.cublas_shutdown()
diff --git a/ot/gpu/cudamat/examples/rbm_cudamat.py b/ot/gpu/cudamat/examples/rbm_cudamat.py
new file mode 100644
index 0000000..3f6a900
--- /dev/null
+++ b/ot/gpu/cudamat/examples/rbm_cudamat.py
@@ -0,0 +1,98 @@
+from __future__ import division
+import time
+import numpy as np
+import cudamat as cm
+import util
+
+# initialize CUDA
+cm.cublas_init()
+cm.CUDAMatrix.init_random(1)
+
+# load data
+util.load('mnist.dat', globals())
+dev_dat = cm.CUDAMatrix(cm.reformat(dat/255.))
+
+# training parameters
+epsilon = 0.1
+momentum = 0.9
+
+num_epochs = 30
+batch_size = 128
+num_batches = dat.shape[1]//batch_size
+
+# model parameters
+num_vis = dat.shape[0]
+num_hid = 4096
+
+# initialize weights
+w_vh = cm.CUDAMatrix(0.1 * np.random.randn(num_vis, num_hid))
+w_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))
+w_h = cm.CUDAMatrix(-4.*np.ones((num_hid, 1)))
+
+# initialize weight updates
+wu_vh = cm.CUDAMatrix(np.zeros((num_vis, num_hid)))
+wu_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))
+wu_h = cm.CUDAMatrix(np.zeros((num_hid, 1)))
+
+# initialize temporary storage
+v = cm.empty((num_vis, batch_size))
+h = cm.empty((num_hid, batch_size))
+r = cm.empty((num_hid, batch_size))
+
+start_time = time.time()
+for epoch in range(num_epochs):
+ print("Epoch %i" % (epoch + 1))
+ err = []
+
+ for batch in range(num_batches):
+ # get current minibatch
+ v_true = dev_dat.slice(batch*batch_size,(batch + 1)*batch_size)
+ v.assign(v_true)
+
+ # apply momentum
+ wu_vh.mult(momentum)
+ wu_v.mult(momentum)
+ wu_h.mult(momentum)
+
+ # positive phase
+ cm.dot(w_vh.T, v, target = h)
+ h.add_col_vec(w_h)
+ h.apply_sigmoid()
+
+ wu_vh.add_dot(v, h.T)
+ wu_v.add_sums(v, axis = 1)
+ wu_h.add_sums(h, axis = 1)
+
+ # sample hiddens
+ r.fill_with_rand()
+ r.less_than(h, target = h)
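+        # (h now holds Bernoulli samples: a unit is 1 exactly where a uniform
+        # random draw in r fell below its activation probability)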
+
+ # negative phase
+ cm.dot(w_vh, h, target = v)
+ v.add_col_vec(w_v)
+ v.apply_sigmoid()
+
+ cm.dot(w_vh.T, v, target = h)
+ h.add_col_vec(w_h)
+ h.apply_sigmoid()
+
+ wu_vh.subtract_dot(v, h.T)
+ wu_v.add_sums(v, axis = 1, mult = -1.)
+ wu_h.add_sums(h, axis = 1, mult = -1.)
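+        # wu_* now holds the CD-1 gradient estimate: data-phase statistics
+        # minus reconstruction-phase statistics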
+
+ # update weights
+ w_vh.add_mult(wu_vh, epsilon/batch_size)
+ w_v.add_mult(wu_v, epsilon/batch_size)
+ w_h.add_mult(wu_h, epsilon/batch_size)
+
+ # calculate reconstruction error
+ v.subtract(v_true)
+ err.append(v.euclid_norm()**2/(num_vis*batch_size))
+
+ print("Mean squared error: %f" % np.mean(err))
+ print("Time: %f" % (time.time() - start_time))
+
+w_vh.copy_to_host()
+util.save('weights.dat', 'w_vh', {'w_vh': w_vh.numpy_array})
+
+cm.cublas_shutdown()
diff --git a/ot/gpu/cudamat/examples/rbm_numpy.py b/ot/gpu/cudamat/examples/rbm_numpy.py
new file mode 100644
index 0000000..1331566
--- /dev/null
+++ b/ot/gpu/cudamat/examples/rbm_numpy.py
@@ -0,0 +1,72 @@
+from __future__ import division
+import time
+import numpy as np
+import util
+
+# load data
+util.load('mnist.dat', globals())
+dat = dat/255.
+
+# training parameters
+epsilon = 0.01
+momentum = 0.9
+
+num_epochs = 10
+batch_size = 64
+num_batches = dat.shape[1]//batch_size
+
+# model parameters
+num_vis = dat.shape[0]
+num_hid = 1024
+
+# initialize weights
+w_vh = 0.1 * np.random.randn(num_vis, num_hid)
+w_v = np.zeros((num_vis, 1))
+w_h = np.zeros((num_hid, 1))
+
+# initialize weight updates
+wu_vh = np.zeros((num_vis, num_hid))
+wu_v = np.zeros((num_vis, 1))
+wu_h = np.zeros((num_hid, 1))
+
+start_time = time.time()
+for epoch in range(num_epochs):
+ print("Epoch %i" % (epoch + 1))
+ err = []
+
+ for batch in range(num_batches):
+ v_true = dat[:, batch*batch_size:(batch + 1)*batch_size]
+ v = v_true
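+        # (a reference is fine here: v is only rebound later, never mutated
+        # in place, so v_true stays intact for the error computation)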
+
+ # apply momentum
+ wu_vh *= momentum
+ wu_v *= momentum
+ wu_h *= momentum
+
+ # positive phase
+ h = 1. / (1 + np.exp(-(np.dot(w_vh.T, v) + w_h)))
+
+ wu_vh += np.dot(v, h.T)
+ wu_v += v.sum(1)[:, np.newaxis]
+ wu_h += h.sum(1)[:, np.newaxis]
+
+ # sample hiddens
+ h = 1. * (h > np.random.rand(num_hid, batch_size))
+
+ # negative phase
+ v = 1. / (1 + np.exp(-(np.dot(w_vh, h) + w_v)))
+ h = 1. / (1 + np.exp(-(np.dot(w_vh.T, v) + w_h)))
+
+ wu_vh -= np.dot(v, h.T)
+ wu_v -= v.sum(1)[:, np.newaxis]
+ wu_h -= h.sum(1)[:, np.newaxis]
+
+ # update weights
+ w_vh += epsilon/batch_size * wu_vh
+ w_v += epsilon/batch_size * wu_v
+ w_h += epsilon/batch_size * wu_h
+
+ err.append(np.mean((v - v_true)**2))
+
+ print("Mean squared error: %f" % np.mean(err))
+ print("Time: %f" % (time.time() - start_time))
diff --git a/ot/gpu/cudamat/examples/util.py b/ot/gpu/cudamat/examples/util.py
new file mode 100644
index 0000000..79ceead
--- /dev/null
+++ b/ot/gpu/cudamat/examples/util.py
@@ -0,0 +1,22 @@
+from __future__ import division
+import gzip
+try: import cPickle as pickle  # Python 2 compatibility
+except ImportError: import pickle
+
+def save(fname, var_list, source_dict):
+ var_list = [var.strip() for var in var_list.split() if len(var.strip())>0]
+ fo = gzip.GzipFile(fname, 'wb')
+ pickle.dump(var_list, fo)
+ for var in var_list:
+ pickle.dump(source_dict[var], fo, protocol=2)
+ fo.close()
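+# The resulting file holds one pickled list of names followed by one pickled
+# object per name, in order; load() below reads them back the same way.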
+
+def load(fname, target_dict, verbose = True):
+ fo = gzip.GzipFile(fname, 'rb')
+ var_list = pickle.load(fo)
+ if verbose:
+ print(var_list)
+ for var in var_list:
+ target_dict[var] = pickle.load(fo)
+ fo.close()
+
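+# Example usage (file and variable names here are illustrative only):
+#   import numpy as np, util
+#   util.save('demo.dat', 'w b', {'w': np.zeros((4, 2)), 'b': np.zeros(2)})
+#   util.load('demo.dat', globals())  # prints ['w', 'b'] and binds both names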