Diffstat (limited to 'ot/gpu/cudamat/examples')
-rw-r--r--   ot/gpu/cudamat/examples/bench_cudamat.py |  97
-rw-r--r--   ot/gpu/cudamat/examples/nn_cudamat.py    | 133
-rw-r--r--   ot/gpu/cudamat/examples/rbm_cudamat.py   |  98
-rw-r--r--   ot/gpu/cudamat/examples/rbm_numpy.py     |  72
-rw-r--r--   ot/gpu/cudamat/examples/util.py          |  22
5 files changed, 422 insertions, 0 deletions
diff --git a/ot/gpu/cudamat/examples/bench_cudamat.py b/ot/gpu/cudamat/examples/bench_cudamat.py
new file mode 100644
index 0000000..b3a5c19
--- /dev/null
+++ b/ot/gpu/cudamat/examples/bench_cudamat.py
@@ -0,0 +1,97 @@
+from __future__ import print_function, division
+import sys
+import numpy as np
+import cudamat as cmt
+import time
+import timeit
+from inspect import getmodule, getmembers, isfunction
+try: from itertools import ifilter as filter
+except ImportError: pass
+
+# heat-up time in seconds before starting the benchmark
+HEATUP = 2
+
+# shapes used for the small and large test matrix
+XS_SHAPE = (400, 256)
+XL_SHAPE = (4096, 4096)
+
+# timeit number and repeat parameter
+NUM_ITER = 100
+NUM_REPEATS = 5
+
+def setup(shape):
+    """Creates two matrices and corresponding row/column vectors"""
+    mat = cmt.empty(shape).fill_with_randn()
+    mat2 = cmt.empty(shape).fill_with_randn()
+    col = cmt.empty((shape[0], 1)).assign(0)
+    row = cmt.empty((1, shape[1])).assign(0)
+    return mat, mat2, col, row
+
+def bench_dot(X, Y, col, row):
+    cmt.dot(X.T, Y)
+
+def bench_add(X, Y, col, row):
+    X.add(Y)
+bench_add.repeats = 5  # 5 times more repetitions than usual
+
+def bench_mult(X, Y, col, row):
+    X.mult(Y)
+
+def bench_sigm(X, Y, col, row):
+    X.apply_sigmoid()
+
+def bench_colsum(X, Y, col, row):
+    X.sum(axis=0, target=row)
+
+def bench_rowsum(X, Y, col, row):
+    X.sum(axis=1, target=col)
+
+def bench_addcolsum(X, Y, col, row):
+    row.add_sums(X, axis=0, mult=3.2, beta=0.2)
+
+def bench_addrowsum(X, Y, col, row):
+    col.add_sums(X, axis=1, mult=3.2, beta=0.2)
+
+def bench_colmax(X, Y, col, row):
+    X.max(axis=0, target=row)
+
+def bench_rowmax(X, Y, col, row):
+    X.max(axis=1, target=col)
+
+def bench_addcolmult(X, Y, col, row):
+    X.add_col_mult(col, mult=3.2)
+
+def heatup(duration):
+    """Heat-up the GPU for a while so it enters full-performance mode"""
+    t1 = time.time()
+    while time.time() - t1 < duration:
+        cmt.dot(cmt.empty((200, 200)), cmt.empty((200, 200)))
+
+def main():
+    cmt.init()
+    cmt.CUDAMatrix.init_random()
+    if HEATUP:
+        print("heating up for %g seconds..." % HEATUP, end=' ')
+        sys.stdout.flush()
+        heatup(HEATUP)
+        print("done.")
+    print("small matrix shape:", XS_SHAPE)
+    print("large matrix shape:", XL_SHAPE)
+    for funcname, func in filter(lambda f: f[0].startswith('bench_'),
+                                 getmembers(getmodule(main), isfunction)):
+        print("%-15s" % funcname[len('bench_'):], end=' ')
+        sys.stdout.flush()
+        for size, shape, factor in ('small', XS_SHAPE, 10), ('large', XL_SHAPE, 1):
+            repeat = NUM_REPEATS * getattr(func, 'repeats', 1)
+            # best of `repeat` runs; note: not named `time`, to avoid shadowing the module
+            best_time = min(timeit.repeat(
+                setup="from __main__ import setup, %s\nmats = setup(%s)" % (funcname, shape),
+                stmt="%s(*mats)" % funcname, repeat=repeat,
+                number=NUM_ITER * factor)) / (NUM_ITER * factor)
+            print("%.3es (%s) " % (best_time, size), end=' ')
+            sys.stdout.flush()
+        print()
+    cmt.shutdown()
+
+if __name__ == "__main__":
+    main()
diff --git a/ot/gpu/cudamat/examples/nn_cudamat.py b/ot/gpu/cudamat/examples/nn_cudamat.py
new file mode 100644
index 0000000..7c56c7d
--- /dev/null
+++ b/ot/gpu/cudamat/examples/nn_cudamat.py
@@ -0,0 +1,133 @@
+# This file shows how to implement a single hidden layer neural network for
+# performing binary classification on the GPU using cudamat.
+
+from __future__ import division
+import pdb
+import time
+import numpy as np
+import cudamat as cm
+from cudamat import learn as cl
+import util
+
+# initialize CUDA
+cm.cublas_init()
+
+# load data
+util.load('mnist49.dat', globals())
+
+# Put training data onto the GPU.
+dat_train = dat_train/255.
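+# Center each input dimension by subtracting its mean; the tiny 1e-8 offset
+# below appears to be a numerical safeguard.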
+dat_train = dat_train - (np.mean(dat_train, 1)+10**-8)[:, np.newaxis]
+dev_train = cm.CUDAMatrix(dat_train)
+dev_lbl = cm.CUDAMatrix(lbl_train)
+
+# training parameters
+epsilon = 0.01
+momentum = 0.9
+
+num_epochs = 30
+batch_size = 128
+num_batches = dat_train.shape[1]//batch_size
+
+# model parameters
+dim_in = dat_train.shape[0]
+dim_out = 1
+num_hid = 1024
+
+# initialize weights
+w_w1 = cm.CUDAMatrix(dim_in ** -0.5 * np.random.randn(dim_in, num_hid))
+w_b1 = cm.CUDAMatrix(np.zeros((num_hid, 1)))
+w_w2 = cm.CUDAMatrix(num_hid ** -0.5 * np.random.randn(num_hid, dim_out))
+w_b2 = cm.CUDAMatrix(np.zeros((dim_out, 1)))
+
+# initialize weight update matrices
+wu_w1 = cm.empty(w_w1.shape).assign(0)
+wu_b1 = cm.empty(w_b1.shape).assign(0)
+wu_w2 = cm.empty(w_w2.shape).assign(0)
+wu_b2 = cm.empty(w_b2.shape).assign(0)
+
+# initialize temporary storage
+h = cm.empty((num_hid, batch_size))
+out = cm.empty((dim_out, batch_size))
+delta = cm.empty((num_hid, batch_size))
+
+# Train neural network.
+start_time = time.time()
+for epoch in range(num_epochs):
+    print("Epoch %i" % (epoch + 1))
+    err = []
+
+    for batch in range(num_batches):
+        # get current minibatch
+        inp = dev_train.slice(batch*batch_size, (batch + 1)*batch_size)
+        target = dev_lbl.slice(batch*batch_size, (batch + 1)*batch_size)
+
+        # forward pass
+        cm.dot(w_w1.T, inp, target = h)
+
+        h.add_col_vec(w_b1)
+        h.apply_sigmoid()
+
+        cm.dot(w_w2.T, h, target = out)
+
+        out.add_col_vec(w_b2)
+        out.apply_sigmoid()
+
+        # back-propagate errors
+        out.subtract(target)  # compute error
+
+        # gradients for w_w2 and w_b2
+        wu_w2.add_dot(h, out.T, beta = momentum)
+        wu_b2.add_sums(out, axis = 1, beta = momentum)
+
+        # compute delta
+        cm.dot(w_w2, out, target = delta)
+
+        # delta = delta * h * (1 - h)
+        cl.mult_by_sigmoid_deriv(delta, h)
+
+        # gradients for w_w1 and w_b1
+        wu_w1.add_dot(inp, delta.T, beta = momentum)
+        wu_b1.add_sums(delta, axis = 1, beta = momentum)
+
+        # update weights
+        w_w1.subtract_mult(wu_w1, epsilon/batch_size)
+        w_b1.subtract_mult(wu_b1, epsilon/batch_size)
+        w_w2.subtract_mult(wu_w2, epsilon/batch_size)
+        w_b2.subtract_mult(wu_b2, epsilon/batch_size)
+
+        # calculate error on current minibatch
+        err.append(np.abs(out.asarray())>0.5)
+
+    print("Training misclassification rate: %f" % np.mean(err))
+    print("Time: %f" % (time.time() - start_time))
+
+# Evaluate neural network on test data.
+
+# Load test data onto the GPU.
+dat_test = dat_test/255.
+dat_test = dat_test - np.mean(dat_test, 1)[:, np.newaxis]
+dev_test = cm.CUDAMatrix(dat_test)
+dev_lbl = cm.CUDAMatrix(lbl_test)
+
+# Initialize temporary storage.
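+# These buffers span the full test set at once, unlike the minibatch-sized
+# ones used during training.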
+h = cm.empty((num_hid, dat_test.shape[1]))
+out = cm.empty((dim_out, dat_test.shape[1]))
+
+# forward pass
+cm.dot(w_w1.T, dev_test, target = h)
+
+h.add_col_vec(w_b1)
+h.apply_sigmoid()
+
+cm.dot(w_w2.T, h, target = out)
+
+out.add_col_vec(w_b2)
+out.apply_sigmoid()
+
+# compute error
+out.subtract(dev_lbl)
+
+print("Testing misclassification rate: %f" % np.mean(np.abs(out.asarray())>0.5))
+
+cm.cublas_shutdown()
diff --git a/ot/gpu/cudamat/examples/rbm_cudamat.py b/ot/gpu/cudamat/examples/rbm_cudamat.py
new file mode 100644
index 0000000..3f6a900
--- /dev/null
+++ b/ot/gpu/cudamat/examples/rbm_cudamat.py
@@ -0,0 +1,98 @@
+from __future__ import division
+import time
+import numpy as np
+import cudamat as cm
+import util
+
+# initialize CUDA
+cm.cublas_init()
+cm.CUDAMatrix.init_random(1)
+
+# load data
+util.load('mnist.dat', globals())
+dev_dat = cm.CUDAMatrix(cm.reformat(dat/255.))
+
+# training parameters
+epsilon = 0.1
+momentum = 0.9
+
+num_epochs = 30
+batch_size = 128
+num_batches = dat.shape[1]//batch_size
+
+# model parameters
+num_vis = dat.shape[0]
+num_hid = 4096
+
+# initialize weights
+w_vh = cm.CUDAMatrix(0.1 * np.random.randn(num_vis, num_hid))
+w_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))
+w_h = cm.CUDAMatrix(-4.*np.ones((num_hid, 1)))
+
+# initialize weight updates
+wu_vh = cm.CUDAMatrix(np.zeros((num_vis, num_hid)))
+wu_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))
+wu_h = cm.CUDAMatrix(np.zeros((num_hid, 1)))
+
+# initialize temporary storage
+v = cm.empty((num_vis, batch_size))
+h = cm.empty((num_hid, batch_size))
+r = cm.empty((num_hid, batch_size))
+
+start_time = time.time()
+for epoch in range(num_epochs):
+    print("Epoch %i" % (epoch + 1))
+    err = []
+
+    for batch in range(num_batches):
+        # get current minibatch
+        v_true = dev_dat.slice(batch*batch_size, (batch + 1)*batch_size)
+        v.assign(v_true)
+
+        # apply momentum
+        wu_vh.mult(momentum)
+        wu_v.mult(momentum)
+        wu_h.mult(momentum)
+
+        # positive phase
+        cm.dot(w_vh.T, v, target = h)
+        h.add_col_vec(w_h)
+        h.apply_sigmoid()
+
+        wu_vh.add_dot(v, h.T)
+        wu_v.add_sums(v, axis = 1)
+        wu_h.add_sums(h, axis = 1)
+
+        # sample hiddens
+        r.fill_with_rand()
+        r.less_than(h, target = h)
+
+        # negative phase
+        cm.dot(w_vh, h, target = v)
+        v.add_col_vec(w_v)
+        v.apply_sigmoid()
+
+        cm.dot(w_vh.T, v, target = h)
+        h.add_col_vec(w_h)
+        h.apply_sigmoid()
+
+        wu_vh.subtract_dot(v, h.T)
+        wu_v.add_sums(v, axis = 1, mult = -1.)
+        wu_h.add_sums(h, axis = 1, mult = -1.)
+
+        # update weights
+        w_vh.add_mult(wu_vh, epsilon/batch_size)
+        w_v.add_mult(wu_v, epsilon/batch_size)
+        w_h.add_mult(wu_h, epsilon/batch_size)
+
+        # calculate reconstruction error
+        v.subtract(v_true)
+        err.append(v.euclid_norm()**2/(num_vis*batch_size))
+
+    print("Mean squared error: %f" % np.mean(err))
+    print("Time: %f" % (time.time() - start_time))
+
+w_vh.copy_to_host()
+util.save('weights.dat', 'w_vh', {'w_vh': w_vh.numpy_array})
+
+cm.cublas_shutdown()
diff --git a/ot/gpu/cudamat/examples/rbm_numpy.py b/ot/gpu/cudamat/examples/rbm_numpy.py
new file mode 100644
index 0000000..1331566
--- /dev/null
+++ b/ot/gpu/cudamat/examples/rbm_numpy.py
@@ -0,0 +1,72 @@
+from __future__ import division
+import time
+import numpy as np
+import util
+
+# load data
+util.load('mnist.dat', globals())
+dat = dat/255.
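+# Scaling to [0, 1] lets the pixel intensities act as visible-unit probabilities.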
+
+# training parameters
+epsilon = 0.01
+momentum = 0.9
+
+num_epochs = 10
+batch_size = 64
+num_batches = dat.shape[1]//batch_size
+
+# model parameters
+num_vis = dat.shape[0]
+num_hid = 1024
+
+# initialize weights
+w_vh = 0.1 * np.random.randn(num_vis, num_hid)
+w_v = np.zeros((num_vis, 1))
+w_h = np.zeros((num_hid, 1))
+
+# initialize weight updates
+wu_vh = np.zeros((num_vis, num_hid))
+wu_v = np.zeros((num_vis, 1))
+wu_h = np.zeros((num_hid, 1))
+
+start_time = time.time()
+for epoch in range(num_epochs):
+    print("Epoch %i" % (epoch + 1))
+    err = []
+
+    for batch in range(num_batches):
+        v_true = dat[:, batch*batch_size:(batch + 1)*batch_size]
+        v = v_true
+
+        # apply momentum
+        wu_vh *= momentum
+        wu_v *= momentum
+        wu_h *= momentum
+
+        # positive phase
+        h = 1. / (1 + np.exp(-(np.dot(w_vh.T, v) + w_h)))
+
+        wu_vh += np.dot(v, h.T)
+        wu_v += v.sum(1)[:, np.newaxis]
+        wu_h += h.sum(1)[:, np.newaxis]
+
+        # sample hiddens
+        h = 1. * (h > np.random.rand(num_hid, batch_size))
+
+        # negative phase
+        v = 1. / (1 + np.exp(-(np.dot(w_vh, h) + w_v)))
+        h = 1. / (1 + np.exp(-(np.dot(w_vh.T, v) + w_h)))
+
+        wu_vh -= np.dot(v, h.T)
+        wu_v -= v.sum(1)[:, np.newaxis]
+        wu_h -= h.sum(1)[:, np.newaxis]
+
+        # update weights
+        w_vh += epsilon/batch_size * wu_vh
+        w_v += epsilon/batch_size * wu_v
+        w_h += epsilon/batch_size * wu_h
+
+        err.append(np.mean((v - v_true)**2))
+
+    print("Mean squared error: %f" % np.mean(err))
+    print("Time: %f" % (time.time() - start_time))
diff --git a/ot/gpu/cudamat/examples/util.py b/ot/gpu/cudamat/examples/util.py
new file mode 100644
index 0000000..79ceead
--- /dev/null
+++ b/ot/gpu/cudamat/examples/util.py
@@ -0,0 +1,22 @@
+from __future__ import division
+import gzip
+try: import cPickle as pickle
+except ImportError: import pickle
+
+def save(fname, var_list, source_dict):
+    var_list = [var.strip() for var in var_list.split() if len(var.strip()) > 0]
+    fo = gzip.GzipFile(fname, 'wb')
+    pickle.dump(var_list, fo)
+    for var in var_list:
+        pickle.dump(source_dict[var], fo, protocol=2)
+    fo.close()
+
+def load(fname, target_dict, verbose=True):
+    fo = gzip.GzipFile(fname, 'rb')
+    var_list = pickle.load(fo)
+    if verbose:
+        print(var_list)
+    for var in var_list:
+        target_dict[var] = pickle.load(fo)
+    fo.close()
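These two helpers round-trip named arrays through a gzipped pickle stream: save() pickles the list of names followed by each value, and load() reads them back into any dict-like namespace, which is how the examples above populate globals(). A minimal usage sketch (the file name demo.dat and the arrays are illustrative, not part of the commit):

import numpy as np
import util

# Names are passed as one space-separated string and looked up in the dict.
a = np.arange(6).reshape(2, 3)
b = np.zeros(4)
util.save('demo.dat', 'a b', {'a': a, 'b': b})

# load() writes the unpickled arrays into the given namespace.
ns = {}
util.load('demo.dat', ns, verbose=False)
assert (ns['a'] == a).all() and (ns['b'] == b).all()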