summaryrefslogtreecommitdiff
path: root/ot/gpu/cudamat/examples
diff options
context:
space:
mode:
Diffstat (limited to 'ot/gpu/cudamat/examples')
-rw-r--r--ot/gpu/cudamat/examples/bench_cudamat.py97
-rw-r--r--ot/gpu/cudamat/examples/nn_cudamat.py133
-rw-r--r--ot/gpu/cudamat/examples/rbm_cudamat.py98
-rw-r--r--ot/gpu/cudamat/examples/rbm_numpy.py72
-rw-r--r--ot/gpu/cudamat/examples/util.py22
5 files changed, 0 insertions, 422 deletions
diff --git a/ot/gpu/cudamat/examples/bench_cudamat.py b/ot/gpu/cudamat/examples/bench_cudamat.py
deleted file mode 100644
index b3a5c19..0000000
--- a/ot/gpu/cudamat/examples/bench_cudamat.py
+++ /dev/null
@@ -1,97 +0,0 @@
-from __future__ import print_function, division
-import sys
-import numpy as np
-import cudamat as cmt
-import time
-import timeit
-from inspect import getmodule, getmembers, isfunction
-try: from itertools import ifilter as filter
-except: pass
-
-# heat-up time in seconds before starting the benchmark
-HEATUP = 2
-
-# shapes used for the small and large test matrix
-XS_SHAPE = (400, 256)
-XL_SHAPE = (4096, 4096)
-
-# timeit number and repeat parameter
-NUM_ITER = 100
-NUM_REPEATS = 5
-
-def setup(shape):
- """Creates two matrices and corresponding row/column vectors"""
- mat = cmt.empty(shape).fill_with_randn()
- mat2 = cmt.empty(shape).fill_with_randn()
- col = cmt.empty((shape[0], 1)).assign(0)
- row = cmt.empty((1, shape[1])).assign(0)
- return mat, mat2, col, row
-
-def bench_dot(X, Y, col, row):
- cmt.dot(X.T, Y)
-
-def bench_add(X, Y, col, row):
- X.add(Y)
-bench_add.repeats = 5 # 5 times more repetitions than usual
-
-def bench_mult(X, Y, col, row):
- X.mult(Y)
-
-def bench_sigm(X, Y, col, row):
- X.apply_sigmoid()
-
-def bench_colsum(X, Y, col, row):
- X.sum(axis=0, target=row)
-
-def bench_rowsum(X, Y, col, row):
- X.sum(axis=1, target=col)
-
-def bench_addcolsum(X, Y, col, row):
- row.add_sums(X, axis=0, mult=3.2, beta=0.2)
-
-def bench_addrowsum(X, Y, col, row):
- col.add_sums(X, axis=1, mult=3.2, beta=0.2)
-
-def bench_colmax(X, Y, col, row):
- X.max(axis=0, target=row)
-
-def bench_rowmax(X, Y, col, row):
- X.max(axis=1, target=col)
-
-def bench_addcolmult(X, Y, col, row):
- X.add_col_mult(col, mult=3.2)
-
-def heatup(duration):
- """Heat-up the GPU for a while so it enters full-performance mode"""
- t1 = time.time()
- while time.time() - t1 < duration:
- cmt.dot(cmt.empty((200, 200)), cmt.empty((200, 200)))
-
-def main():
- cmt.init()
- cmt.CUDAMatrix.init_random()
- if HEATUP:
- print("heating up for %g seconds..." % HEATUP, end=' ')
- sys.stdout.flush()
- heatup(HEATUP)
- print("done.")
- print("small matrix shape:", XS_SHAPE)
- print("large matrix shape:", XL_SHAPE)
- for funcname, func in filter(lambda f: f[0].startswith('bench_'),
- getmembers(getmodule(main), isfunction)):
- print("%-15s" % funcname[len('bench_'):], end=' ')
- sys.stdout.flush()
- for size, shape, factor in ('small', XS_SHAPE, 10), ('large', XL_SHAPE, 1):
- repeat = NUM_REPEATS * getattr(func, 'repeats', 1)
- time = min(timeit.repeat(\
- setup="from __main__ import setup, %s\nmats = setup(%s)" % (funcname, shape),
- stmt="%s(*mats)" % funcname, repeat=repeat,
- number=NUM_ITER * factor)) / (NUM_ITER * factor)
- print("%.3es (%s) " % (time, size), end=' ')
- sys.stdout.flush()
- print()
- cmt.shutdown()
-
-if __name__=="__main__":
- main()
-
diff --git a/ot/gpu/cudamat/examples/nn_cudamat.py b/ot/gpu/cudamat/examples/nn_cudamat.py
deleted file mode 100644
index 7c56c7d..0000000
--- a/ot/gpu/cudamat/examples/nn_cudamat.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# This file shows how to implement a single hidden layer neural network for
-# performing binary classification on the GPU using cudamat.
-
-from __future__ import division
-import pdb
-import time
-import numpy as np
-import cudamat as cm
-from cudamat import learn as cl
-import util
-
-# initialize CUDA
-cm.cublas_init()
-
-# load data
-util.load('mnist49.dat', globals())
-
-# Put training data onto the GPU.
-dat_train = dat_train/255.
-dat_train = dat_train - (np.mean(dat_train, 1)+10**-8)[:, np.newaxis]
-dev_train = cm.CUDAMatrix(dat_train)
-dev_lbl = cm.CUDAMatrix(lbl_train)
-
-# training parameters
-epsilon = 0.01
-momentum = 0.9
-
-num_epochs = 30
-batch_size = 128
-num_batches = dat_train.shape[1]//batch_size
-
-# model parameters
-dim_in = dat_train.shape[0]
-dim_out = 1
-num_hid = 1024
-
-# initialize weights
-w_w1 = cm.CUDAMatrix(dim_in ** -0.5 * np.random.randn(dim_in, num_hid))
-w_b1 = cm.CUDAMatrix(np.zeros((num_hid, 1)))
-w_w2 = cm.CUDAMatrix(num_hid ** -0.5 * np.random.randn(num_hid, dim_out))
-w_b2 = cm.CUDAMatrix(np.zeros((dim_out, 1)))
-
-# initialize weight update matrices
-wu_w1 = cm.empty(w_w1.shape).assign(0)
-wu_b1 = cm.empty(w_b1.shape).assign(0)
-wu_w2 = cm.empty(w_w2.shape).assign(0)
-wu_b2 = cm.empty(w_b2.shape).assign(0)
-
-# initialize temporary storage
-h = cm.empty((num_hid, batch_size))
-out = cm.empty((dim_out, batch_size))
-delta = cm.empty((num_hid, batch_size))
-
-# Train neural network.
-start_time = time.time()
-for epoch in range(num_epochs):
- print("Epoch %i" % (epoch + 1))
- err = []
-
- for batch in range(num_batches):
- # get current minibatch
- inp = dev_train.slice(batch*batch_size,(batch + 1)*batch_size)
- target = dev_lbl.slice(batch*batch_size,(batch + 1)*batch_size)
-
- # forward pass
- cm.dot(w_w1.T, inp, target = h)
-
- h.add_col_vec(w_b1)
- h.apply_sigmoid()
-
- cm.dot(w_w2.T, h, target = out)
-
- out.add_col_vec(w_b2)
- out.apply_sigmoid()
-
- # back prop errors
- out.subtract(target) # compute error
-
- # gradients for w_w2 and w_b2
- wu_w2.add_dot(h, out.T, beta = momentum)
- wu_b2.add_sums(out, axis = 1, beta = momentum)
-
- # compute delta
- cm.dot(w_w2, out, target = delta)
-
- # delta = delta * h * (1 - h)
- cl.mult_by_sigmoid_deriv(delta, h)
-
- # gradients for w_w1 and w_b1
- wu_w1.add_dot(inp, delta.T, beta = momentum)
- wu_b1.add_sums(delta, axis = 1, beta = momentum)
-
- # update weights
- w_w1.subtract_mult(wu_w1, epsilon/batch_size)
- w_b1.subtract_mult(wu_b1, epsilon/batch_size)
- w_w2.subtract_mult(wu_w2, epsilon/batch_size)
- w_b2.subtract_mult(wu_b2, epsilon/batch_size)
-
- # calculate error on current minibatch
- err.append(np.abs(out.asarray())>0.5)
-
- print("Training misclassification rate: %f" % np.mean(err))
- print("Time: %f" % (time.time() - start_time))
-
-# Evaluate neural network on test data.
-
-# Load test data onto the GPU.
-dat_test = dat_test/255.
-dat_test = dat_test - np.mean(dat_test, 1)[:, np.newaxis]
-dev_test = cm.CUDAMatrix(dat_test)
-dev_lbl = cm.CUDAMatrix(lbl_test)
-
-# Initalize temporary storage.
-h = cm.empty((num_hid, dat_test.shape[1]))
-out = cm.empty((dim_out, dat_test.shape[1]))
-
-# forward pass
-cm.dot(w_w1.T, dev_test, target = h)
-
-h.add_col_vec(w_b1)
-h.apply_sigmoid()
-
-cm.dot(w_w2.T, h, target = out)
-
-out.add_col_vec(w_b2)
-out.apply_sigmoid()
-
-# compute error
-out.subtract(dev_lbl)
-
-print("Testing misclassification rate: %f" % np.mean(np.abs(out.asarray())>0.5))
-
-cm.cublas_shutdown()
diff --git a/ot/gpu/cudamat/examples/rbm_cudamat.py b/ot/gpu/cudamat/examples/rbm_cudamat.py
deleted file mode 100644
index 3f6a900..0000000
--- a/ot/gpu/cudamat/examples/rbm_cudamat.py
+++ /dev/null
@@ -1,98 +0,0 @@
-from __future__ import division
-import time
-import numpy as np
-import cudamat as cm
-import util
-
-# initialize CUDA
-cm.cublas_init()
-cm.CUDAMatrix.init_random(1)
-
-# load data
-util.load('mnist.dat', globals())
-dev_dat = cm.CUDAMatrix(cm.reformat(dat/255.))
-
-# training parameters
-epsilon = 0.1
-momentum = 0.9
-
-num_epochs = 30
-batch_size = 128
-num_batches = dat.shape[1]//batch_size
-
-# model parameters
-num_vis = dat.shape[0]
-num_hid = 4096
-
-# initialize weights
-w_vh = cm.CUDAMatrix(0.1 * np.random.randn(num_vis, num_hid))
-w_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))
-w_h = cm.CUDAMatrix(-4.*np.ones((num_hid, 1)))
-
-# initialize weight updates
-wu_vh = cm.CUDAMatrix(np.zeros((num_vis, num_hid)))
-wu_v = cm.CUDAMatrix(np.zeros((num_vis, 1)))
-wu_h = cm.CUDAMatrix(np.zeros((num_hid, 1)))
-
-# initialize temporary storage
-v = cm.empty((num_vis, batch_size))
-h = cm.empty((num_hid, batch_size))
-r = cm.empty((num_hid, batch_size))
-
-start_time = time.time()
-for epoch in range(num_epochs):
- print("Epoch %i" % (epoch + 1))
- err = []
-
- for batch in range(num_batches):
- # get current minibatch
- v_true = dev_dat.slice(batch*batch_size,(batch + 1)*batch_size)
- v.assign(v_true)
-
- # apply momentum
- wu_vh.mult(momentum)
- wu_v.mult(momentum)
- wu_h.mult(momentum)
-
- # positive phase
- cm.dot(w_vh.T, v, target = h)
- h.add_col_vec(w_h)
- h.apply_sigmoid()
-
- wu_vh.add_dot(v, h.T)
- wu_v.add_sums(v, axis = 1)
- wu_h.add_sums(h, axis = 1)
-
- # sample hiddens
- r.fill_with_rand()
- r.less_than(h, target = h)
-
- # negative phase
- cm.dot(w_vh, h, target = v)
- v.add_col_vec(w_v)
- v.apply_sigmoid()
-
- cm.dot(w_vh.T, v, target = h)
- h.add_col_vec(w_h)
- h.apply_sigmoid()
-
- wu_vh.subtract_dot(v, h.T)
- wu_v.add_sums(v, axis = 1, mult = -1.)
- wu_h.add_sums(h, axis = 1, mult = -1.)
-
- # update weights
- w_vh.add_mult(wu_vh, epsilon/batch_size)
- w_v.add_mult(wu_v, epsilon/batch_size)
- w_h.add_mult(wu_h, epsilon/batch_size)
-
- # calculate reconstruction error
- v.subtract(v_true)
- err.append(v.euclid_norm()**2/(num_vis*batch_size))
-
- print("Mean squared error: %f" % np.mean(err))
- print("Time: %f" % (time.time() - start_time))
-
-w_vh.copy_to_host()
-util.save('weights.dat', 'w_vh', {'w_vh': w_vh.numpy_array})
-
-cm.cublas_shutdown()
diff --git a/ot/gpu/cudamat/examples/rbm_numpy.py b/ot/gpu/cudamat/examples/rbm_numpy.py
deleted file mode 100644
index 1331566..0000000
--- a/ot/gpu/cudamat/examples/rbm_numpy.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from __future__ import division
-import time
-import numpy as np
-import util
-
-# load data
-util.load('mnist.dat', globals())
-dat = dat/255.
-
-# training parameters
-epsilon = 0.01
-momentum = 0.9
-
-num_epochs = 10
-batch_size = 64
-num_batches = dat.shape[1]//batch_size
-
-# model parameters
-num_vis = dat.shape[0]
-num_hid = 1024
-
-# initialize weights
-w_vh = 0.1 * np.random.randn(num_vis, num_hid)
-w_v = np.zeros((num_vis, 1))
-w_h = np.zeros((num_hid, 1))
-
-# initialize weight updates
-wu_vh = np.zeros((num_vis, num_hid))
-wu_v = np.zeros((num_vis, 1))
-wu_h = np.zeros((num_hid, 1))
-
-start_time = time.time()
-for epoch in range(num_epochs):
- print("Epoch %i" % (epoch + 1))
- err = []
-
- for batch in range(num_batches):
- v_true = dat[:, batch*batch_size:(batch + 1)*batch_size]
- v = v_true
-
- # apply momentum
- wu_vh *= momentum
- wu_v *= momentum
- wu_h *= momentum
-
- # positive phase
- h = 1. / (1 + np.exp(-(np.dot(w_vh.T, v) + w_h)))
-
- wu_vh += np.dot(v, h.T)
- wu_v += v.sum(1)[:, np.newaxis]
- wu_h += h.sum(1)[:, np.newaxis]
-
- # sample hiddens
- h = 1. * (h > np.random.rand(num_hid, batch_size))
-
- # negative phase
- v = 1. / (1 + np.exp(-(np.dot(w_vh, h) + w_v)))
- h = 1. / (1 + np.exp(-(np.dot(w_vh.T, v) + w_h)))
-
- wu_vh -= np.dot(v, h.T)
- wu_v -= v.sum(1)[:, np.newaxis]
- wu_h -= h.sum(1)[:, np.newaxis]
-
- # update weights
- w_vh += epsilon/batch_size * wu_vh
- w_v += epsilon/batch_size * wu_v
- w_h += epsilon/batch_size * wu_h
-
- err.append(np.mean((v - v_true)**2))
-
- print("Mean squared error: %f" % np.mean(err))
- print("Time: %f" % (time.time() - start_time))
diff --git a/ot/gpu/cudamat/examples/util.py b/ot/gpu/cudamat/examples/util.py
deleted file mode 100644
index 79ceead..0000000
--- a/ot/gpu/cudamat/examples/util.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from __future__ import division
-import gzip
-try: import cPickle as pickle
-except: import pickle
-
-def save(fname, var_list, source_dict):
- var_list = [var.strip() for var in var_list.split() if len(var.strip())>0]
- fo = gzip.GzipFile(fname, 'wb')
- pickle.dump(var_list, fo)
- for var in var_list:
- pickle.dump(source_dict[var], fo, protocol=2)
- fo.close()
-
-def load(fname, target_dict, verbose = True):
- fo = gzip.GzipFile(fname, 'rb')
- var_list = pickle.load(fo)
- if verbose:
- print(var_list)
- for var in var_list:
- target_dict[var] = pickle.load(fo)
- fo.close()
-