sinkhorn GPU implementation

author: Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr> 2017-04-20 12:12:15 +0200
committer: Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr> 2017-04-20 12:12:15 +0200
commit: 16f51f971607efab2c73958d207c582b389406c8 (patch)
tree: 299a4f6f13faf8545d2144767e9a7791098aacf8 /ot/gpu/cudamat/setup.py
parent: 48ec27d8e1c2599bd6d9015d15f4204b8116af28 (diff)
1 files changed, 121 insertions, 0 deletions
diff --git a/ot/gpu/cudamat/setup.py b/ot/gpu/cudamat/setup.py
new file mode 100755
index 0000000..ad386d1
--- /dev/null
+++ b/ot/gpu/cudamat/setup.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+
+import os
+# on Windows, we need the original PATH without Anaconda's compiler in it:
+PATH = os.environ.get('PATH')
+from distutils.spawn import spawn, find_executable
+from setuptools import setup, find_packages, Extension
+from setuptools.command.build_ext import build_ext
+import sys
+
+# CUDA specific config
+# nvcc is assumed to be in user's PATH
+# Flags taken from the NVCCFLAGS environment variable (split on whitespace)
+# come first, followed by the fixed defaults.
+nvcc_compile_args = os.environ.get('NVCCFLAGS', '').split()
+nvcc_compile_args += ['-O', '--ptxas-options=-v', '--compiler-options=-fPIC']
+cuda_libs = ['cublas']
+
+# Both extensions are given the same library list and the same nvcc flags.
+cudamat_ext = Extension(
+    'cudamat.libcudamat',
+    sources=['cudamat/cudamat.cu', 'cudamat/cudamat_kernels.cu'],
+    libraries=cuda_libs,
+    extra_compile_args=nvcc_compile_args)
+cudalearn_ext = Extension(
+    'cudamat.libcudalearn',
+    sources=['cudamat/learn.cu', 'cudamat/learn_kernels.cu'],
+    libraries=cuda_libs,
+    extra_compile_args=nvcc_compile_args)
+
+
+class CUDA_build_ext(build_ext):
+    """
+    Custom build_ext command that compiles CUDA files.
+    Note that all extension source files will be processed with this compiler.
+    """
+    def build_extensions(self):
+        """
+        Redirect compiling and linking to nvcc, then run the regular build.
+        """
+        self.compiler.src_extensions.append('.cu')
+        self.compiler.set_executable('compiler_so', 'nvcc')
+        self.compiler.set_executable('linker_so', 'nvcc --shared')
+        if hasattr(self.compiler, '_c_extensions'):
+            self.compiler._c_extensions.append('.cu')  # needed for Windows
+        # route every command the compiler launches through our spawn()
+        self.compiler.spawn = self.spawn
+        build_ext.build_extensions(self)
+
+    @staticmethod
+    def _translate_msvc_arg(arg):
+        """
+        Return the list of nvcc arguments that replaces one MSVC argument.
+        An empty list means the argument is dropped; arguments that need no
+        translation are returned unchanged as a one-element list.
+        """
+        # create .dll instead of .pyd files
+        arg = arg.replace('.pyd', '.dll')
+        # replace /c by -c
+        if arg == '/c':
+            return ['-c']
+        # replace /DLL by --shared
+        if arg == '/DLL':
+            return ['--shared']
+        # remove --compiler-options=-fPIC, and /EXPORT:initlibcudamat or
+        # /EXPORT:initlibcudalearn
+        if '-fPIC' in arg or arg.startswith('/EXPORT:'):
+            return []
+        # replace /Tc... by ...
+        if arg.startswith('/Tc'):
+            return [arg[3:]]
+        # replace /Fo... by -o ...
+        if arg.startswith('/Fo'):
+            return ['-o', arg[3:]]
+        # replace /LIBPATH:... by -L...
+        if arg.startswith('/LIBPATH:'):
+            return ['-L' + arg[9:]]
+        # replace /OUT:... by -o ...
+        if arg.startswith('/OUT:'):
+            return ['-o', arg[5:]]
+        # replace cublas.lib by -lcublas
+        if arg == 'cublas.lib':
+            return ['-lcublas']
+        return [arg]
+
+    def spawn(self, cmd, search_path=1, verbose=0, dry_run=0):
+        """
+        Perform any CUDA specific customizations before actually launching
+        compile/link etc. commands.
+
+        cmd is the command as a list of strings (executable first);
+        search_path, verbose and dry_run are forwarded unchanged to
+        distutils.spawn.spawn together with the adjusted command.
+        """
+        if (sys.platform == 'darwin' and len(cmd) >= 2 and cmd[0] == 'nvcc' and
+                cmd[1] == '--shared' and cmd.count('-arch') > 0):
+            # Versions of distutils on OSX earlier than 2.7.9 inject
+            # '-arch x86_64' which we need to strip while using nvcc for
+            # linking
+            while '-arch' in cmd:
+                index = cmd.index('-arch')
+                del cmd[index:index+2]
+        elif self.compiler.compiler_type == 'msvc':
+            # There are several things we need to do to change the commands
+            # issued by MSVCCompiler into one that works with nvcc. In the end,
+            # it might have been easier to write our own CCompiler class for
+            # nvcc, as we're only interested in creating a shared library to
+            # load with ctypes, not in creating an importable Python extension.
+            # - First, we replace the cl.exe or link.exe call with an nvcc
+            #   call. In case we're running Anaconda, we search cl.exe in the
+            #   original search path we captured further above -- Anaconda
+            #   inserts a MSVC version into PATH that is too old for nvcc.
+            #   find_executable() returns None when cl.exe is not found; fall
+            #   back to the original executable instead of crashing in
+            #   os.path.dirname(None).
+            cl_exe = find_executable("cl.exe", PATH)
+            bindir = (os.path.dirname(cl_exe) if cl_exe else '') or cmd[0]
+            translated = ['nvcc', '--compiler-bindir', bindir]
+            # - Secondly, we fix a bunch of command line arguments. A new list
+            #   is built instead of deleting from / inserting into cmd while
+            #   iterating over it, which made the loop skip the argument
+            #   following every removed one.
+            for arg in cmd[1:]:
+                translated.extend(self._translate_msvc_arg(arg))
+            # - Finally, we pass on all arguments starting with a '/' to the
+            #   compiler or linker, and have nvcc handle all other arguments
+            if '--shared' in translated:
+                pass_on = '--linker-options='
+                # we only need MSVCRT for a .dll, remove CMT if it sneaks in:
+                translated.append('/NODEFAULTLIB:libcmt.lib')
+            else:
+                pass_on = '--compiler-options='
+            # startswith() (rather than c[0]) also copes with empty arguments
+            slash_args = [c for c in translated if c.startswith('/')]
+            nvcc_args = [c for c in translated if not c.startswith('/')]
+            cmd = nvcc_args + [pass_on + ','.join(slash_args)]
+            # For the future: Apart from the wrongly set PATH by Anaconda, it
+            # would suffice to run the following for compilation on Windows:
+            # nvcc -c -O -o <file>.obj <file>.cu
+            # And the following for linking:
+            # nvcc --shared -o <file>.dll <file1>.obj <file2>.obj -lcublas
+            # This could be done by a NVCCCompiler class for all platforms.
+        spawn(cmd, search_path, verbose, dry_run)
+
+setup(
+    # project metadata
+    name="cudamat",
+    version="0.3",
+    description="Performs linear algebra computation on the GPU via CUDA",
+    author="Volodymyr Mnih",
+    url="https://github.com/cudamat/cudamat",
+    # Python packages and the data file listed for the cudamat package
+    packages=find_packages(exclude=['examples', 'test']),
+    include_package_data=True,
+    package_data={'cudamat': ['rnd_multipliers_32bit.txt']},
+    # CUDA extensions, built through the custom build_ext command
+    ext_modules=[cudamat_ext, cudalearn_ext],
+    cmdclass={'build_ext': CUDA_build_ext})
author	Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr>	2017-04-20 12:12:15 +0200
committer	Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr>	2017-04-20 12:12:15 +0200
commit	16f51f971607efab2c73958d207c582b389406c8 (patch)
tree	299a4f6f13faf8545d2144767e9a7791098aacf8 /ot/gpu/cudamat/setup.py
parent	48ec27d8e1c2599bd6d9015d15f4204b8116af28 (diff)