diff options
Diffstat (limited to 'ot/gpu/cudamat/setup.py')
-rwxr-xr-x | ot/gpu/cudamat/setup.py | 121 |
1 files changed, 121 insertions, 0 deletions
#!/usr/bin/env python
"""Build script for cudamat.

Builds the ``cudamat.libcudamat`` and ``cudamat.libcudalearn`` extensions from
their ``.cu`` sources by redirecting the ``build_ext`` compiler and linker
commands to ``nvcc``.
"""

import os
# on Windows, we need the original PATH without Anaconda's compiler in it:
PATH = os.environ.get('PATH')
from distutils.spawn import spawn, find_executable
from setuptools import setup, find_packages, Extension
from setuptools.command.build_ext import build_ext
import sys

# CUDA specific config
# nvcc is assumed to be in user's PATH
nvcc_compile_args = ['-O', '--ptxas-options=-v', '--compiler-options=-fPIC']
# Flags from the NVCCFLAGS environment variable are placed before the defaults.
nvcc_compile_args = os.environ.get('NVCCFLAGS', '').split() + nvcc_compile_args
cuda_libs = ['cublas']

cudamat_ext = Extension('cudamat.libcudamat',
                        sources=['cudamat/cudamat.cu',
                                 'cudamat/cudamat_kernels.cu'],
                        libraries=cuda_libs,
                        extra_compile_args=nvcc_compile_args)
cudalearn_ext = Extension('cudamat.libcudalearn',
                          sources=['cudamat/learn.cu',
                                   'cudamat/learn_kernels.cu'],
                          libraries=cuda_libs,
                          extra_compile_args=nvcc_compile_args)


def _strip_arch_flags(cmd):
    """Return a copy of *cmd* with every ``-arch <value>`` pair removed."""
    kept = []
    skip_next = False
    for arg in cmd:
        if skip_next:
            # This is the value that followed a '-arch' flag.
            skip_next = False
        elif arg == '-arch':
            skip_next = True
        else:
            kept.append(arg)
    return kept


def _msvc_to_nvcc(cmd, compiler_bindir):
    """Translate an MSVC ``cl.exe``/``link.exe`` command line into an nvcc one.

    Args:
        cmd: The command as a list of strings; ``cmd[0]`` is the MSVC tool
            and is replaced by ``nvcc --compiler-bindir <compiler_bindir>``.
        compiler_bindir: Value passed to nvcc's ``--compiler-bindir`` option.

    Returns:
        A new argument list. *cmd* itself is not modified, so no argument is
        skipped or visited twice while translating.
    """
    translated = []
    for arg in cmd[1:]:
        # create .dll instead of .pyd files
        if '.pyd' in arg:
            arg = arg.replace('.pyd', '.dll')
        if arg == '/c':
            # replace /c by -c
            translated.append('-c')
        elif arg == '/DLL':
            # replace /DLL by --shared
            translated.append('--shared')
        elif '-fPIC' in arg:
            # remove --compiler-options=-fPIC
            continue
        elif arg.startswith('/Tc'):
            # replace /Tc... by ...
            translated.append(arg[3:])
        elif arg.startswith('/Fo'):
            # replace /Fo... by -o ...
            translated.extend(['-o', arg[3:]])
        elif arg.startswith('/LIBPATH:'):
            # replace /LIBPATH:... by -L...
            translated.append('-L' + arg[9:])
        elif arg.startswith('/OUT:'):
            # replace /OUT:... by -o ...
            translated.extend(['-o', arg[5:]])
        elif arg.startswith('/EXPORT:'):
            # remove /EXPORT:initlibcudamat or /EXPORT:initlibcudalearn
            continue
        elif arg == 'cublas.lib':
            # replace cublas.lib by -lcublas
            translated.append('-lcublas')
        else:
            translated.append(arg)

    # Finally, we pass on all arguments starting with a '/' to the compiler
    # or linker, and have nvcc handle all other arguments.
    if '--shared' in translated:
        pass_on = '--linker-options='
        # we only need MSVCRT for a .dll, remove CMT if it sneaks in:
        translated.append('/NODEFAULTLIB:libcmt.lib')
    else:
        pass_on = '--compiler-options='
    native_args = [arg for arg in translated if arg.startswith('/')]
    nvcc_cmd = ['nvcc', '--compiler-bindir', compiler_bindir]
    nvcc_cmd.extend(arg for arg in translated if not arg.startswith('/'))
    if native_args:
        # Only emit the pass-through option when there is something to pass.
        nvcc_cmd.append(pass_on + ','.join(native_args))
    return nvcc_cmd


class CUDA_build_ext(build_ext):
    """
    Custom build_ext command that compiles CUDA files.
    Note that all extension source files will be processed with this compiler.
    """
    def build_extensions(self):
        """Point the compiler object at nvcc, then build all extensions."""
        self.compiler.src_extensions.append('.cu')
        self.compiler.set_executable('compiler_so', 'nvcc')
        self.compiler.set_executable('linker_so', 'nvcc --shared')
        if hasattr(self.compiler, '_c_extensions'):
            self.compiler._c_extensions.append('.cu')  # needed for Windows
        # Route every compile/link command through our spawn() below.
        self.compiler.spawn = self.spawn
        build_ext.build_extensions(self)

    def spawn(self, cmd, search_path=1, verbose=0, dry_run=0):
        """
        Perform any CUDA specific customizations before actually launching
        compile/link etc. commands.

        Args:
            cmd: The command to run, as a list of strings.
            search_path, verbose, dry_run: Forwarded unchanged to
                ``distutils.spawn.spawn``.
        """
        if (sys.platform == 'darwin' and len(cmd) >= 2 and cmd[0] == 'nvcc'
                and cmd[1] == '--shared' and '-arch' in cmd):
            # Versions of distutils on OSX earlier than 2.7.9 inject
            # '-arch x86_64' which we need to strip while using nvcc for
            # linking
            cmd = _strip_arch_flags(cmd)
        elif self.compiler.compiler_type == 'msvc':
            # There are several things we need to do to change the commands
            # issued by MSVCCompiler into one that works with nvcc. In the end,
            # it might have been easier to write our own CCompiler class for
            # nvcc, as we're only interested in creating a shared library to
            # load with ctypes, not in creating an importable Python extension.
            # We replace the cl.exe or link.exe call with an nvcc call. In
            # case we're running Anaconda, we search cl.exe in the original
            # search path we captured further above -- Anaconda inserts a MSVC
            # version into PATH that is too old for nvcc.
            cl_path = find_executable("cl.exe", PATH)
            # find_executable() returns None when cl.exe is not on PATH; fall
            # back to the tool distutils selected instead of crashing.
            cl_dir = os.path.dirname(cl_path) if cl_path else ''
            cmd = _msvc_to_nvcc(cmd, cl_dir or cmd[0])
            # For the future: Apart from the wrongly set PATH by Anaconda, it
            # would suffice to run the following for compilation on Windows:
            # nvcc -c -O -o <file>.obj <file>.cu
            # And the following for linking:
            # nvcc --shared -o <file>.dll <file1>.obj <file2>.obj -lcublas
            # This could be done by a NVCCCompiler class for all platforms.
        spawn(cmd, search_path, verbose, dry_run)


setup(name="cudamat",
      version="0.3",
      description="Performs linear algebra computation on the GPU via CUDA",
      ext_modules=[cudamat_ext, cudalearn_ext],
      packages=find_packages(exclude=['examples', 'test']),
      include_package_data=True,
      package_data={'cudamat': ['rnd_multipliers_32bit.txt']},
      author="Volodymyr Mnih",
      url="https://github.com/cudamat/cudamat",
      cmdclass={'build_ext': CUDA_build_ext})