summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2017-09-30 17:19:17 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2017-09-30 17:19:17 +0200
commitf4c4674cf66180dd71d140c4bfa9758a0e8ac86f (patch)
treeb7d0ed6c246dbbc222043b026ee410301306c054
parent0d8313708ca84f5338f405ae9b3261188e87b6f7 (diff)
Updated to version 1.1.0
-rw-r--r--.appveyor.yml5
-rw-r--r--.travis.yml2
-rw-r--r--CHANGELOG2
-rw-r--r--CMakeLists.txt4
-rw-r--r--README.md10
5 files changed, 17 insertions, 6 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
index adb1860b..484f5164 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -1,5 +1,6 @@
environment:
global:
+ CLBLAST_VERSION: "1.1.0"
CLBLAST_BUILD: "C:\\clblast\\build"
OPENCL_REGISTRY: "https://www.khronos.org/registry/cl"
OPENCL_ROOT: "C:\\dependencies\\opencl"
@@ -58,8 +59,8 @@ build_script:
after_build:
- ps: pushd $env:CLBLAST_BUILD
- - 7z a CLBlast-1.0.0-Windows-x64.zip .\install_dir\*
- - ps: mv CLBlast-1.0.0-Windows-x64.zip $env:APPVEYOR_BUILD_FOLDER
+ - 7z a CLBlast-%CLBLAST_VERSION%-Windows-x64.zip .\install_dir\*  # cmd step (no "ps:" prefix): use %VAR%, not PowerShell $env:VAR
+ - ps: mv CLBlast-$env:CLBLAST_VERSION-Windows-x64.zip $env:APPVEYOR_BUILD_FOLDER
artifacts:
- path: '*.zip'
diff --git a/.travis.yml b/.travis.yml
index abd39aac..0d18fbf4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -21,7 +21,7 @@ matrix:
env:
global:
- - CLBLAST_VERSION=1.0.0
+ - CLBLAST_VERSION=1.1.0
- CLBLAST_ROOT=${TRAVIS_BUILD_DIR}/bin/clblast
- CLBLAST_INSTALL=${TRAVIS_BUILD_DIR}/bin/CLBlast-${CLBLAST_VERSION}
- CLBLAST_TAR=CLBlast-${CLBLAST_VERSION}-${TRAVIS_OS_NAME}-x64.tar.gz
diff --git a/CHANGELOG b/CHANGELOG
index f93e736d..5deecb53 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,5 @@
-Development (next version)
+Version 1.1.0
- The tuning database now has defaults per architecture (e.g. NVIDIA Kepler SM3.5, AMD Fiji)
- The tuning database now has a dictionary to translate vendor/device names to a common set
- The tuners can now distinguish between different AMD GPU board names of the same architecture
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3140905c..05e7393b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ set(CMAKE_USER_MAKE_RULES_OVERRIDE_CXX ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cxx_fla
# CMake project details
project("clblast" C CXX)
set(clblast_VERSION_MAJOR 1)
-set(clblast_VERSION_MINOR 0)
-set(clblast_VERSION_PATCH 1)
+set(clblast_VERSION_MINOR 1)
+set(clblast_VERSION_PATCH 0)
# Options and their default values
option(BUILD_SHARED_LIBS "Build a shared (ON) or static library (OFF)" ON)
diff --git a/README.md b/README.md
index e8db38d2..c13770f6 100644
--- a/README.md
+++ b/README.md
@@ -316,6 +316,16 @@ Since there is no half-precision data-type in C or C++, OpenCL provides the `cl_
The `samples/haxpy.c` example shows how to use these convenience functions when calling the half-precision BLAS routine HAXPY.
+Known issues
+-------------
+
+Known performance-related issues:
+
+* Severe performance issues with Beignet v1.3.0 due to missing support for local memory. Please downgrade to v1.2.1 or upgrade to v1.3.1 or newer.
+
+* Performance issues on ARM Mali GPUs due to missing compiler support for loop unrolling and array-to-register promotion.
+
+
Contributing
-------------