Browse Source

Working julia

master
nixo 3 years ago
commit
99c751fc1f
  1. 522
      julia.scm
  2. 13
      patches/llvm-6.0-D44650.patch
  3. 39
      patches/llvm-6.0-DISABLE_ABI_CHECKS.patch
  4. 32
      patches/llvm-6.0-NVPTX-addrspaces.patch
  5. 35
      patches/llvm-6.0.0_D27296-libssp.patch
  6. 53
      patches/llvm-D27629-AArch64-large_model_6.0.1.patch
  7. 56
      patches/llvm-D34078-vectorize-fdiv.patch
  8. 82
      patches/llvm-D42262-jumpthreading-not-i1.patch
  9. 677
      patches/llvm-D44892-Perf-integration.patch
  10. 26
      patches/llvm-D46460.patch
  11. 187
      patches/llvm-D49832-SCEVPred.patch
  12. 89
      patches/llvm-D50010-VNCoercion-ni.patch
  13. 1153
      patches/llvm-D50167-scev-umin.patch
  14. 48
      patches/llvm-OProfile-line-num.patch
  15. 29
      patches/llvm-PPC-addrspaces.patch
  16. 45
      patches/llvm-rL323946-LSRTy.patch
  17. 301
      patches/llvm-rL326967-aligned-load.patch
  18. 6131
      patches/llvm-rL327898.patch

522
julia.scm

@ -0,0 +1,522 @@
(use-modules ((guix licenses)
#:prefix license:))
(use-modules
(guix packages))
(use-modules
(guix download))
(use-modules
(guix utils))
(use-modules
(guix git-download))
(use-modules
(guix build-system gnu))
(use-modules
(gnu packages))
(use-modules
(gnu packages algebra))
(use-modules
(gnu packages base))
(use-modules
(gnu packages compression))
(use-modules
(gnu packages elf))
(use-modules
(gnu packages gcc))
(use-modules
(gnu packages llvm))
(use-modules
(gnu packages libevent))
(use-modules
(gnu packages libunwind))
(use-modules
(gnu packages maths))
(use-modules
(gnu packages multiprecision)) ; mpfr)
(use-modules
(gnu packages pcre))
(use-modules
(gnu packages perl))
(use-modules
(gnu packages pkg-config))
(use-modules
(gnu packages python))
(use-modules
(gnu packages python-xyz))
(use-modules
(gnu packages textutils))
(use-modules
(gnu packages tls))
(use-modules
(gnu packages version-control))
(use-modules
(gnu packages wget))
(use-modules
(ice-9 match))
;; (define openblas-julia
;; (package
;; (inherit openblas)
;; (name "openblas-julia")
;; INTERFACE64=
;; ))
;; This works *BUT* we need to apply a lot of patches. Compiling this
;; with julia applies the patches automatically
(define llvm-julia
(package
(inherit llvm-6)
(name "llvm-julia")
(source
(origin
(method url-fetch)
(uri
(string-append
"http://releases.llvm.org/6.0.1/llvm-6.0.1.src.tar.xz"))
(sha256
(base32
"1qpls3vk85lydi5b4axl0809fv932qgsqgdgrk098567z4jc7mmn"))
(patches '("./llvm-6.0-D44650.patch"
"./llvm-6.0-DISABLE_ABI_CHECKS.patch"
"./llvm-6.0-NVPTX-addrspaces.patch"
"./llvm-6.0.0_D27296-libssp.patch"
"./llvm-D27629-AArch64-large_model_6.0.1.patch"
"./llvm-D34078-vectorize-fdiv.patch"
"./llvm-D42262-jumpthreading-not-i1.patch"
"./llvm-D44892-Perf-integration.patch"
"./llvm-D46460.patch"
"./llvm-D49832-SCEVPred.patch"
"./llvm-D50010-VNCoercion-ni.patch"
"./llvm-D50167-scev-umin.patch"
"./llvm-OProfile-line-num.patch"
"./llvm-PPC-addrspaces.patch"
"./llvm-rL323946-LSRTy.patch"
"./llvm-rL326967-aligned-load.patch"
"./llvm-rL327898.patch"
))
))
(arguments
(substitute-keyword-arguments
(package-arguments llvm-6)
((#:configure-flags flags)
'(list ;; Taken from NixOS. Only way I could get libLLVM-6.0.so
"-DCMAKE_BUILD_TYPE=Release"
"-DLLVM_INSTALL_UTILS=ON"
"-DLLVM_BUILD_TESTS=ON"
"-DLLVM_ENABLE_FFI=ON"
"-DLLVM_ENABLE_RTTI=ON"
;; "-DLLVM_HOST_TRIPLE=${stdenv.hostPlatform.config}"
;; "-DLLVM_DEFAULT_TARGET_TRIPLE=${stdenv.hostPlatform.config}"
"-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly"
"-DLLVM_ENABLE_DUMP=ON"
"-DLLVM_LINK_LLVM_DYLIB=ON")
)))))
(define libuv-julia
(let
((commit "2348256acf5759a544e5ca7935f638d2bc091d60"))
(package
(inherit libuv)
(name "libuv-julia")
(version commit)
(source
(origin
(method url-fetch)
(uri (string-append
"https://api.github.com/repos/JuliaLang/libuv/tarball/"
commit))
(sha256
(base32
"1363f4vqayfcv5zqg07qmzjff56yhad74k16c22ian45lram8mv8"))))
(build-system gnu-build-system)
(arguments
(substitute-keyword-arguments
(package-arguments libuv)
((#:phases phases)
`(modify-phases ,phases
(delete 'autogen)))))
(home-page "https://github.com/JuliaLang/libuv"))))
(package
(name "julia")
(version "1.1.0")
(source
(origin
(method url-fetch)
(uri
(string-append
"https://github.com/JuliaLang/julia/releases/download/v"
version "/julia-" version ".tar.gz"))
(sha256
(base32
"1bd6c5gqd7f2i837ay8iqi8h36smhcg0lq7f8c2axxaw8x6rcfmx"))))
(build-system gnu-build-system)
(arguments
`(#:test-target "test"
#:modules
((ice-9 match)
(guix build gnu-build-system)
(guix build utils))
;; Do not strip binaries to keep support for full backtraces.
;; See https://github.com/JuliaLang/julia/issues/17831
#:strip-binaries? #f
;; The DSOs use $ORIGIN to refer to each other, but (guix build
;; gremlin) doesn't support it yet, so skip this phase.
#:validate-runpath? #f
#:phases
(modify-phases %standard-phases
(delete 'configure)
(add-after 'unpack 'prepare-deps
(lambda*
(#:key inputs #:allow-other-keys)
(mkdir "deps/srccache")
(copy-file
(assoc-ref inputs "dsfmt")
"deps/srccache/dsfmt-2.2.3.tar.gz")
(copy-file
(string-append
(assoc-ref inputs "virtualenv")
"/bin/virtualenv")
"julia-env")
(copy-file
(assoc-ref inputs "libwhich")
(string-append "deps/srccache/libwhich-"
"81e9723c0273d78493dc8c8ed570f68d9ce7e89e"
".tar.gz"))
(copy-file (assoc-ref inputs "rmath")
"deps/srccache/Rmath-julia-0.1.tar.gz")
(copy-file
(assoc-ref inputs "objconv")
"deps/srccache/objconv.zip")
(copy-file
(assoc-ref inputs "suitesparse")
"deps/srccache/SuiteSparse-4.4.5.tar.gz")
;; needed by libwhich
(setenv "LD_LIBRARY_PATH"
(string-join
(map (lambda (pkg)
(string-append (assoc-ref inputs pkg) "/lib"))
(list
"arpack-ng" "fftw" "gmp" "lapack"
"libgit2" "mpfr" "openblas" "openlibm"
"openspecfun" "pcre2"
))
":"))
;; (copy-file
;; (assoc-ref inputs "llvm")
;; "deps/srccache/llvm-6.0.0.src.tar.xz")
#t))
;; FIXME: Building the documentation requires Julia packages that
;; would be downloaded from the Internet. We should build them in a
;; separate build phase.
(add-after 'unpack 'disable-documentation
(lambda _
(substitute* "Makefile"
(("(install: .*) \\$\\(BUILDROOT\\)/doc/_build/html/en/index.html" _ line)
(string-append line "\n"))
(("src ui doc deps")
"src ui deps"))
#t))
(add-before 'check 'set-home
;; Some tests require a home directory to be set.
(lambda _
(setenv "HOME" "/tmp")
#t))
(add-after 'unpack 'hardcode-soname-map
;; ./src/runtime_ccall.cpp creates a map from library names to paths
;; using the output of "/sbin/ldconfig -p". Since ldconfig is not
;; used in Guix, we patch runtime_ccall.cpp to contain a static map.
(lambda* (#:key inputs #:allow-other-keys)
(use-modules (ice-9 match))
(substitute* "src/runtime_ccall.cpp"
;; Patch out invocations of '/sbin/ldconfig' to avoid getting
;; error messages about missing '/sbin/ldconfig' on Guix System.
(("popen\\(.*ldconfig.*\\);")
"NULL;\n")
;; Populate 'sonameMap'.
(("jl_read_sonames.*;")
(string-join
(map (match-lambda
((input libname soname)
(string-append
"sonameMap[\"" libname "\"] = "
"\"" (assoc-ref inputs input) "/lib/" soname "\";")))
'(("libc" "libc" "libc.so.6")
("pcre2" "libpcre2-8" "libpcre2-8.so")
("mpfr" "libmpfr" "libmpfr.so")
("openblas" "libblas" "libopenblas.so")
("arpack-ng" "libarpack" "libarpack.so")
("lapack" "liblapack" "liblapack.so")
("libgit2" "libgit2" "libgit2.so")
("gmp" "libgmp" "libgmp.so")
;; ("openlibm" "libopenlibm" "libopenlibm.so")
("openspecfun" "libopenspecfun" "libopenspecfun.so")
("fftw" "libfftw3" "libfftw3_threads.so")
("fftwf" "libfftw3f" "libfftw3f_threads.so"))))))
;; FIXME: NIXO
;; (substitute* "base/fft/FFTW.jl"
;; (("const libfftw = Base.libfftw_name")
;; (string-append "const libfftw = \""
;; (assoc-ref inputs "fftw") "/lib/libfftw3_threads.so"
;; "\""))
;; (("const libfftwf = Base.libfftwf_name")
;; (string-append "const libfftwf = \""
;; (assoc-ref inputs "fftwf") "/lib/libfftw3f_threads.so"
;; "\"")))
(substitute* "base/math.jl"
(("const libm = Base.libm_name")
(string-append "const libm = \""
(assoc-ref inputs "openlibm")
"/lib/libopenlibm.so"
"\""))
(("const openspecfun = \"libopenspecfun\"")
(string-append "const openspecfun = \""
(assoc-ref inputs "openspecfun")
"/lib/libopenspecfun.so"
"\"")))
;; (substitute* "base/pcre.jl"
;; (("const PCRE_LIB = \"libpcre2-8\"")
;; (string-append "const PCRE_LIB = \""
;; (assoc-ref inputs "pcre2")
;; "/lib/libpcre2-8.so" "\"")))
#t))
(add-before 'build 'fix-include-and-link-paths
(lambda*
(#:key inputs #:allow-other-keys)
;; LIBUTF8PROC is a linker flag, not a build target. It is
;; included in the LIBFILES_* variable which is used as a
;; collection of build targets and a list of libraries to link
;; against.
(substitute* "src/flisp/Makefile"
(("\\$\\(BUILDDIR\\)/\\$\\(EXENAME\\): \\$\\(OBJS\\) \\$\\(LIBFILES_release\\)")
"$(BUILDDIR)/$(EXENAME): $(OBJS) $(LLT_release)")
(("\\$\\(BUILDDIR\\)/\\$\\(EXENAME\\)-debug: \\$\\(DOBJS\\) \\$\\(LIBFILES_debug\\)")
"$(BUILDDIR)/$(EXENAME)-debug: $(DOBJS) $(LLT_debug)"))
;; The REPL must be linked with libuv.
(substitute* "ui/Makefile"
(("JLDFLAGS \\+= ")
(string-append "JLDFLAGS += "
(assoc-ref %build-inputs "libuv")
"/lib/libuv.so ")))
(substitute* "base/Makefile"
(("\\$\\(build_includedir\\)/uv/errno.h")
(string-append
(assoc-ref inputs "libuv")
"/include/uv/errno.h")))
#t))
(add-before 'build 'replace-default-shell
(lambda _
(substitute* "base/client.jl"
(("/bin/sh")
(which "sh")))
#t))
(add-after 'unpack 'hardcode-paths
(lambda _
(substitute* "stdlib/InteractiveUtils/src/InteractiveUtils.jl"
(("`which") (string-append "`" (which "which")))
(("`wget") (string-append "`" (which "wget"))))
#t))
(add-before 'check 'disable-broken-tests
(lambda _
(define (touch file-name)
(call-with-output-file file-name (const #t)))
;; Don't know why FIXME
;; (substitute* "stdlib/LibGit2/test/libgit2.jl"
;; (("!LibGit2.use_http_path(cfg, github_cred)")
;; "true")
;; (("LibGit2.use_http_path(cfg, mygit_cred)")
;; "true"))
(map (lambda (test)
(delete-file test)
(touch test))
'("stdlib/Sockets/test/runtests.jl"
"stdlib/Distributed/test/runtests.jl"
"stdlib/LibGit2/test/libgit2.jl"
))
(substitute* "test/choosetests.jl"
;; These tests fail, probably because some of the input
;; binaries have been stripped and thus backtraces don't look
;; as expected.
(("\"backtrace\",")
"")
(("\"cmdlineargs\",")
""))
#t)))
#:make-flags
(list
(string-append "prefix=" (assoc-ref %outputs "out"))
(string-append "PREFIX=" (assoc-ref %outputs "out"))
;; Passing the MARCH flag is necessary to build binary substitutes for
;; the supported architectures.
,(match
(or
(%current-target-system)
(%current-system))
("x86_64-linux" "MARCH=x86-64")
("i686-linux" "MARCH=pentium4")
("aarch64-linux" "MARCH=armv8-a")
;; Prevent errors when querying this package on unsupported
;; platforms, e.g. when running "guix package --search="
(_ "MARCH=UNSUPPORTED"))
"CONFIG_SHELL=bash" ;needed to build bundled libraries
"USE_SYSTEM_DSFMT=0" ;not packaged for Guix and upstream has no
;build system for a shared library.
"USE_SYSTEM_LAPACK=1"
"USE_SYSTEM_BLAS=1"
;; TODO: What about building blas with 64 support?
"USE_BLAS64=0" ;needed when USE_SYSTEM_BLAS=1
"LIBBLAS=-lopenblas"
"LIBBLASNAME=libopenblas"
"USE_SYSTEM_FFTW=1"
"LIBFFTWNAME=libfftw3"
"LIBFFTWFNAME=libfftw3f"
;; TODO: Suitesparse does not install shared libraries, so we cannot
;; use the suitesparse package.
;; "USE_SYSTEM_SUITESPARSE=1"
;; (string-append "SUITESPARSE_INC=-I "
;; (assoc-ref %build-inputs "suitesparse")
;; "/include")
"USE_GPL_LIBS=1" ;proudly
"USE_SYSTEM_UTF8PROC=1"
(string-append "UTF8PROC_INC="
(assoc-ref %build-inputs "utf8proc")
"/include")
"USE_SYSTEM_LLVM=1"
"LLVM_VER=6.0.1"
;; "LLVM_VER=6.0.0"
"USE_LLVM_SHLIB=1" ; FIXME: fails when set to 1
"USE_SYSTEM_LIBUNWIND=1"
"USE_SYSTEM_LIBUV=1"
(string-append "LIBUV="
(assoc-ref %build-inputs "libuv")
"/lib/libuv.so")
(string-append "LIBUV_INC="
(assoc-ref %build-inputs "libuv")
"/include")
"USE_SYSTEM_PATCHELF=1"
"USE_SYSTEM_PCRE=1"
"USE_SYSTEM_OPENLIBM=1"
"USE_SYSTEM_GMP=1"
"USE_SYSTEM_MPFR=1"
"USE_SYSTEM_ARPACK=1"
"USE_SYSTEM_LIBGIT2=1"
"USE_SYSTEM_ZLIB=1"
"USE_SYSTEM_OPENSPECFUN=1")))
(inputs
`( ("llvm" ,llvm-julia)
;; The bundled version is 3.3.0 so stick to that version. With other
;; versions, we get test failures in 'linalg/arnoldi' as described in
;; <https://bugs.gnu.org/30282>.
("arpack-ng" ,arpack-ng-3.3.0)
("coreutils" ,coreutils)
;for bindings to "mkdir" and the like
("lapack" ,lapack)
("openblas" ,openblas)
;Julia does not build with Atlas
("libunwind" ,libunwind)
("openlibm" ,openlibm)
("openspecfun" ,openspecfun)
("libuv", libuv-julia)
("libgit2" ,libgit2)
("fftw" ,fftw)
("fftwf" ,fftwf)
("fortran" ,gfortran)
("pcre2" ,pcre2)
("utf8proc" ,utf8proc)
("mpfr" ,mpfr)
("wget" ,wget)
("which" ,which)
("zlib" ,zlib)
("gmp" ,gmp)
("virtualenv" ,python2-virtualenv)
;; FIXME: The following inputs are downloaded from upstream to allow us
;; to use the lightweight Julia release tarball. Ideally, these inputs
;; would eventually be replaced with proper Guix packages.
;; TODO: run "make -f contrib/repackage_system_suitesparse4.make" to copy static lib
("rmath"
,(origin
(method url-fetch)
(uri "https://api.github.com/repos/JuliaLang/Rmath-julia/tarball/v0.1")
(sha256
(base32
"1qyps217175qhid46l8f5i1v8i82slgp23ia63x2hzxwfmx8617p"))))
("suitesparse"
,(origin
(method url-fetch)
(uri "http://faculty.cse.tamu.edu/davis/SuiteSparse/SuiteSparse-4.4.5.tar.gz")
(sha256
(base32
"1jcbxb8jx5wlcixzf6n5dca2rcfx6mlcms1k2rl5gp67ay3bix43"))))
("objconv"
,(origin
(method url-fetch)
;; No versioned URL, see <https://www.agner.org/optimize/> for updates.
(uri "https://www.agner.org/optimize/objconv.zip")
(file-name "objconv-2018-10-07.zip")
(sha256
(base32
"0wp6ld9vk11f4nnkn56627zmlv9k5vafi99qa3yyn1pgcd61zcfs"))))
("libwhich"
,(origin
(method url-fetch)
(uri
(string-append
"https://api.github.com/repos/vtjnash/libwhich/tarball/"
"81e9723c0273d78493dc8c8ed570f68d9ce7e89e"))
(sha256
(base32
"1p7zg31kpmpbmh1znrk1xrbd074agx13b9q4dcw8n2zrwwdlbz3b"))))
;; ("llvm"
;; ,(origin
;; (method url-fetch)
;; (uri
;; (string-append
;; "http://releases.llvm.org/6.0.0/llvm-6.0.0.src.tar.xz"))
;; (sha256
;; (base32
;; "0224xvfg6h40y5lrbnb9qaq3grmdc5rg00xq03s1wxjfbf8krx8z"))))
;; ("cmake" ,cmake) ;; required to build llvm
("dsfmt"
,(origin
(method url-fetch)
(uri
(string-append
"http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/"
"SFMT/dSFMT-src-2.2.3.tar.gz"))
(sha256
(base32
"03kaqbjbi6viz0n33dk5jlf6ayxqlsq4804n7kwkndiga9s4hd42"))))))
(native-inputs
`(("openssl" ,openssl)
("perl" ,perl)
("patchelf" ,patchelf)
("pkg-config" ,pkg-config)
("python" ,python-2)))
;; Julia is not officially released for ARM and MIPS.
;; See https://github.com/JuliaLang/julia/issues/10639
(supported-systems
'("i686-linux" "x86_64-linux" "aarch64-linux"))
(home-page "https://julialang.org/")
(synopsis "High-performance dynamic language for technical computing")
(description
"Julia is a high-level, high-performance dynamic programming language for
technical computing, with syntax that is familiar to users of other technical
computing environments. It provides a sophisticated compiler, distributed
parallel execution, numerical accuracy, and an extensive mathematical function
library.")
(license license:expat))

13
patches/llvm-6.0-D44650.patch

@ -0,0 +1,13 @@
Index: tools/llvm-cfi-verify/CMakeLists.txt
===================================================================
--- a/tools/llvm-cfi-verify/CMakeLists.txt
+++ b/tools/llvm-cfi-verify/CMakeLists.txt
@@ -11,7 +11,7 @@
Symbolize
)
-add_llvm_tool(llvm-cfi-verify
+add_llvm_tool(llvm-cfi-verify DISABLE_LLVM_LINK_LLVM_DYLIB
llvm-cfi-verify.cpp)
add_subdirectory(lib)

39
patches/llvm-6.0-DISABLE_ABI_CHECKS.patch

@ -0,0 +1,39 @@
From d793ba4bacae51ae25be19c1636fcf38707938fd Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Fri, 1 Jun 2018 17:43:55 -0400
Subject: [PATCH] fix LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
---
cmake/modules/HandleLLVMOptions.cmake | 2 +-
include/llvm/Config/abi-breaking.h.cmake | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index 3d2dd48018c..b67ee6a896e 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -572,7 +572,7 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
if (LLVM_ENABLE_PEDANTIC AND LLVM_COMPILER_IS_GCC_COMPATIBLE)
append("-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
- append("-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+ append("-Wno-long-long -Wundef" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG)
diff --git a/include/llvm/Config/abi-breaking.h.cmake b/include/llvm/Config/abi-breaking.h.cmake
index 7ae401e5b8a..d52c4609101 100644
--- a/include/llvm/Config/abi-breaking.h.cmake
+++ b/include/llvm/Config/abi-breaking.h.cmake
@@ -20,7 +20,7 @@
/* Allow selectively disabling link-time mismatch checking so that header-only
ADT content from LLVM can be used without linking libSupport. */
-#if !LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
+#ifndef LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
// ABI_BREAKING_CHECKS protection: provides link-time failure when clients build
// mismatch with LLVM
--
2.17.0

32
patches/llvm-6.0-NVPTX-addrspaces.patch

@ -0,0 +1,32 @@
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f1e4251a44b..73d49f5d7e4 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1248,6 +1248,14 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
}
}
+bool NVPTXTargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ assert(SrcAS != DestAS && "Expected different address spaces!");
+
+ return (SrcAS == ADDRESS_SPACE_GENERIC || SrcAS > ADDRESS_SPACE_LOCAL) &&
+ (DestAS == ADDRESS_SPACE_GENERIC || DestAS > ADDRESS_SPACE_LOCAL);
+}
+
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index ef04a8573d4..68a9a7195c4 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -443,6 +443,8 @@ public:
const NVPTXSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
const char *getTargetNodeName(unsigned Opcode) const override;

35
patches/llvm-6.0.0_D27296-libssp.patch

@ -0,0 +1,35 @@
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2098,7 +2098,8 @@
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
// MSVC CRT provides functionalities for stack protection.
- if (Subtarget.getTargetTriple().isOSMSVCRT()) {
+ if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
+ Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
// MSVC CRT has a global variable holding security cookie.
M.getOrInsertGlobal("__security_cookie",
Type::getInt8PtrTy(M.getContext()));
@@ -2120,15 +2121,19 @@
Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
// MSVC CRT has a global variable holding security cookie.
- if (Subtarget.getTargetTriple().isOSMSVCRT())
+ if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
+ Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
return M.getGlobalVariable("__security_cookie");
+ }
return TargetLowering::getSDagStackGuard(M);
}
Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
// MSVC CRT has a function to validate security cookie.
- if (Subtarget.getTargetTriple().isOSMSVCRT())
+ if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
+ Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
return M.getFunction("__security_check_cookie");
+ }
return TargetLowering::getSSPStackGuardCheck(M);
}

53
patches/llvm-D27629-AArch64-large_model_6.0.1.patch

@ -0,0 +1,53 @@
From f76abe65e6d07fea5e838c4f8c9a9421c16debb0 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Thu, 5 Jul 2018 12:37:50 -0400
Subject: [PATCH] Fix unwind info relocation with large code model on AArch64
---
lib/MC/MCObjectFileInfo.cpp | 2 ++
.../AArch64/ELF_ARM64_large-relocations.s | 20 +++++++++++++++++++
2 files changed, 22 insertions(+)
create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 328f000f37c..938b35f20d1 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -291,6 +291,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
break;
case Triple::ppc64:
case Triple::ppc64le:
+ case Triple::aarch64:
+ case Triple::aarch64_be:
case Triple::x86_64:
FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
(Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
new file mode 100644
index 00000000000..66f28dabd79
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc -triple=arm64-none-linux-gnu -large-code-model -filetype=obj -o %T/large-reloc.o %s
+# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o
+# RUN-BE: llvm-mc -triple=aarch64_be-none-linux-gnu -large-code-model -filetype=obj -o %T/be-large-reloc.o %s
+# RUN-BE: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section be-large-reloc.o,.eh_frame=0x10000 -map-section be-large-reloc.o,.text=0xffff000000000000 -check=%s %T/be-large-reloc.o
+
+ .text
+ .globl g
+ .p2align 2
+ .type g,@function
+g:
+ .cfi_startproc
+ mov x0, xzr
+ ret
+ .Lfunc_end0:
+ .size g, .Lfunc_end0-g
+ .cfi_endproc
+
+# Skip the CIE and load the 8 bytes PC begin pointer.
+# Assuming the CIE and the FDE length are both 4 bytes.
+# rtdyld-check: *{8}(section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc)
--
2.18.0

56
patches/llvm-D34078-vectorize-fdiv.patch

@ -0,0 +1,56 @@
From f94d12b6108b944199b715f31f25a022f75d2feb Mon Sep 17 00:00:00 2001
From: Yichao Yu <yyc1992@gmail.com>
Date: Sat, 10 Jun 2017 08:45:13 -0400
Subject: [PATCH 4/4] Enable support for floating-point division reductions
Similar to fsub, fdiv can also be vectorized using fmul.
---
lib/Transforms/Utils/LoopUtils.cpp | 1 +
test/Transforms/LoopVectorize/float-reduction.ll | 22 ++++++++++++++++++++++
2 files changed, 23 insertions(+)
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 3c522786641..a4aced53a95 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -451,6 +451,7 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
return InstDesc(Kind == RK_IntegerOr, I);
case Instruction::Xor:
return InstDesc(Kind == RK_IntegerXor, I);
+ case Instruction::FDiv:
case Instruction::FMul:
return InstDesc(Kind == RK_FloatMult, I, UAI);
case Instruction::FSub:
diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll
index f3b95d0ead7..669c54d55a2 100644
--- a/test/Transforms/LoopVectorize/float-reduction.ll
+++ b/test/Transforms/LoopVectorize/float-reduction.ll
@@ -44,3 +44,25 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret float %sub
}
+
+;CHECK-LABEL: @foodiv(
+;CHECK: fdiv fast <4 x float>
+;CHECK: ret
+define float @foodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ]
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %sub = fdiv fast float %sum.04, %0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 200
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret float %sub
+}
--
2.14.1

82
patches/llvm-D42262-jumpthreading-not-i1.patch

@ -0,0 +1,82 @@
commit 6a311a7a804831fea43cfb2f61322adcb407a1af
Author: Keno Fischer <keno@juliacomputing.com>
Date: Thu Jan 18 15:57:05 2018 -0500
[JumpThreading] Don't restrict cast-traversal to i1
Summary:
In D17663, JumpThreading learned to look trough simple cast instructions,
but only if the source of those cast instructions was a phi/cmp i1
(in an effort to limit compile time effects). I think this condition
is too restrictive. For switches with limited value range, InstCombine
will readily introduce an extra `trunc` instruction to a smaller
integer type (e.g. from i8 to i2), leaving us in the somewhat perverse
situation that jump-threading would work before running instcombine,
but not after. Since instcombine produces this pattern, I think we
need to consider it canonical and support it in JumpThreading.
In general, for limiting recursion, I think the existing restriction
to phi and cmp nodes should be sufficient to avoid looking through
unprofitable chains of instructions.
Reviewers: haicheng, gberry, bmakam, mcrosier
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D42262
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 95c4650..1155e18 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -647,11 +647,9 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
}
// Handle Cast instructions. Only see through Cast when the source operand is
- // PHI or Cmp and the source type is i1 to save the compilation time.
+ // PHI or Cmp to save the compilation time.
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Source = CI->getOperand(0);
- if (!Source->getType()->isIntegerTy(1))
- return false;
if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
return false;
ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index ce86cba..16e7549 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -547,6 +547,34 @@ l5:
; CHECK: }
}
+define i1 @trunc_switch(i1 %arg) {
+; CHECK-LABEL: @trunc_switch
+top:
+; CHECK: br i1 %arg, label %exitA, label %exitB
+ br i1 %arg, label %common, label %B
+
+B:
+ br label %common
+
+common:
+ %phi = phi i8 [ 2, %B ], [ 1, %top ]
+ %trunc = trunc i8 %phi to i2
+; CHECK-NOT: switch
+ switch i2 %trunc, label %unreach [
+ i2 1, label %exitA
+ i2 -2, label %exitB
+ ]
+
+unreach:
+ unreachable
+
+exitA:
+ ret i1 true
+
+exitB:
+ ret i1 false
+}
+
; CHECK-LABEL: define void @h_con(i32 %p) {
define void @h_con(i32 %p) {
%x = icmp ult i32 %p, 5

677
patches/llvm-D44892-Perf-integration.patch

@ -0,0 +1,677 @@
From 45bc0f0badbdbabaed7d204757c2aad7ab49a3fe Mon Sep 17 00:00:00 2001
From: DokFaust <rodia@autistici.org>
Date: Mon, 11 Jun 2018 12:59:42 +0200
Subject: [PATCH] PerfJITEventListener integration, requires compile flag
LLVM_USE_PERF
---
CMakeLists.txt | 13 +
include/llvm/Config/config.h.cmake | 3 +
include/llvm/Config/llvm-config.h.cmake | 3 +
.../llvm/ExecutionEngine/JITEventListener.h | 9 +
lib/ExecutionEngine/CMakeLists.txt | 4 +
lib/ExecutionEngine/LLVMBuild.txt | 2 +-
lib/ExecutionEngine/Orc/LLVMBuild.txt | 2 +-
.../PerfJITEvents/CMakeLists.txt | 5 +
.../PerfJITEvents/LLVMBuild.txt | 23 +
.../PerfJITEvents/PerfJITEventListener.cpp | 492 ++++++++++++++++++
10 files changed, 554 insertions(+), 2 deletions(-)
create mode 100644 lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
create mode 100644 lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
create mode 100644 lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f8da6cf9211..fb92c825a46 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -426,6 +426,16 @@ if( LLVM_USE_OPROFILE )
endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
endif( LLVM_USE_OPROFILE )
+option(LLVM_USE_PERF
+ "Use perf JIT interface to inform perf about JIT code" OFF)
+
+# If enabled, verify we are on a platform that supports perf.
+if( LLVM_USE_PERF )
+ if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+ message(FATAL_ERROR "perf support is available on Linux only.")
+ endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+endif( LLVM_USE_PERF )
+
set(LLVM_USE_SANITIZER "" CACHE STRING
"Define the sanitizer used to build binaries and tests.")
set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
@@ -634,6 +644,9 @@ endif (LLVM_USE_INTEL_JITEVENTS)
if (LLVM_USE_OPROFILE)
set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT)
endif (LLVM_USE_OPROFILE)
+if (LLVM_USE_PERF)
+ set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents)
+endif (LLVM_USE_PERF)
message(STATUS "Constructing LLVMBuild project information")
execute_process(
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index 940f8420304..17787ed779b 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -377,6 +377,9 @@
/* Define if we have the oprofile JIT-support library */
#cmakedefine01 LLVM_USE_OPROFILE
+/* Define if we have the perf JIT-support library */
+#cmakedefine01 LLVM_USE_PERF
+
/* LLVM version information */
#cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}"
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
index 4daa00f3bc4..8d9c3b24d52 100644
--- a/include/llvm/Config/llvm-config.h.cmake
+++ b/include/llvm/Config/llvm-config.h.cmake
@@ -65,6 +65,9 @@
/* Define if we have the oprofile JIT-support library */
#cmakedefine01 LLVM_USE_OPROFILE
+/* Define if we have the perf JIT-support library */
+#cmakedefine01 LLVM_USE_PERF
+
/* Major version of the LLVM API */
#define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index ff7840f00a4..1cc2c423a8b 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -115,6 +115,15 @@ public:
}
#endif // USE_OPROFILE
+#ifdef LLVM_USE_PERF
+ static JITEventListener *createPerfJITEventListener();
+#else
+ static JITEventListener *createPerfJITEventListener()
+ {
+ return nullptr;
+ }
+#endif //USE_PERF
+
private:
virtual void anchor();
};
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index 84b34919e44..893d113a685 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -30,3 +30,7 @@ endif( LLVM_USE_OPROFILE )
if( LLVM_USE_INTEL_JITEVENTS )
add_subdirectory(IntelJITEvents)
endif( LLVM_USE_INTEL_JITEVENTS )
+
+if( LLVM_USE_PERF )
+ add_subdirectory(PerfJITEvents)
+endif( LLVM_USE_PERF )
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
index 9d29a41f504..b6e1bda6a51 100644
--- a/lib/ExecutionEngine/LLVMBuild.txt
+++ b/lib/ExecutionEngine/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc
+subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc PerfJITEvents
[component_0]
type = Library
diff --git a/lib/ExecutionEngine/Orc/LLVMBuild.txt b/lib/ExecutionEngine/Orc/LLVMBuild.txt
index 8f05172e77a..ef4ae64e823 100644
--- a/lib/ExecutionEngine/Orc/LLVMBuild.txt
+++ b/lib/ExecutionEngine/Orc/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = OrcJIT
parent = ExecutionEngine
-required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
+required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
diff --git a/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
new file mode 100644
index 00000000000..136cc429d02
--- /dev/null
+++ b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMPerfJITEvents
+ PerfJITEventListener.cpp
+ )
+
+add_dependencies(LLVMPerfJITEvents LLVMCodeGen)
diff --git a/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
new file mode 100644
index 00000000000..b1958a69260
--- /dev/null
+++ b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = OptionalLibrary
+name = PerfJITEvents
+parent = ExecutionEngine
+required_libraries = CodeGen Core DebugInfoDWARF ExecutionEngine Object Support TransformUtils
+
diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
new file mode 100644
index 00000000000..c2b97dd59f3
--- /dev/null
+++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -0,0 +1,492 @@
+//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object that tells perf about JITted
+// functions, including source line information.
+//
+// Documentation for perf jit integration is available at:
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <sys/mman.h> // mmap()
+#include <sys/types.h> // getpid()
+#include <time.h> // clock_gettime(), time(), localtime_r() */
+#include <unistd.h> // for getpid(), read(), close()
+
+using namespace llvm;
+using namespace llvm::object;
+typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
+
+namespace {
+
+// language identifier (XXX: should we generate something better from debug
+// info?)
+#define JIT_LANG "llvm-IR"
+#define LLVM_PERF_JIT_MAGIC \
+ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \
+ (uint32_t)'D')
+#define LLVM_PERF_JIT_VERSION 1
+
+// bit 0: set if the jitdump file is using an architecture-specific timestamp
+// clock source
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)
+
+struct LLVMPerfJitHeader;
+
+class PerfJITEventListener : public JITEventListener {
+public:
+ PerfJITEventListener();
+ ~PerfJITEventListener() {
+ if (MarkerAddr)
+ CloseMarker();
+ }
+
+ void NotifyObjectEmitted(const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) override;
+ void NotifyFreeingObject(const ObjectFile &Obj) override;
+
+private:
+ bool InitDebuggingDir();
+ bool OpenMarker();
+ void CloseMarker();
+ static bool FillMachine(LLVMPerfJitHeader &hdr);
+
+ void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr,
+ uint64_t CodeSize);
+ void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
+
+ // cache lookups
+ pid_t Pid;
+
+ // base directory for output data
+ std::string JitPath;
+
+ // output data stream, closed via Dumpstream
+ int DumpFd = -1;
+
+ // output data stream
+ std::unique_ptr<raw_fd_ostream> Dumpstream;
+
+ // prevent concurrent dumps from messing up the output file
+ sys::Mutex Mutex;
+
+ // perf mmap marker
+ void *MarkerAddr = NULL;
+
+ // perf support ready
+ bool SuccessfullyInitialized = false;
+
+ // identifier for functions, primarily to identify when moving them around
+ uint64_t CodeGeneration = 1;
+};
+
+// The following are POD struct definitions from the perf jit specification
+
+enum LLVMPerfJitRecordType {
+ JIT_CODE_LOAD = 0,
+ JIT_CODE_MOVE = 1, // not emitted, code isn't moved
+ JIT_CODE_DEBUG_INFO = 2,
+ JIT_CODE_CLOSE = 3, // not emitted, unnecessary
+ JIT_CODE_UNWINDING_INFO = 4, // not emitted
+
+ JIT_CODE_MAX
+};
+
+struct LLVMPerfJitHeader {
+ uint32_t Magic; // characters "JiTD"
+ uint32_t Version; // header version
+ uint32_t TotalSize; // total size of header
+ uint32_t ElfMach; // elf mach target
+ uint32_t Pad1; // reserved
+ uint32_t Pid;
+ uint64_t Timestamp; // timestamp
+ uint64_t Flags; // flags
+};
+
+// record prefix (mandatory in each record)
+struct LLVMPerfJitRecordPrefix {
+ uint32_t Id; // record type identifier
+ uint32_t TotalSize;
+ uint64_t Timestamp;
+};
+
+struct LLVMPerfJitRecordCodeLoad {
+ LLVMPerfJitRecordPrefix Prefix;
+
+ uint32_t Pid;
+ uint32_t Tid;
+ uint64_t Vma;
+ uint64_t CodeAddr;
+ uint64_t CodeSize;
+ uint64_t CodeIndex;
+};
+
+struct LLVMPerfJitDebugEntry {
+ uint64_t Addr;
+ int Lineno; // source line number starting at 1
+ int Discrim; // column discriminator, 0 is default
+ // followed by null terminated filename, \xff\0 if same as previous entry
+};
+
+struct LLVMPerfJitRecordDebugInfo {
+ LLVMPerfJitRecordPrefix Prefix;
+
+ uint64_t CodeAddr;
+ uint64_t NrEntry;
+ // followed by NrEntry LLVMPerfJitDebugEntry records
+};
+
+static inline uint64_t timespec_to_ns(const struct timespec *ts) {
+ const uint64_t NanoSecPerSec = 1000000000;
+ return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec;
+}
+
+static inline uint64_t perf_get_timestamp(void) {
+ struct timespec ts;
+ int ret;
+
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (ret)
+ return 0;
+
+ return timespec_to_ns(&ts);
+}
+
+PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) {
+ // check if clock-source is supported
+ if (!perf_get_timestamp()) {
+ errs() << "kernel does not support CLOCK_MONOTONIC\n";
+ return;
+ }
+
+ if (!InitDebuggingDir()) {
+ errs() << "could not initialize debugging directory\n";
+ return;
+ }
+
+ std::string Filename;
+ raw_string_ostream FilenameBuf(Filename);
+ FilenameBuf << JitPath << "/jit-" << Pid << ".dump";
+
+ // Need to open ourselves, because we need to hand the FD to OpenMarker() and
+ // raw_fd_ostream doesn't expose the FD.
+ using sys::fs::openFileForWrite;
+ if (auto EC =
+ openFileForWrite(FilenameBuf.str(), DumpFd, sys::fs::F_RW, 0666)) {
+ errs() << "could not open JIT dump file " << FilenameBuf.str() << ": "
+ << EC.message() << "\n";
+ return;
+ }
+
+ Dumpstream = make_unique<raw_fd_ostream>(DumpFd, true);
+
+ LLVMPerfJitHeader Header = {0};
+ if (!FillMachine(Header))
+ return;
+
+ // signal this process emits JIT information
+ if (!OpenMarker())
+ return;
+
+ // emit dumpstream header
+ Header.Magic = LLVM_PERF_JIT_MAGIC;
+ Header.Version = LLVM_PERF_JIT_VERSION;
+ Header.TotalSize = sizeof(Header);
+ Header.Pid = Pid;
+ Header.Timestamp = perf_get_timestamp();
+ Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
+
+ // Everything initialized, can do profiling now.
+ if (!Dumpstream->has_error())
+ SuccessfullyInitialized = true;
+}
+
+void PerfJITEventListener::NotifyObjectEmitted(
+ const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) {
+
+ if (!SuccessfullyInitialized)
+ return;
+
+ OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
+ const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
+
+ // Get the address of the object image for use as a unique identifier
+ std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
+
+ // Use symbol info to iterate over functions in the object.
+ for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
+ SymbolRef Sym = P.first;
+ std::string SourceFileName;
+
+ Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
+ if (!SymTypeOrErr) {
+ // There's not much we can with errors here
+ consumeError(SymTypeOrErr.takeError());
+ continue;
+ }
+ SymbolRef::Type SymType = *SymTypeOrErr;
+ if (SymType != SymbolRef::ST_Function)
+ continue;
+
+ Expected<StringRef> Name = Sym.getName();
+ if (!Name) {
+ consumeError(Name.takeError());
+ continue;
+ }
+
+ Expected<uint64_t> AddrOrErr = Sym.getAddress();
+ if (!AddrOrErr) {
+ consumeError(AddrOrErr.takeError());
+ continue;
+ }
+ uint64_t Addr = *AddrOrErr;
+ uint64_t Size = P.second;
+
+ // According to spec debugging info has to come before loading the
+ // corresonding code load.
+ DILineInfoTable Lines = Context->getLineInfoForAddressRange(
+ Addr, Size, FileLineInfoKind::AbsoluteFilePath);
+
+ NotifyDebug(Addr, Lines);
+ NotifyCode(Name, Addr, Size);
+ }
+
+ Dumpstream->flush();
+}
+
+void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
+ // perf currently doesn't have an interface for unloading. But munmap()ing the
+ // code section does, so that's ok.
+}
+
+bool PerfJITEventListener::InitDebuggingDir() {
+ time_t Time;
+ struct tm LocalTime;
+ char TimeBuffer[sizeof("YYYYMMDD")];
+ SmallString<64> Path;
+
+ // search for location to dump data to
+ if (const char *BaseDir = getenv("JITDUMPDIR"))
+ Path.append(BaseDir);
+ else if (!sys::path::home_directory(Path))
+ Path = ".";
+
+ // create debug directory
+ Path += "/.debug/jit/";
+ if (auto EC = sys::fs::create_directories(Path)) {
+ errs() << "could not create jit cache directory " << Path << ": "
+ << EC.message() << "\n";
+ return false;
+ }
+
+ // create unique directory for dump data related to this process
+ time(&Time);
+ localtime_r(&Time, &LocalTime);
+ strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
+ Path += JIT_LANG "-jit-";
+ Path += TimeBuffer;
+
+ SmallString<128> UniqueDebugDir;
+
+ using sys::fs::createUniqueDirectory;
+ if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
+ errs() << "could not create unique jit cache directory " << UniqueDebugDir
+ << ": " << EC.message() << "\n";
+ return false;
+ }
+
+ JitPath = UniqueDebugDir.str();
+
+ return true;
+}
+
+bool PerfJITEventListener::OpenMarker() {
+ // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
+ // is captured either live (perf record running when we mmap) or in deferred
+ // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
+ // file for more meta data info about the jitted code. Perf report/annotate
+ // detect this special filename and process the jitdump file.
+ //
+ // Mapping must be PROT_EXEC to ensure it is captured by perf record
+ // even when not using -d option.
+ MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC,
+ MAP_PRIVATE, DumpFd, 0);
+
+ if (MarkerAddr == MAP_FAILED) {
+ errs() << "could not mmap JIT marker\n";
+ return false;
+ }
+ return true;
+}
+
+void PerfJITEventListener::CloseMarker() {
+ if (!MarkerAddr)
+ return;
+
+ munmap(MarkerAddr, sys::Process::getPageSize());
+ MarkerAddr = nullptr;
+}
+
+bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
+ char id[16];
+ struct {
+ uint16_t e_type;
+ uint16_t e_machine;
+ } info;
+
+ size_t RequiredMemory = sizeof(id) + sizeof(info);
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+ MemoryBuffer::getFileSlice("/proc/self/exe",
+ RequiredMemory,
+ 0);
+
+ // This'll not guarantee that enough data was actually read from the
+ // underlying file. Instead the trailing part of the buffer would be
+ // zeroed. Given the ELF signature check below that seems ok though,
+ // it's unlikely that the file ends just after that, and the
+ // consequence would just be that perf wouldn't recognize the
+ // signature.
+ if (auto EC = MB.getError()) {
+ errs() << "could not open /proc/self/exe: " << EC.message() << "\n";
+ return false;
+ }
+
+ memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
+ memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
+
+ // check ELF signature
+ if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
+ errs() << "invalid elf signature\n";
+ return false;
+ }
+
+ hdr.ElfMach = info.e_machine;
+
+ return true;
+}
+
+void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
+ uint64_t CodeAddr, uint64_t CodeSize) {
+ assert(SuccessfullyInitialized);
+
+ // 0 length functions can't have samples.
+ if (CodeSize == 0)
+ return;
+
+ LLVMPerfJitRecordCodeLoad rec;
+ rec.Prefix.Id = JIT_CODE_LOAD;
+ rec.Prefix.TotalSize = sizeof(rec) + // debug record itself
+ Symbol->size() + 1 + // symbol name
+ CodeSize; // and code
+ rec.Prefix.Timestamp = perf_get_timestamp();
+
+ rec.CodeSize = CodeSize;
+ rec.Vma = 0;
+ rec.CodeAddr = CodeAddr;
+ rec.Pid = Pid;
+ rec.Tid = get_threadid();
+
+ // avoid interspersing output
+ MutexGuard Guard(Mutex);
+
+ rec.CodeIndex = CodeGeneration++; // under lock!
+
+ Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
+ Dumpstream->write(Symbol->data(), Symbol->size() + 1);
+ Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
+}
+
+void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
+ DILineInfoTable Lines) {
+ assert(SuccessfullyInitialized);
+
+ // Didn't get useful debug info.
+ if (Lines.empty())
+ return;
+
+ LLVMPerfJitRecordDebugInfo rec;
+ rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
+ rec.Prefix.TotalSize = sizeof(rec); // will be increased further
+ rec.Prefix.Timestamp = perf_get_timestamp();
+ rec.CodeAddr = CodeAddr;
+ rec.NrEntry = Lines.size();
+
+ // compute total size size of record (variable due to filenames)
+ DILineInfoTable::iterator Begin = Lines.begin();
+ DILineInfoTable::iterator End = Lines.end();
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+ DILineInfo &line = It->second;
+ rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
+ rec.Prefix.TotalSize += line.FileName.size() + 1;
+ }
+
+ // The debug_entry describes the source line information. It is defined as
+ // follows in order:
+ // * uint64_t code_addr: address of function for which the debug information
+ // is generated
+ // * uint32_t line : source file line number (starting at 1)
+ // * uint32_t discrim : column discriminator, 0 is default
+ // * char name[n] : source file name in ASCII, including null termination
+
+ // avoid interspersing output
+ MutexGuard Guard(Mutex);
+
+ Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
+
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+ LLVMPerfJitDebugEntry LineInfo;
+ DILineInfo &Line = It->second;
+
+ LineInfo.Addr = It->first;
+ // The function re-created by perf is preceded by a elf
+ // header. Need to adjust for that, otherwise the results are
+ // wrong.
+ LineInfo.Addr += 0x40;
+ LineInfo.Lineno = Line.Line;
+ LineInfo.Discrim = Line.Discriminator;
+
+ Dumpstream->write(reinterpret_cast<const char *>(&LineInfo),
+ sizeof(LineInfo));
+ Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
+ }
+}
+
+// There should be only a single event listener per process, otherwise perf gets
+// confused.
+llvm::ManagedStatic<PerfJITEventListener> PerfListener;
+
+} // end anonymous namespace
+
+namespace llvm {
+JITEventListener *JITEventListener::createPerfJITEventListener() {
+ return &*PerfListener;
+}
+
+} // namespace llvm
+
--
2.17.1

26
patches/llvm-D46460.patch

@ -0,0 +1,26 @@
Index: lib/Analysis/LoopInfo.cpp
===================================================================
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -223,15 +223,14 @@
BasicBlock *H = getHeader();
for (BasicBlock *BB : this->blocks()) {
TerminatorInst *TI = BB->getTerminator();
- MDNode *MD = nullptr;
// Check if this terminator branches to the loop header.
- for (BasicBlock *Successor : TI->successors()) {
- if (Successor == H) {
- MD = TI->getMetadata(LLVMContext::MD_loop);
- break;
- }
- }
+ bool IsPredecessor = any_of(TI->successors(),
+ [=](BasicBlock *Successor) { return Successor == H; });
+ if (!IsPredecessor)
+ continue;
+
+ MDNode *MD = TI->getMetadata(LLVMContext::MD_loop);
if (!MD)
return nullptr;

187
patches/llvm-D49832-SCEVPred.patch

@ -0,0 +1,187 @@
commit 98592fcc61307968f7df1362771534595a1e1c21
Author: Keno Fischer <keno@juliacomputing.com>
Date: Wed Jul 25 19:29:02 2018 -0400
[SCEV] Don't expand Wrap predicate using inttoptr in ni addrspaces
Summary:
In non-integral address spaces, we're not allowed to introduce inttoptr/ptrtoint
intrinsics. Instead, we need to expand any pointer arithmetic as geps on the
base pointer. Luckily this is a common task for SCEV, so all we have to do here
is hook up the corresponding helper function and add test case.
Fixes PR38290
Reviewers: reames, sanjoy
Subscribers: javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D49832
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 7f76f057216..f441a3647fb 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -2157,8 +2157,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
const SCEV *Step = AR->getStepRecurrence(SE);
const SCEV *Start = AR->getStart();
+ Type *ARTy = AR->getType();
unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
- unsigned DstBits = SE.getTypeSizeInBits(AR->getType());
+ unsigned DstBits = SE.getTypeSizeInBits(ARTy);
// The expression {Start,+,Step} has nusw/nssw if
// Step < 0, Start - |Step| * Backedge <= Start
@@ -2170,11 +2171,12 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);
IntegerType *Ty =
- IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(AR->getType()));
+ IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
+ Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
Value *StepValue = expandCodeFor(Step, Ty, Loc);
Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
- Value *StartValue = expandCodeFor(Start, Ty, Loc);
+ Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);
ConstantInt *Zero =
ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2197,8 +2199,21 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// Compute:
// Start + |Step| * Backedge < Start
// Start - |Step| * Backedge > Start
- Value *Add = Builder.CreateAdd(StartValue, MulV);
- Value *Sub = Builder.CreateSub(StartValue, MulV);
+ Value *Add = nullptr, *Sub = nullptr;
+ if (ARExpandTy->isPointerTy()) {
+ PointerType *ARPtrTy = cast<PointerType>(ARExpandTy);
+ const SCEV *MulS = SE.getSCEV(MulV);
+ const SCEV *const StepArray[2] = {MulS, SE.getNegativeSCEV(MulS)};
+ Add = Builder.CreateBitCast(
+ expandAddToGEP(&StepArray[0], &StepArray[1], ARPtrTy, Ty, StartValue),
+ ARPtrTy);
+ Sub = Builder.CreateBitCast(
+ expandAddToGEP(&StepArray[1], &StepArray[2], ARPtrTy, Ty, StartValue),
+ ARPtrTy);
+ } else {
+ Add = Builder.CreateAdd(StartValue, MulV);
+ Sub = Builder.CreateSub(StartValue, MulV);
+ }
Value *EndCompareGT = Builder.CreateICmp(
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
new file mode 100644
index 00000000000..ddcf5e1a195
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
@@ -0,0 +1,73 @@
+; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV
+
+; NB: addrspaces 10-13 are non-integral
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
+
+; This matches the test case from PR38290
+; Check that we expand the SCEV predicate check using GEP, rather
+; than ptrtoint.
+
+%jl_value_t = type opaque
+%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 }
+
+declare i64 @julia_steprange_last_4949()
+
+define void @"japi1_align!_9477"(%jl_value_t addrspace(10)**) #0 {
+; LV-LAVEL: L26.lver.check
+; LV: [[OFMul:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[Step:%[^ ]*]])
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
+; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base:%[^ ]*]], i64 [[Step]]
+; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base]], i64 [[NegStep:%[^ ]*]]
+; LV-NEXT: icmp ugt i32 addrspace(13)* [[NegGEP]], [[Base]]
+; LV-NEXT: icmp ult i32 addrspace(13)* [[PosGEP]], [[Base]]
+; LV-NOT: inttoptr
+; LV-NOT: ptrtoint
+top:
+ %1 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8, !nonnull !1, !dereferenceable !2, !align !3
+ %2 = load i32, i32* inttoptr (i64 12 to i32*), align 4, !tbaa !4
+ %3 = sub i32 0, %2
+ %4 = call i64 @julia_steprange_last_4949()
+ %5 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)*
+ %6 = bitcast %jl_value_t addrspace(11)* %5 to %jl_value_t addrspace(10)* addrspace(11)*
+ %7 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %6, align 8, !tbaa !4, !nonnull !1, !dereferenceable !9, !align !2
+ %8 = addrspacecast %jl_value_t addrspace(10)* %7 to %jl_value_t addrspace(11)*
+ %9 = bitcast %jl_value_t addrspace(11)* %8 to i32 addrspace(13)* addrspace(11)*
+ %10 = load i32 addrspace(13)*, i32 addrspace(13)* addrspace(11)* %9, align 8, !tbaa !10, !nonnull !1
+ %11 = sext i32 %3 to i64
+ br label %L26
+
+L26: ; preds = %L26, %top
+ %value_phi3 = phi i64 [ 0, %top ], [ %12, %L26 ]
+ %12 = add i64 %value_phi3, -1
+ %13 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %12
+ %14 = load i32, i32 addrspace(13)* %13, align 4, !tbaa !13
+ %15 = add i64 %12, %11
+ %16 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %15
+ store i32 %14, i32 addrspace(13)* %16, align 4, !tbaa !13
+ %17 = icmp eq i64 %value_phi3, %4
+ br i1 %17, label %L45, label %L26
+
+L45: ; preds = %L26
+ ret void
+}
+
+attributes #0 = { "thunk" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"Debug Info Version", i32 3}
+!1 = !{}
+!2 = !{i64 16}
+!3 = !{i64 8}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"jtbaa_mutab", !6, i64 0}
+!6 = !{!"jtbaa_value", !7, i64 0}
+!7 = !{!"jtbaa_data", !8, i64 0}
+!8 = !{!"jtbaa"}
+!9 = !{i64 40}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"jtbaa_arrayptr", !12, i64 0}
+!12 = !{!"jtbaa_array", !8, i64 0}
+!13 = !{!14, !14, i64 0}
+!14 = !{!"jtbaa_arraybuf", !7, i64 0}
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
index a7e5bce7445..fa6fccecbf1 100644
--- a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
@@ -58,10 +58,10 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
@@ -233,10 +233,10 @@ for.end: ; preds = %for.body
; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]

89
patches/llvm-D50010-VNCoercion-ni.patch

@ -0,0 +1,89 @@
commit 8eb2b102a203d83fb713f3bf79acf235dabdd8cd
Author: Keno Fischer <keno@juliacomputing.com>
Date: Mon Jul 30 16:59:08 2018 -0400
[VNCoercion] Disallow coercion between different ni addrspaces
Summary:
I'm not sure if it would be legal by the IR reference to introduce
an addrspacecast here, since the IR reference is a bit vague on
the exact semantics, but at least for our usage of it (and I
suspect for many other's usage) it is not. For us, addrspacecasts
between non-integral address spaces carry frontend information that the
optimizer cannot deduce afterwards in a generic way (though we
have frontend specific passes in our pipline that do propagate
these). In any case, I'm sure nobody is using it this way at
the moment, since it would have introduced inttoptrs, which
are definitely illegal.
Fixes PR38375
Reviewers: sanjoy, reames, dberlin
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D50010
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index c3feea6a0a4..735d1e7b792 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -20,14 +20,21 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
return false;
+ Type *StoredValTy = StoredVal->getType();
+
// The store has to be at least as big as the load.
if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
return false;
- // Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
- DL.isNonIntegralPointerType(LoadTy))
+ bool StoredNI = DL.isNonIntegralPointerType(StoredValTy);
+ bool LoadNI = DL.isNonIntegralPointerType(LoadTy);
+ if (StoredNI != LoadNI) {
return false;
+ } else if (StoredNI && LoadNI &&
+ cast<PointerType>(StoredValTy)->getAddressSpace() !=
+ cast<PointerType>(LoadTy)->getAddressSpace()) {
+ return false;
+ }
return true;
}
diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
index 9ae4132231d..5217fc1a06a 100644
--- a/test/Transforms/GVN/non-integral-pointers.ll
+++ b/test/Transforms/GVN/non-integral-pointers.ll
@@ -1,6 +1,6 @@
; RUN: opt -gvn -S < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
target triple = "x86_64-unknown-linux-gnu"
define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
alwaysTaken:
ret i64 42
}
+
+ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
+ ; CHECK-LABEL: @multini(
+ ; CHECK-NOT: inttoptr
+ ; CHECK-NOT: ptrtoint
+ ; CHECK-NOT: addrspacecast
+ entry:
+ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+ neverTaken:
+ %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
+ %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc