Working julia

master
nixo 2019-04-20 14:37:36 +02:00
commit 99c751fc1f
18 changed files with 9518 additions and 0 deletions

522
julia.scm Normal file
View File

@ -0,0 +1,522 @@
(use-modules ((guix licenses)
#:prefix license:))
(use-modules
(guix packages))
(use-modules
(guix download))
(use-modules
(guix utils))
(use-modules
(guix git-download))
(use-modules
(guix build-system gnu))
(use-modules
(gnu packages))
(use-modules
(gnu packages algebra))
(use-modules
(gnu packages base))
(use-modules
(gnu packages compression))
(use-modules
(gnu packages elf))
(use-modules
(gnu packages gcc))
(use-modules
(gnu packages llvm))
(use-modules
(gnu packages libevent))
(use-modules
(gnu packages libunwind))
(use-modules
(gnu packages maths))
(use-modules
(gnu packages multiprecision)) ; mpfr)
(use-modules
(gnu packages pcre))
(use-modules
(gnu packages perl))
(use-modules
(gnu packages pkg-config))
(use-modules
(gnu packages python))
(use-modules
(gnu packages python-xyz))
(use-modules
(gnu packages textutils))
(use-modules
(gnu packages tls))
(use-modules
(gnu packages version-control))
(use-modules
(gnu packages wget))
(use-modules
(ice-9 match))
;; (define openblas-julia
;; (package
;; (inherit openblas)
;; (name "openblas-julia")
;; INTERFACE64=
;; ))
;; This works *BUT* we need to apply a lot of patches. Compiling this
;; with julia applies the patches automatically
(define llvm-julia
(package
(inherit llvm-6)
(name "llvm-julia")
(source
(origin
(method url-fetch)
(uri
(string-append
"http://releases.llvm.org/6.0.1/llvm-6.0.1.src.tar.xz"))
(sha256
(base32
"1qpls3vk85lydi5b4axl0809fv932qgsqgdgrk098567z4jc7mmn"))
(patches '("./llvm-6.0-D44650.patch"
"./llvm-6.0-DISABLE_ABI_CHECKS.patch"
"./llvm-6.0-NVPTX-addrspaces.patch"
"./llvm-6.0.0_D27296-libssp.patch"
"./llvm-D27629-AArch64-large_model_6.0.1.patch"
"./llvm-D34078-vectorize-fdiv.patch"
"./llvm-D42262-jumpthreading-not-i1.patch"
"./llvm-D44892-Perf-integration.patch"
"./llvm-D46460.patch"
"./llvm-D49832-SCEVPred.patch"
"./llvm-D50010-VNCoercion-ni.patch"
"./llvm-D50167-scev-umin.patch"
"./llvm-OProfile-line-num.patch"
"./llvm-PPC-addrspaces.patch"
"./llvm-rL323946-LSRTy.patch"
"./llvm-rL326967-aligned-load.patch"
"./llvm-rL327898.patch"
))
))
(arguments
(substitute-keyword-arguments
(package-arguments llvm-6)
((#:configure-flags flags)
'(list ;; Taken from NixOS. Only way I could get libLLVM-6.0.so
"-DCMAKE_BUILD_TYPE=Release"
"-DLLVM_INSTALL_UTILS=ON"
"-DLLVM_BUILD_TESTS=ON"
"-DLLVM_ENABLE_FFI=ON"
"-DLLVM_ENABLE_RTTI=ON"
;; "-DLLVM_HOST_TRIPLE=${stdenv.hostPlatform.config}"
;; "-DLLVM_DEFAULT_TARGET_TRIPLE=${stdenv.hostPlatform.config}"
"-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly"
"-DLLVM_ENABLE_DUMP=ON"
"-DLLVM_LINK_LLVM_DYLIB=ON")
)))))
(define libuv-julia
(let
((commit "2348256acf5759a544e5ca7935f638d2bc091d60"))
(package
(inherit libuv)
(name "libuv-julia")
(version commit)
(source
(origin
(method url-fetch)
(uri (string-append
"https://api.github.com/repos/JuliaLang/libuv/tarball/"
commit))
(sha256
(base32
"1363f4vqayfcv5zqg07qmzjff56yhad74k16c22ian45lram8mv8"))))
(build-system gnu-build-system)
(arguments
(substitute-keyword-arguments
(package-arguments libuv)
((#:phases phases)
`(modify-phases ,phases
(delete 'autogen)))))
(home-page "https://github.com/JuliaLang/libuv"))))
(package
(name "julia")
(version "1.1.0")
(source
(origin
(method url-fetch)
(uri
(string-append
"https://github.com/JuliaLang/julia/releases/download/v"
version "/julia-" version ".tar.gz"))
(sha256
(base32
"1bd6c5gqd7f2i837ay8iqi8h36smhcg0lq7f8c2axxaw8x6rcfmx"))))
(build-system gnu-build-system)
(arguments
`(#:test-target "test"
#:modules
((ice-9 match)
(guix build gnu-build-system)
(guix build utils))
;; Do not strip binaries to keep support for full backtraces.
;; See https://github.com/JuliaLang/julia/issues/17831
#:strip-binaries? #f
;; The DSOs use $ORIGIN to refer to each other, but (guix build
;; gremlin) doesn't support it yet, so skip this phase.
#:validate-runpath? #f
#:phases
(modify-phases %standard-phases
(delete 'configure)
(add-after 'unpack 'prepare-deps
(lambda*
(#:key inputs #:allow-other-keys)
(mkdir "deps/srccache")
(copy-file
(assoc-ref inputs "dsfmt")
"deps/srccache/dsfmt-2.2.3.tar.gz")
(copy-file
(string-append
(assoc-ref inputs "virtualenv")
"/bin/virtualenv")
"julia-env")
(copy-file
(assoc-ref inputs "libwhich")
(string-append "deps/srccache/libwhich-"
"81e9723c0273d78493dc8c8ed570f68d9ce7e89e"
".tar.gz"))
(copy-file (assoc-ref inputs "rmath")
"deps/srccache/Rmath-julia-0.1.tar.gz")
(copy-file
(assoc-ref inputs "objconv")
"deps/srccache/objconv.zip")
(copy-file
(assoc-ref inputs "suitesparse")
"deps/srccache/SuiteSparse-4.4.5.tar.gz")
;; needed by libwhich
(setenv "LD_LIBRARY_PATH"
(string-join
(map (lambda (pkg)
(string-append (assoc-ref inputs pkg) "/lib"))
(list
"arpack-ng" "fftw" "gmp" "lapack"
"libgit2" "mpfr" "openblas" "openlibm"
"openspecfun" "pcre2"
))
":"))
;; (copy-file
;; (assoc-ref inputs "llvm")
;; "deps/srccache/llvm-6.0.0.src.tar.xz")
#t))
;; FIXME: Building the documentation requires Julia packages that
;; would be downloaded from the Internet. We should build them in a
;; separate build phase.
(add-after 'unpack 'disable-documentation
(lambda _
(substitute* "Makefile"
(("(install: .*) \\$\\(BUILDROOT\\)/doc/_build/html/en/index.html" _ line)
(string-append line "\n"))
(("src ui doc deps")
"src ui deps"))
#t))
(add-before 'check 'set-home
;; Some tests require a home directory to be set.
(lambda _
(setenv "HOME" "/tmp")
#t))
(add-after 'unpack 'hardcode-soname-map
;; ./src/runtime_ccall.cpp creates a map from library names to paths
;; using the output of "/sbin/ldconfig -p". Since ldconfig is not
;; used in Guix, we patch runtime_ccall.cpp to contain a static map.
(lambda* (#:key inputs #:allow-other-keys)
(use-modules (ice-9 match))
(substitute* "src/runtime_ccall.cpp"
;; Patch out invocations of '/sbin/ldconfig' to avoid getting
;; error messages about missing '/sbin/ldconfig' on Guix System.
(("popen\\(.*ldconfig.*\\);")
"NULL;\n")
;; Populate 'sonameMap'.
(("jl_read_sonames.*;")
(string-join
(map (match-lambda
((input libname soname)
(string-append
"sonameMap[\"" libname "\"] = "
"\"" (assoc-ref inputs input) "/lib/" soname "\";")))
'(("libc" "libc" "libc.so.6")
("pcre2" "libpcre2-8" "libpcre2-8.so")
("mpfr" "libmpfr" "libmpfr.so")
("openblas" "libblas" "libopenblas.so")
("arpack-ng" "libarpack" "libarpack.so")
("lapack" "liblapack" "liblapack.so")
("libgit2" "libgit2" "libgit2.so")
("gmp" "libgmp" "libgmp.so")
;; ("openlibm" "libopenlibm" "libopenlibm.so")
("openspecfun" "libopenspecfun" "libopenspecfun.so")
("fftw" "libfftw3" "libfftw3_threads.so")
("fftwf" "libfftw3f" "libfftw3f_threads.so"))))))
;; FIXME: NIXO
;; (substitute* "base/fft/FFTW.jl"
;; (("const libfftw = Base.libfftw_name")
;; (string-append "const libfftw = \""
;; (assoc-ref inputs "fftw") "/lib/libfftw3_threads.so"
;; "\""))
;; (("const libfftwf = Base.libfftwf_name")
;; (string-append "const libfftwf = \""
;; (assoc-ref inputs "fftwf") "/lib/libfftw3f_threads.so"
;; "\"")))
(substitute* "base/math.jl"
(("const libm = Base.libm_name")
(string-append "const libm = \""
(assoc-ref inputs "openlibm")
"/lib/libopenlibm.so"
"\""))
(("const openspecfun = \"libopenspecfun\"")
(string-append "const openspecfun = \""
(assoc-ref inputs "openspecfun")
"/lib/libopenspecfun.so"
"\"")))
;; (substitute* "base/pcre.jl"
;; (("const PCRE_LIB = \"libpcre2-8\"")
;; (string-append "const PCRE_LIB = \""
;; (assoc-ref inputs "pcre2")
;; "/lib/libpcre2-8.so" "\"")))
#t))
(add-before 'build 'fix-include-and-link-paths
(lambda*
(#:key inputs #:allow-other-keys)
;; LIBUTF8PROC is a linker flag, not a build target. It is
;; included in the LIBFILES_* variable which is used as a
;; collection of build targets and a list of libraries to link
;; against.
(substitute* "src/flisp/Makefile"
(("\\$\\(BUILDDIR\\)/\\$\\(EXENAME\\): \\$\\(OBJS\\) \\$\\(LIBFILES_release\\)")
"$(BUILDDIR)/$(EXENAME): $(OBJS) $(LLT_release)")
(("\\$\\(BUILDDIR\\)/\\$\\(EXENAME\\)-debug: \\$\\(DOBJS\\) \\$\\(LIBFILES_debug\\)")
"$(BUILDDIR)/$(EXENAME)-debug: $(DOBJS) $(LLT_debug)"))
;; The REPL must be linked with libuv.
(substitute* "ui/Makefile"
(("JLDFLAGS \\+= ")
(string-append "JLDFLAGS += "
(assoc-ref %build-inputs "libuv")
"/lib/libuv.so ")))
(substitute* "base/Makefile"
(("\\$\\(build_includedir\\)/uv/errno.h")
(string-append
(assoc-ref inputs "libuv")
"/include/uv/errno.h")))
#t))
(add-before 'build 'replace-default-shell
(lambda _
(substitute* "base/client.jl"
(("/bin/sh")
(which "sh")))
#t))
(add-after 'unpack 'hardcode-paths
(lambda _
(substitute* "stdlib/InteractiveUtils/src/InteractiveUtils.jl"
(("`which") (string-append "`" (which "which")))
(("`wget") (string-append "`" (which "wget"))))
#t))
(add-before 'check 'disable-broken-tests
(lambda _
(define (touch file-name)
(call-with-output-file file-name (const #t)))
;; Don't know why FIXME
;; (substitute* "stdlib/LibGit2/test/libgit2.jl"
;; (("!LibGit2.use_http_path(cfg, github_cred)")
;; "true")
;; (("LibGit2.use_http_path(cfg, mygit_cred)")
;; "true"))
(map (lambda (test)
(delete-file test)
(touch test))
'("stdlib/Sockets/test/runtests.jl"
"stdlib/Distributed/test/runtests.jl"
"stdlib/LibGit2/test/libgit2.jl"
))
(substitute* "test/choosetests.jl"
;; These tests fail, probably because some of the input
;; binaries have been stripped and thus backtraces don't look
;; as expected.
(("\"backtrace\",")
"")
(("\"cmdlineargs\",")
""))
#t)))
#:make-flags
(list
(string-append "prefix=" (assoc-ref %outputs "out"))
(string-append "PREFIX=" (assoc-ref %outputs "out"))
;; Passing the MARCH flag is necessary to build binary substitutes for
;; the supported architectures.
,(match
(or
(%current-target-system)
(%current-system))
("x86_64-linux" "MARCH=x86-64")
("i686-linux" "MARCH=pentium4")
("aarch64-linux" "MARCH=armv8-a")
;; Prevent errors when querying this package on unsupported
;; platforms, e.g. when running "guix package --search="
(_ "MARCH=UNSUPPORTED"))
"CONFIG_SHELL=bash" ;needed to build bundled libraries
"USE_SYSTEM_DSFMT=0" ;not packaged for Guix and upstream has no
;build system for a shared library.
"USE_SYSTEM_LAPACK=1"
"USE_SYSTEM_BLAS=1"
;; TODO: What about building blas with 64 support?
"USE_BLAS64=0" ;needed when USE_SYSTEM_BLAS=1
"LIBBLAS=-lopenblas"
"LIBBLASNAME=libopenblas"
"USE_SYSTEM_FFTW=1"
"LIBFFTWNAME=libfftw3"
"LIBFFTWFNAME=libfftw3f"
;; TODO: Suitesparse does not install shared libraries, so we cannot
;; use the suitesparse package.
;; "USE_SYSTEM_SUITESPARSE=1"
;; (string-append "SUITESPARSE_INC=-I "
;; (assoc-ref %build-inputs "suitesparse")
;; "/include")
"USE_GPL_LIBS=1" ;proudly
"USE_SYSTEM_UTF8PROC=1"
(string-append "UTF8PROC_INC="
(assoc-ref %build-inputs "utf8proc")
"/include")
"USE_SYSTEM_LLVM=1"
"LLVM_VER=6.0.1"
;; "LLVM_VER=6.0.0"
"USE_LLVM_SHLIB=1" ; FIXME: fails when set to 1
"USE_SYSTEM_LIBUNWIND=1"
"USE_SYSTEM_LIBUV=1"
(string-append "LIBUV="
(assoc-ref %build-inputs "libuv")
"/lib/libuv.so")
(string-append "LIBUV_INC="
(assoc-ref %build-inputs "libuv")
"/include")
"USE_SYSTEM_PATCHELF=1"
"USE_SYSTEM_PCRE=1"
"USE_SYSTEM_OPENLIBM=1"
"USE_SYSTEM_GMP=1"
"USE_SYSTEM_MPFR=1"
"USE_SYSTEM_ARPACK=1"
"USE_SYSTEM_LIBGIT2=1"
"USE_SYSTEM_ZLIB=1"
"USE_SYSTEM_OPENSPECFUN=1")))
(inputs
`( ("llvm" ,llvm-julia)
;; The bundled version is 3.3.0 so stick to that version. With other
;; versions, we get test failures in 'linalg/arnoldi' as described in
;; <https://bugs.gnu.org/30282>.
("arpack-ng" ,arpack-ng-3.3.0)
("coreutils" ,coreutils)
;for bindings to "mkdir" and the like
("lapack" ,lapack)
("openblas" ,openblas)
;Julia does not build with Atlas
("libunwind" ,libunwind)
("openlibm" ,openlibm)
("openspecfun" ,openspecfun)
("libuv", libuv-julia)
("libgit2" ,libgit2)
("fftw" ,fftw)
("fftwf" ,fftwf)
("fortran" ,gfortran)
("pcre2" ,pcre2)
("utf8proc" ,utf8proc)
("mpfr" ,mpfr)
("wget" ,wget)
("which" ,which)
("zlib" ,zlib)
("gmp" ,gmp)
("virtualenv" ,python2-virtualenv)
;; FIXME: The following inputs are downloaded from upstream to allow us
;; to use the lightweight Julia release tarball. Ideally, these inputs
;; would eventually be replaced with proper Guix packages.
;; TODO: run "make -f contrib/repackage_system_suitesparse4.make" to copy static lib
("rmath"
,(origin
(method url-fetch)
(uri "https://api.github.com/repos/JuliaLang/Rmath-julia/tarball/v0.1")
(sha256
(base32
"1qyps217175qhid46l8f5i1v8i82slgp23ia63x2hzxwfmx8617p"))))
("suitesparse"
,(origin
(method url-fetch)
(uri "http://faculty.cse.tamu.edu/davis/SuiteSparse/SuiteSparse-4.4.5.tar.gz")
(sha256
(base32
"1jcbxb8jx5wlcixzf6n5dca2rcfx6mlcms1k2rl5gp67ay3bix43"))))
("objconv"
,(origin
(method url-fetch)
;; No versioned URL, see <https://www.agner.org/optimize/> for updates.
(uri "https://www.agner.org/optimize/objconv.zip")
(file-name "objconv-2018-10-07.zip")
(sha256
(base32
"0wp6ld9vk11f4nnkn56627zmlv9k5vafi99qa3yyn1pgcd61zcfs"))))
("libwhich"
,(origin
(method url-fetch)
(uri
(string-append
"https://api.github.com/repos/vtjnash/libwhich/tarball/"
"81e9723c0273d78493dc8c8ed570f68d9ce7e89e"))
(sha256
(base32
"1p7zg31kpmpbmh1znrk1xrbd074agx13b9q4dcw8n2zrwwdlbz3b"))))
;; ("llvm"
;; ,(origin
;; (method url-fetch)
;; (uri
;; (string-append
;; "http://releases.llvm.org/6.0.0/llvm-6.0.0.src.tar.xz"))
;; (sha256
;; (base32
;; "0224xvfg6h40y5lrbnb9qaq3grmdc5rg00xq03s1wxjfbf8krx8z"))))
;; ("cmake" ,cmake) ;; required to build llvm
("dsfmt"
,(origin
(method url-fetch)
(uri
(string-append
"http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/"
"SFMT/dSFMT-src-2.2.3.tar.gz"))
(sha256
(base32
"03kaqbjbi6viz0n33dk5jlf6ayxqlsq4804n7kwkndiga9s4hd42"))))))
(native-inputs
`(("openssl" ,openssl)
("perl" ,perl)
("patchelf" ,patchelf)
("pkg-config" ,pkg-config)
("python" ,python-2)))
;; Julia is not officially released for ARM and MIPS.
;; See https://github.com/JuliaLang/julia/issues/10639
(supported-systems
'("i686-linux" "x86_64-linux" "aarch64-linux"))
(home-page "https://julialang.org/")
(synopsis "High-performance dynamic language for technical computing")
(description
"Julia is a high-level, high-performance dynamic programming language for
technical computing, with syntax that is familiar to users of other technical
computing environments. It provides a sophisticated compiler, distributed
parallel execution, numerical accuracy, and an extensive mathematical function
library.")
(license license:expat))

View File

@ -0,0 +1,13 @@
Index: tools/llvm-cfi-verify/CMakeLists.txt
===================================================================
--- a/tools/llvm-cfi-verify/CMakeLists.txt
+++ b/tools/llvm-cfi-verify/CMakeLists.txt
@@ -11,7 +11,7 @@
Symbolize
)
-add_llvm_tool(llvm-cfi-verify
+add_llvm_tool(llvm-cfi-verify DISABLE_LLVM_LINK_LLVM_DYLIB
llvm-cfi-verify.cpp)
add_subdirectory(lib)

View File

@ -0,0 +1,39 @@
From d793ba4bacae51ae25be19c1636fcf38707938fd Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Fri, 1 Jun 2018 17:43:55 -0400
Subject: [PATCH] fix LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
---
cmake/modules/HandleLLVMOptions.cmake | 2 +-
include/llvm/Config/abi-breaking.h.cmake | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index 3d2dd48018c..b67ee6a896e 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -572,7 +572,7 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
if (LLVM_ENABLE_PEDANTIC AND LLVM_COMPILER_IS_GCC_COMPATIBLE)
append("-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
- append("-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+ append("-Wno-long-long -Wundef" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG)
diff --git a/include/llvm/Config/abi-breaking.h.cmake b/include/llvm/Config/abi-breaking.h.cmake
index 7ae401e5b8a..d52c4609101 100644
--- a/include/llvm/Config/abi-breaking.h.cmake
+++ b/include/llvm/Config/abi-breaking.h.cmake
@@ -20,7 +20,7 @@
/* Allow selectively disabling link-time mismatch checking so that header-only
ADT content from LLVM can be used without linking libSupport. */
-#if !LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
+#ifndef LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
// ABI_BREAKING_CHECKS protection: provides link-time failure when clients build
// mismatch with LLVM
--
2.17.0

View File

@ -0,0 +1,32 @@
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f1e4251a44b..73d49f5d7e4 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1248,6 +1248,14 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
}
}
+bool NVPTXTargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ assert(SrcAS != DestAS && "Expected different address spaces!");
+
+ return (SrcAS == ADDRESS_SPACE_GENERIC || SrcAS > ADDRESS_SPACE_LOCAL) &&
+ (DestAS == ADDRESS_SPACE_GENERIC || DestAS > ADDRESS_SPACE_LOCAL);
+}
+
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index ef04a8573d4..68a9a7195c4 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -443,6 +443,8 @@ public:
const NVPTXSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
const char *getTargetNodeName(unsigned Opcode) const override;

View File

@ -0,0 +1,35 @@
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2098,7 +2098,8 @@
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
// MSVC CRT provides functionalities for stack protection.
- if (Subtarget.getTargetTriple().isOSMSVCRT()) {
+ if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
+ Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
// MSVC CRT has a global variable holding security cookie.
M.getOrInsertGlobal("__security_cookie",
Type::getInt8PtrTy(M.getContext()));
@@ -2120,15 +2121,19 @@
Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
// MSVC CRT has a global variable holding security cookie.
- if (Subtarget.getTargetTriple().isOSMSVCRT())
+ if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
+ Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
return M.getGlobalVariable("__security_cookie");
+ }
return TargetLowering::getSDagStackGuard(M);
}
Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
// MSVC CRT has a function to validate security cookie.
- if (Subtarget.getTargetTriple().isOSMSVCRT())
+ if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
+ Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
return M.getFunction("__security_check_cookie");
+ }
return TargetLowering::getSSPStackGuardCheck(M);
}

View File

@ -0,0 +1,53 @@
From f76abe65e6d07fea5e838c4f8c9a9421c16debb0 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Thu, 5 Jul 2018 12:37:50 -0400
Subject: [PATCH] Fix unwind info relocation with large code model on AArch64
---
lib/MC/MCObjectFileInfo.cpp | 2 ++
.../AArch64/ELF_ARM64_large-relocations.s | 20 +++++++++++++++++++
2 files changed, 22 insertions(+)
create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 328f000f37c..938b35f20d1 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -291,6 +291,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
break;
case Triple::ppc64:
case Triple::ppc64le:
+ case Triple::aarch64:
+ case Triple::aarch64_be:
case Triple::x86_64:
FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
(Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
new file mode 100644
index 00000000000..66f28dabd79
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc -triple=arm64-none-linux-gnu -large-code-model -filetype=obj -o %T/large-reloc.o %s
+# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o
+# RUN-BE: llvm-mc -triple=aarch64_be-none-linux-gnu -large-code-model -filetype=obj -o %T/be-large-reloc.o %s
+# RUN-BE: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section be-large-reloc.o,.eh_frame=0x10000 -map-section be-large-reloc.o,.text=0xffff000000000000 -check=%s %T/be-large-reloc.o
+
+ .text
+ .globl g
+ .p2align 2
+ .type g,@function
+g:
+ .cfi_startproc
+ mov x0, xzr
+ ret
+ .Lfunc_end0:
+ .size g, .Lfunc_end0-g
+ .cfi_endproc
+
+# Skip the CIE and load the 8 bytes PC begin pointer.
+# Assuming the CIE and the FDE length are both 4 bytes.
+# rtdyld-check: *{8}(section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc)
--
2.18.0

View File

@ -0,0 +1,56 @@
From f94d12b6108b944199b715f31f25a022f75d2feb Mon Sep 17 00:00:00 2001
From: Yichao Yu <yyc1992@gmail.com>
Date: Sat, 10 Jun 2017 08:45:13 -0400
Subject: [PATCH 4/4] Enable support for floating-point division reductions
Similar to fsub, fdiv can also be vectorized using fmul.
---
lib/Transforms/Utils/LoopUtils.cpp | 1 +
test/Transforms/LoopVectorize/float-reduction.ll | 22 ++++++++++++++++++++++
2 files changed, 23 insertions(+)
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 3c522786641..a4aced53a95 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -451,6 +451,7 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
return InstDesc(Kind == RK_IntegerOr, I);
case Instruction::Xor:
return InstDesc(Kind == RK_IntegerXor, I);
+ case Instruction::FDiv:
case Instruction::FMul:
return InstDesc(Kind == RK_FloatMult, I, UAI);
case Instruction::FSub:
diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll
index f3b95d0ead7..669c54d55a2 100644
--- a/test/Transforms/LoopVectorize/float-reduction.ll
+++ b/test/Transforms/LoopVectorize/float-reduction.ll
@@ -44,3 +44,25 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret float %sub
}
+
+;CHECK-LABEL: @foodiv(
+;CHECK: fdiv fast <4 x float>
+;CHECK: ret
+define float @foodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ]
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %sub = fdiv fast float %sum.04, %0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 200
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret float %sub
+}
--
2.14.1

View File

@ -0,0 +1,82 @@
commit 6a311a7a804831fea43cfb2f61322adcb407a1af
Author: Keno Fischer <keno@juliacomputing.com>
Date: Thu Jan 18 15:57:05 2018 -0500
[JumpThreading] Don't restrict cast-traversal to i1
Summary:
In D17663, JumpThreading learned to look trough simple cast instructions,
but only if the source of those cast instructions was a phi/cmp i1
(in an effort to limit compile time effects). I think this condition
is too restrictive. For switches with limited value range, InstCombine
will readily introduce an extra `trunc` instruction to a smaller
integer type (e.g. from i8 to i2), leaving us in the somewhat perverse
situation that jump-threading would work before running instcombine,
but not after. Since instcombine produces this pattern, I think we
need to consider it canonical and support it in JumpThreading.
In general, for limiting recursion, I think the existing restriction
to phi and cmp nodes should be sufficient to avoid looking through
unprofitable chains of instructions.
Reviewers: haicheng, gberry, bmakam, mcrosier
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D42262
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 95c4650..1155e18 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -647,11 +647,9 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
}
// Handle Cast instructions. Only see through Cast when the source operand is
- // PHI or Cmp and the source type is i1 to save the compilation time.
+ // PHI or Cmp to save the compilation time.
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Source = CI->getOperand(0);
- if (!Source->getType()->isIntegerTy(1))
- return false;
if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
return false;
ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index ce86cba..16e7549 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -547,6 +547,34 @@ l5:
; CHECK: }
}
+define i1 @trunc_switch(i1 %arg) {
+; CHECK-LABEL: @trunc_switch
+top:
+; CHECK: br i1 %arg, label %exitA, label %exitB
+ br i1 %arg, label %common, label %B
+
+B:
+ br label %common
+
+common:
+ %phi = phi i8 [ 2, %B ], [ 1, %top ]
+ %trunc = trunc i8 %phi to i2
+; CHECK-NOT: switch
+ switch i2 %trunc, label %unreach [
+ i2 1, label %exitA
+ i2 -2, label %exitB
+ ]
+
+unreach:
+ unreachable
+
+exitA:
+ ret i1 true
+
+exitB:
+ ret i1 false
+}
+
; CHECK-LABEL: define void @h_con(i32 %p) {
define void @h_con(i32 %p) {
%x = icmp ult i32 %p, 5

View File

@ -0,0 +1,677 @@
From 45bc0f0badbdbabaed7d204757c2aad7ab49a3fe Mon Sep 17 00:00:00 2001
From: DokFaust <rodia@autistici.org>
Date: Mon, 11 Jun 2018 12:59:42 +0200
Subject: [PATCH] PerfJITEventListener integration, requires compile flag
LLVM_USE_PERF
---
CMakeLists.txt | 13 +
include/llvm/Config/config.h.cmake | 3 +
include/llvm/Config/llvm-config.h.cmake | 3 +
.../llvm/ExecutionEngine/JITEventListener.h | 9 +
lib/ExecutionEngine/CMakeLists.txt | 4 +
lib/ExecutionEngine/LLVMBuild.txt | 2 +-
lib/ExecutionEngine/Orc/LLVMBuild.txt | 2 +-
.../PerfJITEvents/CMakeLists.txt | 5 +
.../PerfJITEvents/LLVMBuild.txt | 23 +
.../PerfJITEvents/PerfJITEventListener.cpp | 492 ++++++++++++++++++
10 files changed, 554 insertions(+), 2 deletions(-)
create mode 100644 lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
create mode 100644 lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
create mode 100644 lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f8da6cf9211..fb92c825a46 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -426,6 +426,16 @@ if( LLVM_USE_OPROFILE )
endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
endif( LLVM_USE_OPROFILE )
+option(LLVM_USE_PERF
+ "Use perf JIT interface to inform perf about JIT code" OFF)
+
+# If enabled, verify we are on a platform that supports perf.
+if( LLVM_USE_PERF )
+ if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+ message(FATAL_ERROR "perf support is available on Linux only.")
+ endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+endif( LLVM_USE_PERF )
+
set(LLVM_USE_SANITIZER "" CACHE STRING
"Define the sanitizer used to build binaries and tests.")
set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
@@ -634,6 +644,9 @@ endif (LLVM_USE_INTEL_JITEVENTS)
if (LLVM_USE_OPROFILE)
set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT)
endif (LLVM_USE_OPROFILE)
+if (LLVM_USE_PERF)
+ set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents)
+endif (LLVM_USE_PERF)
message(STATUS "Constructing LLVMBuild project information")
execute_process(
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index 940f8420304..17787ed779b 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -377,6 +377,9 @@
/* Define if we have the oprofile JIT-support library */
#cmakedefine01 LLVM_USE_OPROFILE
+/* Define if we have the perf JIT-support library */
+#cmakedefine01 LLVM_USE_PERF
+
/* LLVM version information */
#cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}"
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
index 4daa00f3bc4..8d9c3b24d52 100644
--- a/include/llvm/Config/llvm-config.h.cmake
+++ b/include/llvm/Config/llvm-config.h.cmake
@@ -65,6 +65,9 @@
/* Define if we have the oprofile JIT-support library */
#cmakedefine01 LLVM_USE_OPROFILE
+/* Define if we have the perf JIT-support library */
+#cmakedefine01 LLVM_USE_PERF
+
/* Major version of the LLVM API */
#define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index ff7840f00a4..1cc2c423a8b 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -115,6 +115,15 @@ public:
}
#endif // USE_OPROFILE
+#ifdef LLVM_USE_PERF
+ static JITEventListener *createPerfJITEventListener();
+#else
+ static JITEventListener *createPerfJITEventListener()
+ {
+ return nullptr;
+ }
+#endif //USE_PERF
+
private:
virtual void anchor();
};
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index 84b34919e44..893d113a685 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -30,3 +30,7 @@ endif( LLVM_USE_OPROFILE )
if( LLVM_USE_INTEL_JITEVENTS )
add_subdirectory(IntelJITEvents)
endif( LLVM_USE_INTEL_JITEVENTS )
+
+if( LLVM_USE_PERF )
+ add_subdirectory(PerfJITEvents)
+endif( LLVM_USE_PERF )
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
index 9d29a41f504..b6e1bda6a51 100644
--- a/lib/ExecutionEngine/LLVMBuild.txt
+++ b/lib/ExecutionEngine/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc
+subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc PerfJITEvents
[component_0]
type = Library
diff --git a/lib/ExecutionEngine/Orc/LLVMBuild.txt b/lib/ExecutionEngine/Orc/LLVMBuild.txt
index 8f05172e77a..ef4ae64e823 100644
--- a/lib/ExecutionEngine/Orc/LLVMBuild.txt
+++ b/lib/ExecutionEngine/Orc/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = OrcJIT
parent = ExecutionEngine
-required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
+required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
diff --git a/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
new file mode 100644
index 00000000000..136cc429d02
--- /dev/null
+++ b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMPerfJITEvents
+ PerfJITEventListener.cpp
+ )
+
+add_dependencies(LLVMPerfJITEvents LLVMCodeGen)
diff --git a/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
new file mode 100644
index 00000000000..b1958a69260
--- /dev/null
+++ b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = OptionalLibrary
+name = PerfJITEvents
+parent = ExecutionEngine
+required_libraries = CodeGen Core DebugInfoDWARF ExecutionEngine Object Support TransformUtils
+
diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
new file mode 100644
index 00000000000..c2b97dd59f3
--- /dev/null
+++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -0,0 +1,492 @@
+//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object that tells perf about JITted
+// functions, including source line information.
+//
+// Documentation for perf jit integration is available at:
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <sys/mman.h> // mmap()
+#include <sys/types.h> // getpid()
+#include <time.h> // clock_gettime(), time(), localtime_r() */
+#include <unistd.h> // for getpid(), read(), close()
+
+using namespace llvm;
+using namespace llvm::object;
+typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
+
+namespace {
+
+// language identifier (XXX: should we generate something better from debug
+// info?)
+#define JIT_LANG "llvm-IR"
+#define LLVM_PERF_JIT_MAGIC \
+ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \
+ (uint32_t)'D')
+#define LLVM_PERF_JIT_VERSION 1
+
+// bit 0: set if the jitdump file is using an architecture-specific timestamp
+// clock source
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)
+
+struct LLVMPerfJitHeader;
+
+class PerfJITEventListener : public JITEventListener {
+public:
+ PerfJITEventListener();
+ ~PerfJITEventListener() {
+ if (MarkerAddr)
+ CloseMarker();
+ }
+
+ void NotifyObjectEmitted(const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) override;
+ void NotifyFreeingObject(const ObjectFile &Obj) override;
+
+private:
+ bool InitDebuggingDir();
+ bool OpenMarker();
+ void CloseMarker();
+ static bool FillMachine(LLVMPerfJitHeader &hdr);
+
+ void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr,
+ uint64_t CodeSize);
+ void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
+
+ // cache lookups
+ pid_t Pid;
+
+ // base directory for output data
+ std::string JitPath;
+
+ // output data stream, closed via Dumpstream
+ int DumpFd = -1;
+
+ // output data stream
+ std::unique_ptr<raw_fd_ostream> Dumpstream;
+
+ // prevent concurrent dumps from messing up the output file
+ sys::Mutex Mutex;
+
+ // perf mmap marker
+ void *MarkerAddr = NULL;
+
+ // perf support ready
+ bool SuccessfullyInitialized = false;
+
+ // identifier for functions, primarily to identify when moving them around
+ uint64_t CodeGeneration = 1;
+};
+
+// The following are POD struct definitions from the perf jit specification
+
+enum LLVMPerfJitRecordType {
+ JIT_CODE_LOAD = 0,
+ JIT_CODE_MOVE = 1, // not emitted, code isn't moved
+ JIT_CODE_DEBUG_INFO = 2,
+ JIT_CODE_CLOSE = 3, // not emitted, unnecessary
+ JIT_CODE_UNWINDING_INFO = 4, // not emitted
+
+ JIT_CODE_MAX
+};
+
+struct LLVMPerfJitHeader {
+ uint32_t Magic; // characters "JiTD"
+ uint32_t Version; // header version
+ uint32_t TotalSize; // total size of header
+ uint32_t ElfMach; // elf mach target
+ uint32_t Pad1; // reserved
+ uint32_t Pid;
+ uint64_t Timestamp; // timestamp
+ uint64_t Flags; // flags
+};
+
+// record prefix (mandatory in each record)
+struct LLVMPerfJitRecordPrefix {
+ uint32_t Id; // record type identifier
+ uint32_t TotalSize;
+ uint64_t Timestamp;
+};
+
+struct LLVMPerfJitRecordCodeLoad {
+ LLVMPerfJitRecordPrefix Prefix;
+
+ uint32_t Pid;
+ uint32_t Tid;
+ uint64_t Vma;
+ uint64_t CodeAddr;
+ uint64_t CodeSize;
+ uint64_t CodeIndex;
+};
+
+struct LLVMPerfJitDebugEntry {
+ uint64_t Addr;
+ int Lineno; // source line number starting at 1
+ int Discrim; // column discriminator, 0 is default
+ // followed by null terminated filename, \xff\0 if same as previous entry
+};
+
+struct LLVMPerfJitRecordDebugInfo {
+ LLVMPerfJitRecordPrefix Prefix;
+
+ uint64_t CodeAddr;
+ uint64_t NrEntry;
+ // followed by NrEntry LLVMPerfJitDebugEntry records
+};
+
+static inline uint64_t timespec_to_ns(const struct timespec *ts) {
+ const uint64_t NanoSecPerSec = 1000000000;
+ return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec;
+}
+
+static inline uint64_t perf_get_timestamp(void) {
+ struct timespec ts;
+ int ret;
+
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (ret)
+ return 0;
+
+ return timespec_to_ns(&ts);
+}
+
+PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) {
+ // check if clock-source is supported
+ if (!perf_get_timestamp()) {
+ errs() << "kernel does not support CLOCK_MONOTONIC\n";
+ return;
+ }
+
+ if (!InitDebuggingDir()) {
+ errs() << "could not initialize debugging directory\n";
+ return;
+ }
+
+ std::string Filename;
+ raw_string_ostream FilenameBuf(Filename);
+ FilenameBuf << JitPath << "/jit-" << Pid << ".dump";
+
+ // Need to open ourselves, because we need to hand the FD to OpenMarker() and
+ // raw_fd_ostream doesn't expose the FD.
+ using sys::fs::openFileForWrite;
+ if (auto EC =
+ openFileForWrite(FilenameBuf.str(), DumpFd, sys::fs::F_RW, 0666)) {
+ errs() << "could not open JIT dump file " << FilenameBuf.str() << ": "
+ << EC.message() << "\n";
+ return;
+ }
+
+ Dumpstream = make_unique<raw_fd_ostream>(DumpFd, true);
+
+ LLVMPerfJitHeader Header = {0};
+ if (!FillMachine(Header))
+ return;
+
+ // signal this process emits JIT information
+ if (!OpenMarker())
+ return;
+
+ // emit dumpstream header
+ Header.Magic = LLVM_PERF_JIT_MAGIC;
+ Header.Version = LLVM_PERF_JIT_VERSION;
+ Header.TotalSize = sizeof(Header);
+ Header.Pid = Pid;
+ Header.Timestamp = perf_get_timestamp();
+ Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
+
+ // Everything initialized, can do profiling now.
+ if (!Dumpstream->has_error())
+ SuccessfullyInitialized = true;
+}
+
+void PerfJITEventListener::NotifyObjectEmitted(
+ const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) {
+
+ if (!SuccessfullyInitialized)
+ return;
+
+ OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
+ const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
+
+ // Get the address of the object image for use as a unique identifier
+ std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
+
+ // Use symbol info to iterate over functions in the object.
+ for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
+ SymbolRef Sym = P.first;
+ std::string SourceFileName;
+
+ Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
+ if (!SymTypeOrErr) {
+ // There's not much we can with errors here
+ consumeError(SymTypeOrErr.takeError());
+ continue;
+ }
+ SymbolRef::Type SymType = *SymTypeOrErr;
+ if (SymType != SymbolRef::ST_Function)
+ continue;
+
+ Expected<StringRef> Name = Sym.getName();
+ if (!Name) {
+ consumeError(Name.takeError());
+ continue;
+ }
+
+ Expected<uint64_t> AddrOrErr = Sym.getAddress();
+ if (!AddrOrErr) {
+ consumeError(AddrOrErr.takeError());
+ continue;
+ }
+ uint64_t Addr = *AddrOrErr;
+ uint64_t Size = P.second;
+
+ // According to spec debugging info has to come before loading the
+ // corresonding code load.
+ DILineInfoTable Lines = Context->getLineInfoForAddressRange(
+ Addr, Size, FileLineInfoKind::AbsoluteFilePath);
+
+ NotifyDebug(Addr, Lines);
+ NotifyCode(Name, Addr, Size);
+ }
+
+ Dumpstream->flush();
+}
+
+void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
+ // perf currently doesn't have an interface for unloading. But munmap()ing the
+ // code section does, so that's ok.
+}
+
+bool PerfJITEventListener::InitDebuggingDir() {
+ time_t Time;
+ struct tm LocalTime;
+ char TimeBuffer[sizeof("YYYYMMDD")];
+ SmallString<64> Path;
+
+ // search for location to dump data to
+ if (const char *BaseDir = getenv("JITDUMPDIR"))
+ Path.append(BaseDir);
+ else if (!sys::path::home_directory(Path))
+ Path = ".";
+
+ // create debug directory
+ Path += "/.debug/jit/";
+ if (auto EC = sys::fs::create_directories(Path)) {
+ errs() << "could not create jit cache directory " << Path << ": "
+ << EC.message() << "\n";
+ return false;
+ }
+
+ // create unique directory for dump data related to this process
+ time(&Time);
+ localtime_r(&Time, &LocalTime);
+ strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
+ Path += JIT_LANG "-jit-";
+ Path += TimeBuffer;
+
+ SmallString<128> UniqueDebugDir;
+
+ using sys::fs::createUniqueDirectory;
+ if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
+ errs() << "could not create unique jit cache directory " << UniqueDebugDir
+ << ": " << EC.message() << "\n";
+ return false;
+ }
+
+ JitPath = UniqueDebugDir.str();
+
+ return true;
+}
+
+bool PerfJITEventListener::OpenMarker() {
+ // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
+ // is captured either live (perf record running when we mmap) or in deferred
+ // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
+ // file for more meta data info about the jitted code. Perf report/annotate
+ // detect this special filename and process the jitdump file.
+ //
+ // Mapping must be PROT_EXEC to ensure it is captured by perf record
+ // even when not using -d option.
+ MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC,
+ MAP_PRIVATE, DumpFd, 0);
+
+ if (MarkerAddr == MAP_FAILED) {
+ errs() << "could not mmap JIT marker\n";
+ return false;
+ }
+ return true;
+}
+
+void PerfJITEventListener::CloseMarker() {
+ if (!MarkerAddr)
+ return;
+
+ munmap(MarkerAddr, sys::Process::getPageSize());
+ MarkerAddr = nullptr;
+}
+
+bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
+ char id[16];
+ struct {
+ uint16_t e_type;
+ uint16_t e_machine;
+ } info;
+
+ size_t RequiredMemory = sizeof(id) + sizeof(info);
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+ MemoryBuffer::getFileSlice("/proc/self/exe",
+ RequiredMemory,
+ 0);
+
+ // This'll not guarantee that enough data was actually read from the
+ // underlying file. Instead the trailing part of the buffer would be
+ // zeroed. Given the ELF signature check below that seems ok though,
+ // it's unlikely that the file ends just after that, and the
+ // consequence would just be that perf wouldn't recognize the
+ // signature.
+ if (auto EC = MB.getError()) {
+ errs() << "could not open /proc/self/exe: " << EC.message() << "\n";
+ return false;
+ }
+
+ memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
+ memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
+
+ // check ELF signature
+ if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
+ errs() << "invalid elf signature\n";
+ return false;
+ }
+
+ hdr.ElfMach = info.e_machine;
+
+ return true;
+}
+
+void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
+ uint64_t CodeAddr, uint64_t CodeSize) {
+ assert(SuccessfullyInitialized);
+
+ // 0 length functions can't have samples.
+ if (CodeSize == 0)
+ return;
+
+ LLVMPerfJitRecordCodeLoad rec;
+ rec.Prefix.Id = JIT_CODE_LOAD;
+ rec.Prefix.TotalSize = sizeof(rec) + // debug record itself
+ Symbol->size() + 1 + // symbol name
+ CodeSize; // and code
+ rec.Prefix.Timestamp = perf_get_timestamp();
+
+ rec.CodeSize = CodeSize;
+ rec.Vma = 0;
+ rec.CodeAddr = CodeAddr;
+ rec.Pid = Pid;
+ rec.Tid = get_threadid();
+
+ // avoid interspersing output
+ MutexGuard Guard(Mutex);
+
+ rec.CodeIndex = CodeGeneration++; // under lock!
+
+ Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
+ Dumpstream->write(Symbol->data(), Symbol->size() + 1);
+ Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
+}
+
+void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
+ DILineInfoTable Lines) {
+ assert(SuccessfullyInitialized);
+
+ // Didn't get useful debug info.
+ if (Lines.empty())
+ return;
+
+ LLVMPerfJitRecordDebugInfo rec;
+ rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
+ rec.Prefix.TotalSize = sizeof(rec); // will be increased further
+ rec.Prefix.Timestamp = perf_get_timestamp();
+ rec.CodeAddr = CodeAddr;
+ rec.NrEntry = Lines.size();
+
+ // compute total size size of record (variable due to filenames)
+ DILineInfoTable::iterator Begin = Lines.begin();
+ DILineInfoTable::iterator End = Lines.end();
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+ DILineInfo &line = It->second;
+ rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
+ rec.Prefix.TotalSize += line.FileName.size() + 1;
+ }
+
+ // The debug_entry describes the source line information. It is defined as
+ // follows in order:
+ // * uint64_t code_addr: address of function for which the debug information
+ // is generated
+ // * uint32_t line : source file line number (starting at 1)
+ // * uint32_t discrim : column discriminator, 0 is default
+ // * char name[n] : source file name in ASCII, including null termination
+
+ // avoid interspersing output
+ MutexGuard Guard(Mutex);
+
+ Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
+
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+ LLVMPerfJitDebugEntry LineInfo;
+ DILineInfo &Line = It->second;
+
+ LineInfo.Addr = It->first;
+ // The function re-created by perf is preceded by a elf
+ // header. Need to adjust for that, otherwise the results are
+ // wrong.
+ LineInfo.Addr += 0x40;
+ LineInfo.Lineno = Line.Line;
+ LineInfo.Discrim = Line.Discriminator;
+
+ Dumpstream->write(reinterpret_cast<const char *>(&LineInfo),
+ sizeof(LineInfo));
+ Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
+ }
+}
+
+// There should be only a single event listener per process, otherwise perf gets
+// confused.
+llvm::ManagedStatic<PerfJITEventListener> PerfListener;
+
+} // end anonymous namespace
+
+namespace llvm {
+JITEventListener *JITEventListener::createPerfJITEventListener() {
+ return &*PerfListener;
+}
+
+} // namespace llvm
+
--
2.17.1

26
patches/llvm-D46460.patch Normal file
View File

@ -0,0 +1,26 @@
Index: lib/Analysis/LoopInfo.cpp
===================================================================
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -223,15 +223,14 @@
BasicBlock *H = getHeader();
for (BasicBlock *BB : this->blocks()) {
TerminatorInst *TI = BB->getTerminator();
- MDNode *MD = nullptr;
// Check if this terminator branches to the loop header.
- for (BasicBlock *Successor : TI->successors()) {
- if (Successor == H) {
- MD = TI->getMetadata(LLVMContext::MD_loop);
- break;
- }
- }
+ bool IsPredecessor = any_of(TI->successors(),
+ [=](BasicBlock *Successor) { return Successor == H; });
+ if (!IsPredecessor)
+ continue;
+
+ MDNode *MD = TI->getMetadata(LLVMContext::MD_loop);
if (!MD)
return nullptr;

View File

@ -0,0 +1,187 @@
commit 98592fcc61307968f7df1362771534595a1e1c21
Author: Keno Fischer <keno@juliacomputing.com>
Date: Wed Jul 25 19:29:02 2018 -0400
[SCEV] Don't expand Wrap predicate using inttoptr in ni addrspaces
Summary:
In non-integral address spaces, we're not allowed to introduce inttoptr/ptrtoint
intrinsics. Instead, we need to expand any pointer arithmetic as geps on the
base pointer. Luckily this is a common task for SCEV, so all we have to do here
is hook up the corresponding helper function and add test case.
Fixes PR38290
Reviewers: reames, sanjoy
Subscribers: javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D49832
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 7f76f057216..f441a3647fb 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -2157,8 +2157,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
const SCEV *Step = AR->getStepRecurrence(SE);
const SCEV *Start = AR->getStart();
+ Type *ARTy = AR->getType();
unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
- unsigned DstBits = SE.getTypeSizeInBits(AR->getType());
+ unsigned DstBits = SE.getTypeSizeInBits(ARTy);
// The expression {Start,+,Step} has nusw/nssw if
// Step < 0, Start - |Step| * Backedge <= Start
@@ -2170,11 +2171,12 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);
IntegerType *Ty =
- IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(AR->getType()));
+ IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
+ Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
Value *StepValue = expandCodeFor(Step, Ty, Loc);
Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
- Value *StartValue = expandCodeFor(Start, Ty, Loc);
+ Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);
ConstantInt *Zero =
ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2197,8 +2199,21 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// Compute:
// Start + |Step| * Backedge < Start
// Start - |Step| * Backedge > Start
- Value *Add = Builder.CreateAdd(StartValue, MulV);
- Value *Sub = Builder.CreateSub(StartValue, MulV);
+ Value *Add = nullptr, *Sub = nullptr;
+ if (ARExpandTy->isPointerTy()) {
+ PointerType *ARPtrTy = cast<PointerType>(ARExpandTy);
+ const SCEV *MulS = SE.getSCEV(MulV);
+ const SCEV *const StepArray[2] = {MulS, SE.getNegativeSCEV(MulS)};
+ Add = Builder.CreateBitCast(
+ expandAddToGEP(&StepArray[0], &StepArray[1], ARPtrTy, Ty, StartValue),
+ ARPtrTy);
+ Sub = Builder.CreateBitCast(
+ expandAddToGEP(&StepArray[1], &StepArray[2], ARPtrTy, Ty, StartValue),
+ ARPtrTy);
+ } else {
+ Add = Builder.CreateAdd(StartValue, MulV);
+ Sub = Builder.CreateSub(StartValue, MulV);
+ }
Value *EndCompareGT = Builder.CreateICmp(
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
new file mode 100644
index 00000000000..ddcf5e1a195
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
@@ -0,0 +1,73 @@
+; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV
+
+; NB: addrspaces 10-13 are non-integral
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
+
+; This matches the test case from PR38290
+; Check that we expand the SCEV predicate check using GEP, rather
+; than ptrtoint.
+
+%jl_value_t = type opaque
+%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 }
+
+declare i64 @julia_steprange_last_4949()
+
+define void @"japi1_align!_9477"(%jl_value_t addrspace(10)**) #0 {
+; LV-LAVEL: L26.lver.check
+; LV: [[OFMul:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[Step:%[^ ]*]])
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
+; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base:%[^ ]*]], i64 [[Step]]
+; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base]], i64 [[NegStep:%[^ ]*]]
+; LV-NEXT: icmp ugt i32 addrspace(13)* [[NegGEP]], [[Base]]
+; LV-NEXT: icmp ult i32 addrspace(13)* [[PosGEP]], [[Base]]
+; LV-NOT: inttoptr
+; LV-NOT: ptrtoint
+top:
+ %1 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8, !nonnull !1, !dereferenceable !2, !align !3
+ %2 = load i32, i32* inttoptr (i64 12 to i32*), align 4, !tbaa !4
+ %3 = sub i32 0, %2
+ %4 = call i64 @julia_steprange_last_4949()
+ %5 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)*
+ %6 = bitcast %jl_value_t addrspace(11)* %5 to %jl_value_t addrspace(10)* addrspace(11)*
+ %7 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %6, align 8, !tbaa !4, !nonnull !1, !dereferenceable !9, !align !2
+ %8 = addrspacecast %jl_value_t addrspace(10)* %7 to %jl_value_t addrspace(11)*
+ %9 = bitcast %jl_value_t addrspace(11)* %8 to i32 addrspace(13)* addrspace(11)*
+ %10 = load i32 addrspace(13)*, i32 addrspace(13)* addrspace(11)* %9, align 8, !tbaa !10, !nonnull !1
+ %11 = sext i32 %3 to i64
+ br label %L26
+
+L26: ; preds = %L26, %top
+ %value_phi3 = phi i64 [ 0, %top ], [ %12, %L26 ]
+ %12 = add i64 %value_phi3, -1
+ %13 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %12
+ %14 = load i32, i32 addrspace(13)* %13, align 4, !tbaa !13
+ %15 = add i64 %12, %11
+ %16 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %15
+ store i32 %14, i32 addrspace(13)* %16, align 4, !tbaa !13
+ %17 = icmp eq i64 %value_phi3, %4
+ br i1 %17, label %L45, label %L26
+
+L45: ; preds = %L26
+ ret void
+}
+
+attributes #0 = { "thunk" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"Debug Info Version", i32 3}
+!1 = !{}
+!2 = !{i64 16}
+!3 = !{i64 8}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"jtbaa_mutab", !6, i64 0}
+!6 = !{!"jtbaa_value", !7, i64 0}
+!7 = !{!"jtbaa_data", !8, i64 0}
+!8 = !{!"jtbaa"}
+!9 = !{i64 40}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"jtbaa_arrayptr", !12, i64 0}
+!12 = !{!"jtbaa_array", !8, i64 0}
+!13 = !{!14, !14, i64 0}
+!14 = !{!"jtbaa_arraybuf", !7, i64 0}
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
index a7e5bce7445..fa6fccecbf1 100644
--- a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
@@ -58,10 +58,10 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
@@ -233,10 +233,10 @@ for.end: ; preds = %for.body
; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]

View File

@ -0,0 +1,89 @@
commit 8eb2b102a203d83fb713f3bf79acf235dabdd8cd
Author: Keno Fischer <keno@juliacomputing.com>
Date: Mon Jul 30 16:59:08 2018 -0400
[VNCoercion] Disallow coercion between different ni addrspaces
Summary:
I'm not sure if it would be legal by the IR reference to introduce
an addrspacecast here, since the IR reference is a bit vague on
the exact semantics, but at least for our usage of it (and I
suspect for many other's usage) it is not. For us, addrspacecasts
between non-integral address spaces carry frontend information that the
optimizer cannot deduce afterwards in a generic way (though we
have frontend specific passes in our pipline that do propagate
these). In any case, I'm sure nobody is using it this way at
the moment, since it would have introduced inttoptrs, which
are definitely illegal.
Fixes PR38375
Reviewers: sanjoy, reames, dberlin
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D50010
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index c3feea6a0a4..735d1e7b792 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -20,14 +20,21 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
return false;
+ Type *StoredValTy = StoredVal->getType();
+
// The store has to be at least as big as the load.
if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
return false;
- // Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
- DL.isNonIntegralPointerType(LoadTy))
+ bool StoredNI = DL.isNonIntegralPointerType(StoredValTy);
+ bool LoadNI = DL.isNonIntegralPointerType(LoadTy);
+ if (StoredNI != LoadNI) {
return false;
+ } else if (StoredNI && LoadNI &&
+ cast<PointerType>(StoredValTy)->getAddressSpace() !=
+ cast<PointerType>(LoadTy)->getAddressSpace()) {
+ return false;
+ }
return true;
}
diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
index 9ae4132231d..5217fc1a06a 100644
--- a/test/Transforms/GVN/non-integral-pointers.ll
+++ b/test/Transforms/GVN/non-integral-pointers.ll
@@ -1,6 +1,6 @@
; RUN: opt -gvn -S < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
target triple = "x86_64-unknown-linux-gnu"
define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
alwaysTaken:
ret i64 42
}
+
+ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
+ ; CHECK-LABEL: @multini(
+ ; CHECK-NOT: inttoptr
+ ; CHECK-NOT: ptrtoint
+ ; CHECK-NOT: addrspacecast
+ entry:
+ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+ neverTaken:
+ %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
+ %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc
+ ret i8 addrspace(5)* %differentas
+
+ alwaysTaken:
+ ret i8 addrspace(5)* null
+ }

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,48 @@
commit 4840cf7299bb312125d41fc84733c15c2370f18e
Author: DokFaust <rodia@autistici.org>
Date: Fri Jun 8 19:23:01 2018 +0200
Add debug line-level code information to OProfile module
diff --git a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
index 7d5550046a5..ea100286318 100644
--- a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
+++ b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
@@ -24 +24 @@ parent = ExecutionEngine
-required_libraries = Support Object ExecutionEngine
+required_libraries = DebugInfoDWARF Support Object ExecutionEngine
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 3581d645839..045ecb82853 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -26,0 +27,2 @@
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -86,0 +89,2 @@ void OProfileJITEventListener::NotifyObjectEmitted(
+ std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
+ std::string SourceFileName;
@@ -111 +115,23 @@ void OProfileJITEventListener::NotifyObjectEmitted(
- // TODO: support line number info (similar to IntelJITEventListener.cpp)
+ DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
+ DILineInfoTable::iterator Begin = Lines.begin();
+ DILineInfoTable::iterator End = Lines.end();
+ size_t i = 0;
+
+ size_t num_entries = std::distance(Begin, End);
+ static struct debug_line_info* debug_line;
+ debug_line = (struct debug_line_info * )calloc(num_entries, sizeof(struct debug_line_info));
+
+ for(DILineInfoTable::iterator It=Begin; It != End; ++It){
+ i = std::distance(Begin,It);
+ debug_line[i].vma = (unsigned long) It->first;
+ debug_line[i].lineno = It->second.Line;
+ SourceFileName = Lines.front().second.FileName;
+ debug_line[i].filename = const_cast<char *>(SourceFileName.c_str());
+ }
+
+ if(Wrapper->op_write_debug_line_info((void*) Addr, num_entries, debug_line) == -1) {
+ DEBUG(dbgs() << "Failed to tell OProfiler about debug object at ["
+ << (void*) Addr << "-" << ((char *) Addr + Size)
+ << "]\n");
+ continue;
+ }

View File

@ -0,0 +1,29 @@
From 15899eaab58e96bb7bbe7a14099674e255656a50 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Fri, 23 Feb 2018 14:41:20 -0500
Subject: [PATCH] Make AddrSpaceCast noops on PPC
PPC as AArch64 doesn't have address-spaces so we can drop them in the backend
---
lib/Target/PowerPC/PPCISelLowering.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index e60504507d3..c9b89773968 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -761,6 +761,11 @@ namespace llvm {
ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {}
};
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
+
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
SelectionDAG &DAG,
ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
--
2.16.2

View File

@ -0,0 +1,45 @@
commit ab60b05a472e8651cbe53c19513b7e62b9ff32df
Author: Mikael Holmen <mikael.holmen@ericsson.com>
Date: Thu Feb 1 06:38:34 2018 +0000
[LSR] Don't force bases of foldable formulae to the final type.
Summary:
Before emitting code for scaled registers, we prevent
SCEVExpander from hoisting any scaled addressing mode
by emitting all the bases first. However, these bases
are being forced to the final type, resulting in some
odd code.
For example, if the type of the base is an integer and
the final type is a pointer, we will emit an inttoptr
for the base, a ptrtoint for the scale, and then a
'reverse' GEP where the GEP pointer is actually the base
integer and the index is the pointer. It's more intuitive
to use the pointer as a pointer and the integer as index.
Patch by: Bevin Hansson
Reviewers: atrick, qcolombet, sanjoy
Reviewed By: qcolombet
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D42103
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323946 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 332c074a1df..4b8e2286ed9 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4993,7 +4993,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}

View File

@ -0,0 +1,301 @@
commit b398d8e1fa5a5a914957fa22d0a64db97f6c265e
Author: Craig Topper <craig.topper@intel.com>
Date: Thu Mar 8 00:21:17 2018 +0000
[X86] Fix some isel patterns that used aligned vector load instructions with unaligned predicates.
These patterns weren't checking the alignment of the load, but were using the aligned instructions. This will cause a GP fault if the data isn't aligned.
I believe these were introduced in r312450.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326967 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td
index db3dfe56531..50c7763a2c3 100644
--- a/lib/Target/X86/X86InstrVecCompiler.td
+++ b/lib/Target/X86/X86InstrVecCompiler.td
@@ -261,10 +261,10 @@ let Predicates = [HasVLX] in {
// will zero the upper bits.
// TODO: Is there a safe way to detect whether the producing instruction
// already zeroed the upper bits?
-multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
- ValueType DstTy, ValueType SrcTy,
- ValueType ZeroTy, PatFrag memop,
- SubRegIndex SubIdx> {
+multiclass subvector_zero_lowering<string MoveStr, string LoadStr,
+ RegisterClass RC, ValueType DstTy,
+ ValueType SrcTy, ValueType ZeroTy,
+ PatFrag memop, SubRegIndex SubIdx> {
def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
(SrcTy RC:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0),
@@ -274,91 +274,91 @@ multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
(SrcTy (bitconvert (memop addr:$src))),
(iPTR 0))),
(SUBREG_TO_REG (i64 0),
- (!cast<Instruction>("VMOV"#MoveStr#"rm") addr:$src), SubIdx)>;
+ (!cast<Instruction>("VMOV"#LoadStr#"rm") addr:$src), SubIdx)>;
}
let Predicates = [HasAVX, NoVLX] in {
- defm : subvector_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, loadv2f64,
- sub_xmm>;
- defm : subvector_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, loadv4f32,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, loadv2i64,
- sub_xmm>;
-}
-
-let Predicates = [HasVLX] in {
- defm : subvector_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32,
+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v4f64, v2f64, v8i32,
loadv2f64, sub_xmm>;
- defm : subvector_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32,
+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v8f32, v4f32, v8i32,
loadv4f32, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32,
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v4i64, v2i64, v8i32,
loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32,
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i32, v4i32, v8i32,
loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32,
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i16, v8i16, v8i32,
loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32,
- loadv2i64, sub_xmm>;
-
- defm : subvector_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32,
- loadv2f64, sub_xmm>;
- defm : subvector_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32,
- loadv4f32, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32,
- loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32,
- loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32,
- loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32,
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i8, v16i8, v8i32,
loadv2i64, sub_xmm>;
+}
- defm : subvector_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32,
- loadv4f64, sub_ymm>;
- defm : subvector_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32,
- loadv8f32, sub_ymm>;
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32,
- loadv4i64, sub_ymm>;
+let Predicates = [HasVLX] in {
+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v4f64,
+ v2f64, v8i32, loadv2f64, sub_xmm>;
+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v8f32,
+ v4f32, v8i32, loadv4f32, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v4i64,
+ v2i64, v8i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i32,
+ v4i32, v8i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i16,
+ v8i16, v8i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i8,
+ v16i8, v8i32, loadv2i64, sub_xmm>;
+
+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v8f64,
+ v2f64, v16i32, loadv2f64, sub_xmm>;
+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v16f32,
+ v4f32, v16i32, loadv4f32, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i64,
+ v2i64, v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i32,
+ v4i32, v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i16,
+ v8i16, v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v64i8,
+ v16i8, v16i32, loadv2i64, sub_xmm>;
+
+ defm : subvector_zero_lowering<"APDZ256", "UPDZ256", VR256X, v8f64,
+ v4f64, v16i32, loadv4f64, sub_ymm>;
+ defm : subvector_zero_lowering<"APSZ256", "UPDZ256", VR256X, v16f32,
+ v8f32, v16i32, loadv8f32, sub_ymm>;
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v8i64,
+ v4i64, v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v16i32,
+ v8i32, v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v32i16,
+ v16i16, v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v64i8,
+ v32i8, v16i32, loadv4i64, sub_ymm>;
}
let Predicates = [HasAVX512, NoVLX] in {
- defm : subvector_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, loadv2f64,
- sub_xmm>;
- defm : subvector_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, loadv4f32,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, loadv2i64,
- sub_xmm>;
-
- defm : subvector_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32,
- loadv4f64, sub_ymm>;
- defm : subvector_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32,
- loadv8f32, sub_ymm>;
- defm : subvector_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32,
- loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v8f64, v2f64,
+ v16i32,loadv2f64, sub_xmm>;
+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v16f32, v4f32,
+ v16i32, loadv4f32, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i64, v2i64,
+ v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i32, v4i32,
+ v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i16, v8i16,
+ v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v64i8, v16i8,
+ v16i32, loadv2i64, sub_xmm>;
+
+ defm : subvector_zero_lowering<"APDY", "UPDY", VR256, v8f64, v4f64,
+ v16i32, loadv4f64, sub_ymm>;
+ defm : subvector_zero_lowering<"APSY", "UPSY", VR256, v16f32, v8f32,
+ v16i32, loadv8f32, sub_ymm>;
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v8i64, v4i64,
+ v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v16i32, v8i32,
+ v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v32i16, v16i16,
+ v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v64i8, v32i8,
+ v16i32, loadv4i64, sub_ymm>;
}
// List of opcodes that guaranteed to zero the upper elements of vector regs.
diff --git a/test/CodeGen/X86/merge-consecutive-loads-256.ll b/test/CodeGen/X86/merge-consecutive-loads-256.ll
index 6ecd8116443..0f2cf594b1c 100644
--- a/test/CodeGen/X86/merge-consecutive-loads-256.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-256.ll
@@ -28,13 +28,13 @@ define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noi
define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_2f64_2z:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 32(%rdi), %xmm0
+; AVX-NEXT: vmovups 32(%rdi), %xmm0
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4f64_2f64_2z:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0
+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
%val0 = load <2 x double>, <2 x double>* %ptr0
@@ -109,13 +109,13 @@ define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline
define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_45zz:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 32(%rdi), %xmm0
+; AVX-NEXT: vmovups 32(%rdi), %xmm0
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4f64_f64_45zz:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0
+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 4
%ptr1 = getelementptr inbounds double, double* %ptr, i64 5
@@ -155,13 +155,13 @@ define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline
define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4i64_2i64_3z:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 48(%rdi), %xmm0
+; AVX-NEXT: vmovups 48(%rdi), %xmm0
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4i64_2i64_3z:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vmovaps 48(%eax), %xmm0
+; X32-AVX-NEXT: vmovups 48(%eax), %xmm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3
%val0 = load <2 x i64>, <2 x i64>* %ptr0
@@ -217,13 +217,13 @@ define <4 x i64> @merge_4i64_i64_1zzu(i64* %ptr) nounwind uwtable noinline ssp {
define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4i64_i64_23zz:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 16(%rdi), %xmm0
+; AVX-NEXT: vmovups 16(%rdi), %xmm0
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4i64_i64_23zz:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vmovaps 16(%eax), %xmm0
+; X32-AVX-NEXT: vmovups 16(%eax), %xmm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds i64, i64* %ptr, i64 2
%ptr1 = getelementptr inbounds i64, i64* %ptr, i64 3
diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll
index 62102eb382c..3c6eaf65292 100644
--- a/test/CodeGen/X86/merge-consecutive-loads-512.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll
@@ -106,13 +106,13 @@ define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noin
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
; ALL: # %bb.0:
-; ALL-NEXT: vmovaps 8(%rdi), %xmm0
+; ALL-NEXT: vmovups 8(%rdi), %xmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovaps 8(%eax), %xmm0
+; X32-AVX512F-NEXT: vmovups 8(%eax), %xmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 1
%ptr1 = getelementptr inbounds double, double* %ptr, i64 2
@@ -190,7 +190,7 @@ define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
; ALL: # %bb.0:
-; ALL-NEXT: vmovaps 40(%rdi), %xmm0
+; ALL-NEXT: vmovups 40(%rdi), %xmm0
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -198,7 +198,7 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovaps 40(%eax), %xmm0
+; X32-AVX512F-NEXT: vmovups 40(%eax), %xmm0
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl

6131
patches/llvm-rL327898.patch Normal file

File diff suppressed because it is too large Load Diff