Working julia
This commit is contained in:
commit
99c751fc1f
|
@ -0,0 +1,522 @@
|
||||||
|
(use-modules ((guix licenses)
|
||||||
|
#:prefix license:))
|
||||||
|
(use-modules
|
||||||
|
(guix packages))
|
||||||
|
(use-modules
|
||||||
|
(guix download))
|
||||||
|
(use-modules
|
||||||
|
(guix utils))
|
||||||
|
(use-modules
|
||||||
|
(guix git-download))
|
||||||
|
(use-modules
|
||||||
|
(guix build-system gnu))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages algebra))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages base))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages compression))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages elf))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages gcc))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages llvm))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages libevent))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages libunwind))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages maths))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages multiprecision)) ; mpfr)
|
||||||
|
(use-modules
|
||||||
|
(gnu packages pcre))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages perl))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages pkg-config))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages python))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages python-xyz))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages textutils))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages tls))
|
||||||
|
(use-modules
|
||||||
|
(gnu packages version-control))
|
||||||
|
|
||||||
|
(use-modules
|
||||||
|
(gnu packages wget))
|
||||||
|
|
||||||
|
(use-modules
|
||||||
|
(ice-9 match))
|
||||||
|
|
||||||
|
;; (define openblas-julia
|
||||||
|
;; (package
|
||||||
|
;; (inherit openblas)
|
||||||
|
;; (name "openblas-julia")
|
||||||
|
;; INTERFACE64=
|
||||||
|
;; ))
|
||||||
|
|
||||||
|
;; Building LLVM as a separate package works, *but* we then have to apply a
;; lot of patches ourselves.  Compiling LLVM together with Julia applies
;; those patches automatically.
|
||||||
|
|
||||||
|
;; Variant of the `llvm-6' package built from the LLVM 6.0.1 release tarball
;; with the patch files listed below applied, and with its own set of CMake
;; configure flags.
(define llvm-julia
  (package
    (inherit llvm-6)
    (name "llvm-julia")
    (source
     (origin
       (method url-fetch)
       (uri "http://releases.llvm.org/6.0.1/llvm-6.0.1.src.tar.xz")
       (sha256
        (base32
         "1qpls3vk85lydi5b4axl0809fv932qgsqgdgrk098567z4jc7mmn"))
       ;; The patch files are referenced by relative file name.
       (patches
        '("./llvm-6.0-D44650.patch"
          "./llvm-6.0-DISABLE_ABI_CHECKS.patch"
          "./llvm-6.0-NVPTX-addrspaces.patch"
          "./llvm-6.0.0_D27296-libssp.patch"
          "./llvm-D27629-AArch64-large_model_6.0.1.patch"
          "./llvm-D34078-vectorize-fdiv.patch"
          "./llvm-D42262-jumpthreading-not-i1.patch"
          "./llvm-D44892-Perf-integration.patch"
          "./llvm-D46460.patch"
          "./llvm-D49832-SCEVPred.patch"
          "./llvm-D50010-VNCoercion-ni.patch"
          "./llvm-D50167-scev-umin.patch"
          "./llvm-OProfile-line-num.patch"
          "./llvm-PPC-addrspaces.patch"
          "./llvm-rL323946-LSRTy.patch"
          "./llvm-rL326967-aligned-load.patch"
          "./llvm-rL327898.patch"))))
    (arguments
     (substitute-keyword-arguments (package-arguments llvm-6)
       ;; The flags inherited from `llvm-6' (bound to FLAGS) are not reused;
       ;; the list below replaces them entirely.
       ((#:configure-flags flags)
        ;; Taken from NixOS.  Only way I could get libLLVM-6.0.so
        '(list "-DCMAKE_BUILD_TYPE=Release"
               "-DLLVM_INSTALL_UTILS=ON"
               "-DLLVM_BUILD_TESTS=ON"
               "-DLLVM_ENABLE_FFI=ON"
               "-DLLVM_ENABLE_RTTI=ON"
               ;; "-DLLVM_HOST_TRIPLE=${stdenv.hostPlatform.config}"
               ;; "-DLLVM_DEFAULT_TARGET_TRIPLE=${stdenv.hostPlatform.config}"
               "-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=WebAssembly"
               "-DLLVM_ENABLE_DUMP=ON"
               "-DLLVM_LINK_LLVM_DYLIB=ON"))))))
|
||||||
|
|
||||||
|
;; Variant of the `libuv' package built from a pinned commit of the
;; JuliaLang/libuv repository.  The commit hash doubles as the version.
(define libuv-julia
  (let ((commit "2348256acf5759a544e5ca7935f638d2bc091d60"))
    (package
      (inherit libuv)
      (name "libuv-julia")
      (version commit)
      (home-page "https://github.com/JuliaLang/libuv")
      (source
       (origin
         (method url-fetch)
         ;; Tarball of the pinned commit, fetched through the GitHub API.
         (uri (string-append
               "https://api.github.com/repos/JuliaLang/libuv/tarball/"
               commit))
         (sha256
          (base32
           "1363f4vqayfcv5zqg07qmzjff56yhad74k16c22ian45lram8mv8"))))
      (build-system gnu-build-system)
      (arguments
       (substitute-keyword-arguments (package-arguments libuv)
         ;; Reuse the phases of `libuv', minus its 'autogen phase.
         ((#:phases phases)
          `(modify-phases ,phases
             (delete 'autogen))))))))
|
||||||
|
|
||||||
|
(package
|
||||||
|
(name "julia")
|
||||||
|
(version "1.1.0")
|
||||||
|
(source
|
||||||
|
(origin
|
||||||
|
(method url-fetch)
|
||||||
|
(uri
|
||||||
|
(string-append
|
||||||
|
"https://github.com/JuliaLang/julia/releases/download/v"
|
||||||
|
version "/julia-" version ".tar.gz"))
|
||||||
|
(sha256
|
||||||
|
(base32
|
||||||
|
"1bd6c5gqd7f2i837ay8iqi8h36smhcg0lq7f8c2axxaw8x6rcfmx"))))
|
||||||
|
(build-system gnu-build-system)
|
||||||
|
(arguments
|
||||||
|
`(#:test-target "test"
|
||||||
|
#:modules
|
||||||
|
((ice-9 match)
|
||||||
|
(guix build gnu-build-system)
|
||||||
|
(guix build utils))
|
||||||
|
;; Do not strip binaries to keep support for full backtraces.
|
||||||
|
;; See https://github.com/JuliaLang/julia/issues/17831
|
||||||
|
#:strip-binaries? #f
|
||||||
|
|
||||||
|
;; The DSOs use $ORIGIN to refer to each other, but (guix build
|
||||||
|
;; gremlin) doesn't support it yet, so skip this phase.
|
||||||
|
#:validate-runpath? #f
|
||||||
|
|
||||||
|
#:phases
|
||||||
|
(modify-phases %standard-phases
|
||||||
|
(delete 'configure)
|
||||||
|
(add-after 'unpack 'prepare-deps
  ;; Copy the pre-fetched source archives of the bundled dependencies into
  ;; deps/srccache under the file names used below, copy the virtualenv
  ;; executable to "julia-env", and set LD_LIBRARY_PATH.
  (lambda* (#:key inputs #:allow-other-keys)
    ;; `mkdir-p' instead of `mkdir': do not fail if the directory is
    ;; already present in the unpacked source tree.
    (mkdir-p "deps/srccache")
    ;; Each entry is (INPUT-NAME . FILE-NAME-IN-SRCCACHE).
    (for-each (match-lambda
                ((input . file-name)
                 (copy-file (assoc-ref inputs input)
                            (string-append "deps/srccache/" file-name))))
              '(("dsfmt" . "dsfmt-2.2.3.tar.gz")
                ("libwhich"
                 . "libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz")
                ("rmath" . "Rmath-julia-0.1.tar.gz")
                ("objconv" . "objconv.zip")
                ("suitesparse" . "SuiteSparse-4.4.5.tar.gz")))
    (copy-file (string-append (assoc-ref inputs "virtualenv")
                              "/bin/virtualenv")
               "julia-env")
    ;; needed by libwhich
    (setenv "LD_LIBRARY_PATH"
            (string-join
             (map (lambda (pkg)
                    (string-append (assoc-ref inputs pkg) "/lib"))
                  (list "arpack-ng" "fftw" "gmp" "lapack"
                        "libgit2" "mpfr" "openblas" "openlibm"
                        "openspecfun" "pcre2"))
             ":"))
    ;; (copy-file
    ;;  (assoc-ref inputs "llvm")
    ;;  "deps/srccache/llvm-6.0.0.src.tar.xz")
    #t))
|
||||||
|
;; FIXME: Building the documentation requires Julia packages that
|
||||||
|
;; would be downloaded from the Internet. We should build them in a
|
||||||
|
;; separate build phase.
|
||||||
|
(add-after 'unpack 'disable-documentation
  ;; Edit the top-level Makefile: strip the generated HTML index from the
  ;; prerequisites of the "install" rule and remove "doc" from the
  ;; "src ui doc deps" directory list.
  (lambda _
    (substitute* "Makefile"
      (("(install: .*) \\$\\(BUILDROOT\\)/doc/_build/html/en/index.html"
        whole install-rule)
       (string-append install-rule "\n"))
      (("src ui doc deps")
       "src ui deps"))
    #t))
|
||||||
|
(add-before 'check 'set-home
  (lambda _
    ;; Some tests require a home directory to be set, so point HOME at
    ;; /tmp before the 'check phase runs.
    (setenv "HOME" "/tmp")
    #t))
|
||||||
|
(add-after 'unpack 'hardcode-soname-map
  ;; ./src/runtime_ccall.cpp creates a map from library names to paths
  ;; using the output of "/sbin/ldconfig -p".  Since ldconfig is not
  ;; used in Guix, we patch runtime_ccall.cpp to contain a static map.
  (lambda* (#:key inputs #:allow-other-keys)
    ;; `match-lambda' comes from (ice-9 match), which the package's
    ;; #:modules argument already imports into the build environment, so
    ;; no `use-modules' is needed inside this procedure.
    (substitute* "src/runtime_ccall.cpp"
      ;; Patch out invocations of '/sbin/ldconfig' to avoid getting
      ;; error messages about missing '/sbin/ldconfig' on Guix System.
      (("popen\\(.*ldconfig.*\\);")
       "NULL;\n")
      ;; Populate 'sonameMap'.  Each entry below is
      ;; (INPUT-NAME LIBRARY-NAME FILE-NAME-UNDER-LIB).
      (("jl_read_sonames.*;")
       (string-join
        (map (match-lambda
               ((input libname soname)
                (string-append
                 "sonameMap[\"" libname "\"] = "
                 "\"" (assoc-ref inputs input) "/lib/" soname "\";")))
             '(("libc" "libc" "libc.so.6")
               ("pcre2" "libpcre2-8" "libpcre2-8.so")
               ("mpfr" "libmpfr" "libmpfr.so")
               ("openblas" "libblas" "libopenblas.so")
               ("arpack-ng" "libarpack" "libarpack.so")
               ("lapack" "liblapack" "liblapack.so")
               ("libgit2" "libgit2" "libgit2.so")
               ("gmp" "libgmp" "libgmp.so")
               ;; ("openlibm" "libopenlibm" "libopenlibm.so")
               ("openspecfun" "libopenspecfun" "libopenspecfun.so")
               ("fftw" "libfftw3" "libfftw3_threads.so")
               ("fftwf" "libfftw3f" "libfftw3f_threads.so"))))))
    ;; FIXME: Hard-coding the FFTW library paths is disabled; the
    ;; substitution is kept below for reference.
    ;; NOTE(review): the original note here read only "NIXO" (presumably
    ;; NixOS) -- confirm what was left to do.
    ;; (substitute* "base/fft/FFTW.jl"
    ;;   (("const libfftw = Base.libfftw_name")
    ;;    (string-append "const libfftw = \""
    ;;                   (assoc-ref inputs "fftw") "/lib/libfftw3_threads.so"
    ;;                   "\""))
    ;;   (("const libfftwf = Base.libfftwf_name")
    ;;    (string-append "const libfftwf = \""
    ;;                   (assoc-ref inputs "fftwf") "/lib/libfftw3f_threads.so"
    ;;                   "\"")))
    ;; Make base/math.jl refer to openlibm and openspecfun by absolute
    ;; file name.
    (substitute* "base/math.jl"
      (("const libm = Base.libm_name")
       (string-append "const libm = \""
                      (assoc-ref inputs "openlibm")
                      "/lib/libopenlibm.so"
                      "\""))
      (("const openspecfun = \"libopenspecfun\"")
       (string-append "const openspecfun = \""
                      (assoc-ref inputs "openspecfun")
                      "/lib/libopenspecfun.so"
                      "\"")))
    ;; (substitute* "base/pcre.jl"
    ;;   (("const PCRE_LIB = \"libpcre2-8\"")
    ;;    (string-append "const PCRE_LIB = \""
    ;;                   (assoc-ref inputs "pcre2")
    ;;                   "/lib/libpcre2-8.so" "\"")))
    #t))
|
||||||
|
(add-before 'build 'fix-include-and-link-paths
  ;; Patch three Makefiles so that the build links against, and includes
  ;; headers from, the "libuv" input.
  (lambda* (#:key inputs #:allow-other-keys)
    ;; LIBUTF8PROC is a linker flag, not a build target.  It is
    ;; included in the LIBFILES_* variable which is used as a
    ;; collection of build targets and a list of libraries to link
    ;; against.
    (substitute* "src/flisp/Makefile"
      (("\\$\\(BUILDDIR\\)/\\$\\(EXENAME\\): \\$\\(OBJS\\) \\$\\(LIBFILES_release\\)")
       "$(BUILDDIR)/$(EXENAME): $(OBJS) $(LLT_release)")
      (("\\$\\(BUILDDIR\\)/\\$\\(EXENAME\\)-debug: \\$\\(DOBJS\\) \\$\\(LIBFILES_debug\\)")
       "$(BUILDDIR)/$(EXENAME)-debug: $(DOBJS) $(LLT_debug)"))
    ;; The REPL must be linked with libuv.  Look libuv up through the
    ;; INPUTS keyword argument, like the rest of this phase, rather than
    ;; through the global %build-inputs.
    (substitute* "ui/Makefile"
      (("JLDFLAGS \\+= ")
       (string-append "JLDFLAGS += "
                      (assoc-ref inputs "libuv")
                      "/lib/libuv.so ")))
    ;; Take uv/errno.h from the libuv input instead of $(build_includedir).
    (substitute* "base/Makefile"
      (("\\$\\(build_includedir\\)/uv/errno.h")
       (string-append (assoc-ref inputs "libuv")
                      "/include/uv/errno.h")))
    #t))
|
||||||
|
(add-before 'build 'replace-default-shell
  ;; Replace every "/bin/sh" in base/client.jl with the file name that
  ;; `which' reports for "sh".
  (lambda _
    (let ((shell (which "sh")))
      (substitute* "base/client.jl"
        (("/bin/sh") shell)))
    #t))
|
||||||
|
(add-after 'unpack 'hardcode-paths
  ;; In InteractiveUtils.jl, replace the command names `which' and `wget'
  ;; that directly follow a backquote with the file names reported by
  ;; `which'.
  (lambda _
    (let ((which-program (which "which"))
          (wget-program  (which "wget")))
      (substitute* "stdlib/InteractiveUtils/src/InteractiveUtils.jl"
        (("`which") (string-append "`" which-program))
        (("`wget") (string-append "`" wget-program))))
    #t))
|
||||||
|
(add-before 'check 'disable-broken-tests
  ;; Blank out three stdlib test files and remove the "backtrace" and
  ;; "cmdlineargs" entries from test/choosetests.jl.
  (lambda _
    ;; Create FILE-NAME as an empty file.
    (define (touch file-name)
      (call-with-output-file file-name (const #t)))
    ;; FIXME: Some LibGit2 assertions fail for an unknown reason.  The
    ;; narrower substitution below is disabled; the whole test file is
    ;; blanked out instead (see the list further down).
    ;; (substitute* "stdlib/LibGit2/test/libgit2.jl"
    ;;   (("!LibGit2.use_http_path(cfg, github_cred)")
    ;;    "true")
    ;;   (("LibGit2.use_http_path(cfg, mygit_cred)")
    ;;    "true"))
    ;; Replace each of these test files with an empty file.  `for-each'
    ;; rather than `map': only the side effects matter here.
    (for-each (lambda (test)
                (delete-file test)
                (touch test))
              '("stdlib/Sockets/test/runtests.jl"
                "stdlib/Distributed/test/runtests.jl"
                "stdlib/LibGit2/test/libgit2.jl"))
    (substitute* "test/choosetests.jl"
      ;; These tests fail, probably because some of the input
      ;; binaries have been stripped and thus backtraces don't look
      ;; as expected.
      (("\"backtrace\",")
       "")
      (("\"cmdlineargs\",")
       ""))
    ;; The last parenthesis below closes the enclosing `modify-phases'.
    #t)))
|
||||||
|
#:make-flags
|
||||||
|
(list
|
||||||
|
(string-append "prefix=" (assoc-ref %outputs "out"))
|
||||||
|
(string-append "PREFIX=" (assoc-ref %outputs "out"))
|
||||||
|
;; Passing the MARCH flag is necessary to build binary substitutes for
|
||||||
|
;; the supported architectures.
|
||||||
|
,(match
|
||||||
|
(or
|
||||||
|
(%current-target-system)
|
||||||
|
(%current-system))
|
||||||
|
("x86_64-linux" "MARCH=x86-64")
|
||||||
|
("i686-linux" "MARCH=pentium4")
|
||||||
|
("aarch64-linux" "MARCH=armv8-a")
|
||||||
|
;; Prevent errors when querying this package on unsupported
|
||||||
|
;; platforms, e.g. when running "guix package --search="
|
||||||
|
(_ "MARCH=UNSUPPORTED"))
|
||||||
|
"CONFIG_SHELL=bash" ;needed to build bundled libraries
|
||||||
|
"USE_SYSTEM_DSFMT=0" ;not packaged for Guix and upstream has no
|
||||||
|
;build system for a shared library.
|
||||||
|
"USE_SYSTEM_LAPACK=1"
|
||||||
|
"USE_SYSTEM_BLAS=1"
|
||||||
|
|
||||||
|
;; TODO: What about building BLAS with 64-bit integer (INTERFACE64) support?
|
||||||
|
"USE_BLAS64=0" ;needed when USE_SYSTEM_BLAS=1
|
||||||
|
"LIBBLAS=-lopenblas"
|
||||||
|
"LIBBLASNAME=libopenblas"
|
||||||
|
|
||||||
|
"USE_SYSTEM_FFTW=1"
|
||||||
|
"LIBFFTWNAME=libfftw3"
|
||||||
|
"LIBFFTWFNAME=libfftw3f"
|
||||||
|
|
||||||
|
;; TODO: Suitesparse does not install shared libraries, so we cannot
|
||||||
|
;; use the suitesparse package.
|
||||||
|
;; "USE_SYSTEM_SUITESPARSE=1"
|
||||||
|
;; (string-append "SUITESPARSE_INC=-I "
|
||||||
|
;; (assoc-ref %build-inputs "suitesparse")
|
||||||
|
;; "/include")
|
||||||
|
|
||||||
|
"USE_GPL_LIBS=1" ;proudly
|
||||||
|
"USE_SYSTEM_UTF8PROC=1"
|
||||||
|
(string-append "UTF8PROC_INC="
|
||||||
|
(assoc-ref %build-inputs "utf8proc")
|
||||||
|
"/include")
|
||||||
|
"USE_SYSTEM_LLVM=1"
|
||||||
|
"LLVM_VER=6.0.1"
|
||||||
|
|
||||||
|
;; "LLVM_VER=6.0.0"
|
||||||
|
"USE_LLVM_SHLIB=1" ; NOTE(review): an earlier FIXME here said the build fails when this is 1, yet it is set to 1 -- confirm and drop this note
|
||||||
|
|
||||||
|
"USE_SYSTEM_LIBUNWIND=1"
|
||||||
|
"USE_SYSTEM_LIBUV=1"
|
||||||
|
(string-append "LIBUV="
|
||||||
|
(assoc-ref %build-inputs "libuv")
|
||||||
|
"/lib/libuv.so")
|
||||||
|
(string-append "LIBUV_INC="
|
||||||
|
(assoc-ref %build-inputs "libuv")
|
||||||
|
"/include")
|
||||||
|
"USE_SYSTEM_PATCHELF=1"
|
||||||
|
"USE_SYSTEM_PCRE=1"
|
||||||
|
"USE_SYSTEM_OPENLIBM=1"
|
||||||
|
|
||||||
|
"USE_SYSTEM_GMP=1"
|
||||||
|
"USE_SYSTEM_MPFR=1"
|
||||||
|
"USE_SYSTEM_ARPACK=1"
|
||||||
|
"USE_SYSTEM_LIBGIT2=1"
|
||||||
|
"USE_SYSTEM_ZLIB=1"
|
||||||
|
"USE_SYSTEM_OPENSPECFUN=1")))
|
||||||
|
(inputs
|
||||||
|
`( ("llvm" ,llvm-julia)
|
||||||
|
;; The bundled version is 3.3.0 so stick to that version. With other
|
||||||
|
;; versions, we get test failures in 'linalg/arnoldi' as described in
|
||||||
|
;; <https://bugs.gnu.org/30282>.
|
||||||
|
("arpack-ng" ,arpack-ng-3.3.0)
|
||||||
|
("coreutils" ,coreutils)
|
||||||
|
;for bindings to "mkdir" and the like
|
||||||
|
("lapack" ,lapack)
|
||||||
|
("openblas" ,openblas)
|
||||||
|
;Julia does not build with Atlas
|
||||||
|
("libunwind" ,libunwind)
|
||||||
|
("openlibm" ,openlibm)
|
||||||
|
("openspecfun" ,openspecfun)
|
||||||
|
("libuv" ,libuv-julia)
|
||||||
|
("libgit2" ,libgit2)
|
||||||
|
("fftw" ,fftw)
|
||||||
|
("fftwf" ,fftwf)
|
||||||
|
("fortran" ,gfortran)
|
||||||
|
("pcre2" ,pcre2)
|
||||||
|
("utf8proc" ,utf8proc)
|
||||||
|
("mpfr" ,mpfr)
|
||||||
|
("wget" ,wget)
|
||||||
|
("which" ,which)
|
||||||
|
("zlib" ,zlib)
|
||||||
|
("gmp" ,gmp)
|
||||||
|
("virtualenv" ,python2-virtualenv)
|
||||||
|
;; FIXME: The following inputs are downloaded from upstream to allow us
|
||||||
|
;; to use the lightweight Julia release tarball. Ideally, these inputs
|
||||||
|
;; would eventually be replaced with proper Guix packages.
|
||||||
|
|
||||||
|
;; TODO: run "make -f contrib/repackage_system_suitesparse4.make" to copy static lib
|
||||||
|
("rmath"
|
||||||
|
,(origin
|
||||||
|
(method url-fetch)
|
||||||
|
(uri "https://api.github.com/repos/JuliaLang/Rmath-julia/tarball/v0.1")
|
||||||
|
(sha256
|
||||||
|
(base32
|
||||||
|
"1qyps217175qhid46l8f5i1v8i82slgp23ia63x2hzxwfmx8617p"))))
|
||||||
|
("suitesparse"
|
||||||
|
,(origin
|
||||||
|
(method url-fetch)
|
||||||
|
(uri "http://faculty.cse.tamu.edu/davis/SuiteSparse/SuiteSparse-4.4.5.tar.gz")
|
||||||
|
(sha256
|
||||||
|
(base32
|
||||||
|
"1jcbxb8jx5wlcixzf6n5dca2rcfx6mlcms1k2rl5gp67ay3bix43"))))
|
||||||
|
("objconv"
|
||||||
|
,(origin
|
||||||
|
(method url-fetch)
|
||||||
|
;; No versioned URL, see <https://www.agner.org/optimize/> for updates.
|
||||||
|
(uri "https://www.agner.org/optimize/objconv.zip")
|
||||||
|
(file-name "objconv-2018-10-07.zip")
|
||||||
|
(sha256
|
||||||
|
(base32
|
||||||
|
"0wp6ld9vk11f4nnkn56627zmlv9k5vafi99qa3yyn1pgcd61zcfs"))))
|
||||||
|
("libwhich"
|
||||||
|
,(origin
|
||||||
|
(method url-fetch)
|
||||||
|
(uri
|
||||||
|
(string-append
|
||||||
|
"https://api.github.com/repos/vtjnash/libwhich/tarball/"
|
||||||
|
"81e9723c0273d78493dc8c8ed570f68d9ce7e89e"))
|
||||||
|
(sha256
|
||||||
|
(base32
|
||||||
|
"1p7zg31kpmpbmh1znrk1xrbd074agx13b9q4dcw8n2zrwwdlbz3b"))))
|
||||||
|
;; ("llvm"
|
||||||
|
;; ,(origin
|
||||||
|
;; (method url-fetch)
|
||||||
|
;; (uri
|
||||||
|
;; (string-append
|
||||||
|
;; "http://releases.llvm.org/6.0.0/llvm-6.0.0.src.tar.xz"))
|
||||||
|
;; (sha256
|
||||||
|
;; (base32
|
||||||
|
;; "0224xvfg6h40y5lrbnb9qaq3grmdc5rg00xq03s1wxjfbf8krx8z"))))
|
||||||
|
;; ("cmake" ,cmake) ;; required to build llvm
|
||||||
|
("dsfmt"
|
||||||
|
,(origin
|
||||||
|
(method url-fetch)
|
||||||
|
(uri
|
||||||
|
(string-append
|
||||||
|
"http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/"
|
||||||
|
"SFMT/dSFMT-src-2.2.3.tar.gz"))
|
||||||
|
(sha256
|
||||||
|
(base32
|
||||||
|
"03kaqbjbi6viz0n33dk5jlf6ayxqlsq4804n7kwkndiga9s4hd42"))))))
|
||||||
|
(native-inputs
|
||||||
|
`(("openssl" ,openssl)
|
||||||
|
("perl" ,perl)
|
||||||
|
("patchelf" ,patchelf)
|
||||||
|
("pkg-config" ,pkg-config)
|
||||||
|
("python" ,python-2)))
|
||||||
|
;; Julia is not officially released for ARM and MIPS.
|
||||||
|
;; See https://github.com/JuliaLang/julia/issues/10639
|
||||||
|
(supported-systems
|
||||||
|
'("i686-linux" "x86_64-linux" "aarch64-linux"))
|
||||||
|
(home-page "https://julialang.org/")
|
||||||
|
(synopsis "High-performance dynamic language for technical computing")
|
||||||
|
(description
|
||||||
|
"Julia is a high-level, high-performance dynamic programming language for
|
||||||
|
technical computing, with syntax that is familiar to users of other technical
|
||||||
|
computing environments. It provides a sophisticated compiler, distributed
|
||||||
|
parallel execution, numerical accuracy, and an extensive mathematical function
|
||||||
|
library.")
|
||||||
|
(license license:expat))
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
Index: tools/llvm-cfi-verify/CMakeLists.txt
|
||||||
|
===================================================================
|
||||||
|
--- a/tools/llvm-cfi-verify/CMakeLists.txt
|
||||||
|
+++ b/tools/llvm-cfi-verify/CMakeLists.txt
|
||||||
|
@@ -11,7 +11,7 @@
|
||||||
|
Symbolize
|
||||||
|
)
|
||||||
|
|
||||||
|
-add_llvm_tool(llvm-cfi-verify
|
||||||
|
+add_llvm_tool(llvm-cfi-verify DISABLE_LLVM_LINK_LLVM_DYLIB
|
||||||
|
llvm-cfi-verify.cpp)
|
||||||
|
|
||||||
|
add_subdirectory(lib)
|
|
@ -0,0 +1,39 @@
|
||||||
|
From d793ba4bacae51ae25be19c1636fcf38707938fd Mon Sep 17 00:00:00 2001
|
||||||
|
From: Valentin Churavy <v.churavy@gmail.com>
|
||||||
|
Date: Fri, 1 Jun 2018 17:43:55 -0400
|
||||||
|
Subject: [PATCH] fix LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
|
||||||
|
|
||||||
|
---
|
||||||
|
cmake/modules/HandleLLVMOptions.cmake | 2 +-
|
||||||
|
include/llvm/Config/abi-breaking.h.cmake | 2 +-
|
||||||
|
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
|
||||||
|
index 3d2dd48018c..b67ee6a896e 100644
|
||||||
|
--- a/cmake/modules/HandleLLVMOptions.cmake
|
||||||
|
+++ b/cmake/modules/HandleLLVMOptions.cmake
|
||||||
|
@@ -572,7 +572,7 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
|
||||||
|
|
||||||
|
if (LLVM_ENABLE_PEDANTIC AND LLVM_COMPILER_IS_GCC_COMPATIBLE)
|
||||||
|
append("-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||||
|
- append("-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||||
|
+ append("-Wno-long-long -Wundef" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG)
|
||||||
|
diff --git a/include/llvm/Config/abi-breaking.h.cmake b/include/llvm/Config/abi-breaking.h.cmake
|
||||||
|
index 7ae401e5b8a..d52c4609101 100644
|
||||||
|
--- a/include/llvm/Config/abi-breaking.h.cmake
|
||||||
|
+++ b/include/llvm/Config/abi-breaking.h.cmake
|
||||||
|
@@ -20,7 +20,7 @@
|
||||||
|
|
||||||
|
/* Allow selectively disabling link-time mismatch checking so that header-only
|
||||||
|
ADT content from LLVM can be used without linking libSupport. */
|
||||||
|
-#if !LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
|
||||||
|
+#ifndef LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
|
||||||
|
|
||||||
|
// ABI_BREAKING_CHECKS protection: provides link-time failure when clients build
|
||||||
|
// mismatch with LLVM
|
||||||
|
--
|
||||||
|
2.17.0
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
|
||||||
|
index f1e4251a44b..73d49f5d7e4 100644
|
||||||
|
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
|
||||||
|
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
|
||||||
|
@@ -1248,6 +1248,14 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+bool NVPTXTargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
|
||||||
|
+ unsigned DestAS) const {
|
||||||
|
+ assert(SrcAS != DestAS && "Expected different address spaces!");
|
||||||
|
+
|
||||||
|
+ return (SrcAS == ADDRESS_SPACE_GENERIC || SrcAS > ADDRESS_SPACE_LOCAL) &&
|
||||||
|
+ (DestAS == ADDRESS_SPACE_GENERIC || DestAS > ADDRESS_SPACE_LOCAL);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
SDValue
|
||||||
|
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
|
||||||
|
SDLoc dl(Op);
|
||||||
|
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
|
||||||
|
index ef04a8573d4..68a9a7195c4 100644
|
||||||
|
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
|
||||||
|
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
|
||||||
|
@@ -443,6 +443,8 @@ public:
|
||||||
|
const NVPTXSubtarget &STI);
|
||||||
|
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
|
||||||
|
|
||||||
|
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
|
||||||
|
+
|
||||||
|
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
|
const char *getTargetNodeName(unsigned Opcode) const override;
|
|
@ -0,0 +1,35 @@
|
||||||
|
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
|
||||||
|
===================================================================
|
||||||
|
--- a/lib/Target/X86/X86ISelLowering.cpp
|
||||||
|
+++ b/lib/Target/X86/X86ISelLowering.cpp
|
||||||
|
@@ -2098,7 +2098,8 @@
|
||||||
|
|
||||||
|
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
|
||||||
|
// MSVC CRT provides functionalities for stack protection.
|
||||||
|
- if (Subtarget.getTargetTriple().isOSMSVCRT()) {
|
||||||
|
+ if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
|
||||||
|
+ Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
|
||||||
|
// MSVC CRT has a global variable holding security cookie.
|
||||||
|
M.getOrInsertGlobal("__security_cookie",
|
||||||
|
Type::getInt8PtrTy(M.getContext()));
|
||||||
|
@@ -2120,15 +2121,19 @@
|
||||||
|
|
||||||
|
Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
|
||||||
|
// MSVC CRT has a global variable holding security cookie.
|
||||||
|
- if (Subtarget.getTargetTriple().isOSMSVCRT())
|
||||||
|
+ if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
|
||||||
|
+ Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
|
||||||
|
return M.getGlobalVariable("__security_cookie");
|
||||||
|
+ }
|
||||||
|
return TargetLowering::getSDagStackGuard(M);
|
||||||
|
}
|
||||||
|
|
||||||
|
Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
|
||||||
|
// MSVC CRT has a function to validate security cookie.
|
||||||
|
- if (Subtarget.getTargetTriple().isOSMSVCRT())
|
||||||
|
+ if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
|
||||||
|
+ Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
|
||||||
|
return M.getFunction("__security_check_cookie");
|
||||||
|
+ }
|
||||||
|
return TargetLowering::getSSPStackGuardCheck(M);
|
||||||
|
}
|
|
@ -0,0 +1,53 @@
|
||||||
|
From f76abe65e6d07fea5e838c4f8c9a9421c16debb0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Valentin Churavy <v.churavy@gmail.com>
|
||||||
|
Date: Thu, 5 Jul 2018 12:37:50 -0400
|
||||||
|
Subject: [PATCH] Fix unwind info relocation with large code model on AArch64
|
||||||
|
|
||||||
|
---
|
||||||
|
lib/MC/MCObjectFileInfo.cpp | 2 ++
|
||||||
|
.../AArch64/ELF_ARM64_large-relocations.s | 20 +++++++++++++++++++
|
||||||
|
2 files changed, 22 insertions(+)
|
||||||
|
create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
|
||||||
|
|
||||||
|
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
|
||||||
|
index 328f000f37c..938b35f20d1 100644
|
||||||
|
--- a/lib/MC/MCObjectFileInfo.cpp
|
||||||
|
+++ b/lib/MC/MCObjectFileInfo.cpp
|
||||||
|
@@ -291,6 +291,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
|
||||||
|
break;
|
||||||
|
case Triple::ppc64:
|
||||||
|
case Triple::ppc64le:
|
||||||
|
+ case Triple::aarch64:
|
||||||
|
+ case Triple::aarch64_be:
|
||||||
|
case Triple::x86_64:
|
||||||
|
FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
|
||||||
|
(Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
|
||||||
|
diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..66f28dabd79
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
|
||||||
|
@@ -0,0 +1,20 @@
|
||||||
|
+# RUN: llvm-mc -triple=arm64-none-linux-gnu -large-code-model -filetype=obj -o %T/large-reloc.o %s
|
||||||
|
+# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o
|
||||||
|
+# RUN-BE: llvm-mc -triple=aarch64_be-none-linux-gnu -large-code-model -filetype=obj -o %T/be-large-reloc.o %s
|
||||||
|
+# RUN-BE: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section be-large-reloc.o,.eh_frame=0x10000 -map-section be-large-reloc.o,.text=0xffff000000000000 -check=%s %T/be-large-reloc.o
|
||||||
|
+
|
||||||
|
+ .text
|
||||||
|
+ .globl g
|
||||||
|
+ .p2align 2
|
||||||
|
+ .type g,@function
|
||||||
|
+g:
|
||||||
|
+ .cfi_startproc
|
||||||
|
+ mov x0, xzr
|
||||||
|
+ ret
|
||||||
|
+ .Lfunc_end0:
|
||||||
|
+ .size g, .Lfunc_end0-g
|
||||||
|
+ .cfi_endproc
|
||||||
|
+
|
||||||
|
+# Skip the CIE and load the 8 bytes PC begin pointer.
|
||||||
|
+# Assuming the CIE and the FDE length are both 4 bytes.
|
||||||
|
+# rtdyld-check: *{8}(section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc)
|
||||||
|
--
|
||||||
|
2.18.0
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
From f94d12b6108b944199b715f31f25a022f75d2feb Mon Sep 17 00:00:00 2001
|
||||||
|
From: Yichao Yu <yyc1992@gmail.com>
|
||||||
|
Date: Sat, 10 Jun 2017 08:45:13 -0400
|
||||||
|
Subject: [PATCH 4/4] Enable support for floating-point division reductions
|
||||||
|
|
||||||
|
Similar to fsub, fdiv can also be vectorized using fmul.
|
||||||
|
---
|
||||||
|
lib/Transforms/Utils/LoopUtils.cpp | 1 +
|
||||||
|
test/Transforms/LoopVectorize/float-reduction.ll | 22 ++++++++++++++++++++++
|
||||||
|
2 files changed, 23 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
|
||||||
|
index 3c522786641..a4aced53a95 100644
|
||||||
|
--- a/lib/Transforms/Utils/LoopUtils.cpp
|
||||||
|
+++ b/lib/Transforms/Utils/LoopUtils.cpp
|
||||||
|
@@ -451,6 +451,7 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
|
||||||
|
return InstDesc(Kind == RK_IntegerOr, I);
|
||||||
|
case Instruction::Xor:
|
||||||
|
return InstDesc(Kind == RK_IntegerXor, I);
|
||||||
|
+ case Instruction::FDiv:
|
||||||
|
case Instruction::FMul:
|
||||||
|
return InstDesc(Kind == RK_FloatMult, I, UAI);
|
||||||
|
case Instruction::FSub:
|
||||||
|
diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll
|
||||||
|
index f3b95d0ead7..669c54d55a2 100644
|
||||||
|
--- a/test/Transforms/LoopVectorize/float-reduction.ll
|
||||||
|
+++ b/test/Transforms/LoopVectorize/float-reduction.ll
|
||||||
|
@@ -44,3 +44,25 @@ for.body: ; preds = %for.body, %entry
|
||||||
|
for.end: ; preds = %for.body
|
||||||
|
ret float %sub
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+;CHECK-LABEL: @foodiv(
|
||||||
|
+;CHECK: fdiv fast <4 x float>
|
||||||
|
+;CHECK: ret
|
||||||
|
+define float @foodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
|
||||||
|
+entry:
|
||||||
|
+ br label %for.body
|
||||||
|
+
|
||||||
|
+for.body: ; preds = %for.body, %entry
|
||||||
|
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
+ %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ]
|
||||||
|
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
|
||||||
|
+ %0 = load float, float* %arrayidx, align 4
|
||||||
|
+ %sub = fdiv fast float %sum.04, %0
|
||||||
|
+ %indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||||
|
+ %exitcond = icmp eq i32 %lftr.wideiv, 200
|
||||||
|
+ br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
+
|
||||||
|
+for.end: ; preds = %for.body
|
||||||
|
+ ret float %sub
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.14.1
|
||||||
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
commit 6a311a7a804831fea43cfb2f61322adcb407a1af
|
||||||
|
Author: Keno Fischer <keno@juliacomputing.com>
|
||||||
|
Date: Thu Jan 18 15:57:05 2018 -0500
|
||||||
|
|
||||||
|
[JumpThreading] Don't restrict cast-traversal to i1
|
||||||
|
|
||||||
|
Summary:
|
||||||
|
In D17663, JumpThreading learned to look through simple cast instructions,
|
||||||
|
but only if the source of those cast instructions was a phi/cmp i1
|
||||||
|
(in an effort to limit compile time effects). I think this condition
|
||||||
|
is too restrictive. For switches with limited value range, InstCombine
|
||||||
|
will readily introduce an extra `trunc` instruction to a smaller
|
||||||
|
integer type (e.g. from i8 to i2), leaving us in the somewhat perverse
|
||||||
|
situation that jump-threading would work before running instcombine,
|
||||||
|
but not after. Since instcombine produces this pattern, I think we
|
||||||
|
need to consider it canonical and support it in JumpThreading.
|
||||||
|
In general, for limiting recursion, I think the existing restriction
|
||||||
|
to phi and cmp nodes should be sufficient to avoid looking through
|
||||||
|
unprofitable chains of instructions.
|
||||||
|
|
||||||
|
Reviewers: haicheng, gberry, bmakam, mcrosier
|
||||||
|
|
||||||
|
Subscribers: llvm-commits
|
||||||
|
|
||||||
|
Differential Revision: https://reviews.llvm.org/D42262
|
||||||
|
|
||||||
|
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
|
||||||
|
index 95c4650..1155e18 100644
|
||||||
|
--- a/lib/Transforms/Scalar/JumpThreading.cpp
|
||||||
|
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
|
||||||
|
@@ -647,11 +647,9 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle Cast instructions. Only see through Cast when the source operand is
|
||||||
|
- // PHI or Cmp and the source type is i1 to save the compilation time.
|
||||||
|
+ // PHI or Cmp to save the compilation time.
|
||||||
|
if (CastInst *CI = dyn_cast<CastInst>(I)) {
|
||||||
|
Value *Source = CI->getOperand(0);
|
||||||
|
- if (!Source->getType()->isIntegerTy(1))
|
||||||
|
- return false;
|
||||||
|
if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
|
||||||
|
return false;
|
||||||
|
ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
|
||||||
|
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
|
||||||
|
index ce86cba..16e7549 100644
|
||||||
|
--- a/test/Transforms/JumpThreading/basic.ll
|
||||||
|
+++ b/test/Transforms/JumpThreading/basic.ll
|
||||||
|
@@ -547,6 +547,34 @@ l5:
|
||||||
|
; CHECK: }
|
||||||
|
}
|
||||||
|
|
||||||
|
+define i1 @trunc_switch(i1 %arg) {
|
||||||
|
+; CHECK-LABEL: @trunc_switch
|
||||||
|
+top:
|
||||||
|
+; CHECK: br i1 %arg, label %exitA, label %exitB
|
||||||
|
+ br i1 %arg, label %common, label %B
|
||||||
|
+
|
||||||
|
+B:
|
||||||
|
+ br label %common
|
||||||
|
+
|
||||||
|
+common:
|
||||||
|
+ %phi = phi i8 [ 2, %B ], [ 1, %top ]
|
||||||
|
+ %trunc = trunc i8 %phi to i2
|
||||||
|
+; CHECK-NOT: switch
|
||||||
|
+ switch i2 %trunc, label %unreach [
|
||||||
|
+ i2 1, label %exitA
|
||||||
|
+ i2 -2, label %exitB
|
||||||
|
+ ]
|
||||||
|
+
|
||||||
|
+unreach:
|
||||||
|
+ unreachable
|
||||||
|
+
|
||||||
|
+exitA:
|
||||||
|
+ ret i1 true
|
||||||
|
+
|
||||||
|
+exitB:
|
||||||
|
+ ret i1 false
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
; CHECK-LABEL: define void @h_con(i32 %p) {
|
||||||
|
define void @h_con(i32 %p) {
|
||||||
|
%x = icmp ult i32 %p, 5
|
|
@ -0,0 +1,677 @@
|
||||||
|
From 45bc0f0badbdbabaed7d204757c2aad7ab49a3fe Mon Sep 17 00:00:00 2001
|
||||||
|
From: DokFaust <rodia@autistici.org>
|
||||||
|
Date: Mon, 11 Jun 2018 12:59:42 +0200
|
||||||
|
Subject: [PATCH] PerfJITEventListener integration, requires compile flag
|
||||||
|
LLVM_USE_PERF
|
||||||
|
|
||||||
|
---
|
||||||
|
CMakeLists.txt | 13 +
|
||||||
|
include/llvm/Config/config.h.cmake | 3 +
|
||||||
|
include/llvm/Config/llvm-config.h.cmake | 3 +
|
||||||
|
.../llvm/ExecutionEngine/JITEventListener.h | 9 +
|
||||||
|
lib/ExecutionEngine/CMakeLists.txt | 4 +
|
||||||
|
lib/ExecutionEngine/LLVMBuild.txt | 2 +-
|
||||||
|
lib/ExecutionEngine/Orc/LLVMBuild.txt | 2 +-
|
||||||
|
.../PerfJITEvents/CMakeLists.txt | 5 +
|
||||||
|
.../PerfJITEvents/LLVMBuild.txt | 23 +
|
||||||
|
.../PerfJITEvents/PerfJITEventListener.cpp | 492 ++++++++++++++++++
|
||||||
|
10 files changed, 554 insertions(+), 2 deletions(-)
|
||||||
|
create mode 100644 lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
|
||||||
|
create mode 100644 lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
|
||||||
|
create mode 100644 lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
|
||||||
|
|
||||||
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||||
|
index f8da6cf9211..fb92c825a46 100644
|
||||||
|
--- a/CMakeLists.txt
|
||||||
|
+++ b/CMakeLists.txt
|
||||||
|
@@ -426,6 +426,16 @@ if( LLVM_USE_OPROFILE )
|
||||||
|
endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
||||||
|
endif( LLVM_USE_OPROFILE )
|
||||||
|
|
||||||
|
+option(LLVM_USE_PERF
|
||||||
|
+ "Use perf JIT interface to inform perf about JIT code" OFF)
|
||||||
|
+
|
||||||
|
+# If enabled, verify we are on a platform that supports perf.
|
||||||
|
+if( LLVM_USE_PERF )
|
||||||
|
+ if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
||||||
|
+ message(FATAL_ERROR "perf support is available on Linux only.")
|
||||||
|
+ endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
||||||
|
+endif( LLVM_USE_PERF )
|
||||||
|
+
|
||||||
|
set(LLVM_USE_SANITIZER "" CACHE STRING
|
||||||
|
"Define the sanitizer used to build binaries and tests.")
|
||||||
|
set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
|
||||||
|
@@ -634,6 +644,9 @@ endif (LLVM_USE_INTEL_JITEVENTS)
|
||||||
|
if (LLVM_USE_OPROFILE)
|
||||||
|
set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT)
|
||||||
|
endif (LLVM_USE_OPROFILE)
|
||||||
|
+if (LLVM_USE_PERF)
|
||||||
|
+ set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents)
|
||||||
|
+endif (LLVM_USE_PERF)
|
||||||
|
|
||||||
|
message(STATUS "Constructing LLVMBuild project information")
|
||||||
|
execute_process(
|
||||||
|
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
|
||||||
|
index 940f8420304..17787ed779b 100644
|
||||||
|
--- a/include/llvm/Config/config.h.cmake
|
||||||
|
+++ b/include/llvm/Config/config.h.cmake
|
||||||
|
@@ -377,6 +377,9 @@
|
||||||
|
/* Define if we have the oprofile JIT-support library */
|
||||||
|
#cmakedefine01 LLVM_USE_OPROFILE
|
||||||
|
|
||||||
|
+/* Define if we have the perf JIT-support library */
|
||||||
|
+#cmakedefine01 LLVM_USE_PERF
|
||||||
|
+
|
||||||
|
/* LLVM version information */
|
||||||
|
#cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}"
|
||||||
|
|
||||||
|
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
|
||||||
|
index 4daa00f3bc4..8d9c3b24d52 100644
|
||||||
|
--- a/include/llvm/Config/llvm-config.h.cmake
|
||||||
|
+++ b/include/llvm/Config/llvm-config.h.cmake
|
||||||
|
@@ -65,6 +65,9 @@
|
||||||
|
/* Define if we have the oprofile JIT-support library */
|
||||||
|
#cmakedefine01 LLVM_USE_OPROFILE
|
||||||
|
|
||||||
|
+/* Define if we have the perf JIT-support library */
|
||||||
|
+#cmakedefine01 LLVM_USE_PERF
|
||||||
|
+
|
||||||
|
/* Major version of the LLVM API */
|
||||||
|
#define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
|
||||||
|
|
||||||
|
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
|
||||||
|
index ff7840f00a4..1cc2c423a8b 100644
|
||||||
|
--- a/include/llvm/ExecutionEngine/JITEventListener.h
|
||||||
|
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
|
||||||
|
@@ -115,6 +115,15 @@ public:
|
||||||
|
}
|
||||||
|
#endif // USE_OPROFILE
|
||||||
|
|
||||||
|
+#if LLVM_USE_PERF // #cmakedefine01 always defines this (0 or 1): test the value
|
||||||
|
+ static JITEventListener *createPerfJITEventListener();
|
||||||
|
+#else // perf support compiled out: provide an inline stub
|
||||||
|
+ static JITEventListener *createPerfJITEventListener()
|
||||||
|
+ {
|
||||||
|
+ return nullptr;
|
||||||
|
+ }
|
||||||
|
+#endif // LLVM_USE_PERF
|
||||||
|
+
|
||||||
|
private:
|
||||||
|
virtual void anchor();
|
||||||
|
};
|
||||||
|
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
|
||||||
|
index 84b34919e44..893d113a685 100644
|
||||||
|
--- a/lib/ExecutionEngine/CMakeLists.txt
|
||||||
|
+++ b/lib/ExecutionEngine/CMakeLists.txt
|
||||||
|
@@ -30,3 +30,7 @@ endif( LLVM_USE_OPROFILE )
|
||||||
|
if( LLVM_USE_INTEL_JITEVENTS )
|
||||||
|
add_subdirectory(IntelJITEvents)
|
||||||
|
endif( LLVM_USE_INTEL_JITEVENTS )
|
||||||
|
+
|
||||||
|
+if( LLVM_USE_PERF )
|
||||||
|
+ add_subdirectory(PerfJITEvents)
|
||||||
|
+endif( LLVM_USE_PERF )
|
||||||
|
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
|
||||||
|
index 9d29a41f504..b6e1bda6a51 100644
|
||||||
|
--- a/lib/ExecutionEngine/LLVMBuild.txt
|
||||||
|
+++ b/lib/ExecutionEngine/LLVMBuild.txt
|
||||||
|
@@ -16,7 +16,7 @@
|
||||||
|
;===------------------------------------------------------------------------===;
|
||||||
|
|
||||||
|
[common]
|
||||||
|
-subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc
|
||||||
|
+subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc PerfJITEvents
|
||||||
|
|
||||||
|
[component_0]
|
||||||
|
type = Library
|
||||||
|
diff --git a/lib/ExecutionEngine/Orc/LLVMBuild.txt b/lib/ExecutionEngine/Orc/LLVMBuild.txt
|
||||||
|
index 8f05172e77a..ef4ae64e823 100644
|
||||||
|
--- a/lib/ExecutionEngine/Orc/LLVMBuild.txt
|
||||||
|
+++ b/lib/ExecutionEngine/Orc/LLVMBuild.txt
|
||||||
|
@@ -19,4 +19,4 @@
|
||||||
|
type = Library
|
||||||
|
name = OrcJIT
|
||||||
|
parent = ExecutionEngine
|
||||||
|
-required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
|
||||||
|
+required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
|
||||||
|
diff --git a/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..136cc429d02
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
|
||||||
|
@@ -0,0 +1,5 @@
|
||||||
|
+add_llvm_library(LLVMPerfJITEvents
|
||||||
|
+ PerfJITEventListener.cpp
|
||||||
|
+ )
|
||||||
|
+
|
||||||
|
+add_dependencies(LLVMPerfJITEvents LLVMCodeGen)
|
||||||
|
diff --git a/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..b1958a69260
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
|
||||||
|
@@ -0,0 +1,23 @@
|
||||||
|
+;===- ./lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt ----------------*- Conf -*--===;
|
||||||
|
+;
|
||||||
|
+; The LLVM Compiler Infrastructure
|
||||||
|
+;
|
||||||
|
+; This file is distributed under the University of Illinois Open Source
|
||||||
|
+; License. See LICENSE.TXT for details.
|
||||||
|
+;
|
||||||
|
+;===------------------------------------------------------------------------===;
|
||||||
|
+;
|
||||||
|
+; This is an LLVMBuild description file for the components in this subdirectory.
|
||||||
|
+;
|
||||||
|
+; For more information on the LLVMBuild system, please see:
|
||||||
|
+;
|
||||||
|
+; http://llvm.org/docs/LLVMBuild.html
|
||||||
|
+;
|
||||||
|
+;===------------------------------------------------------------------------===;
|
||||||
|
+
|
||||||
|
+[component_0]
|
||||||
|
+type = OptionalLibrary
|
||||||
|
+name = PerfJITEvents
|
||||||
|
+parent = ExecutionEngine
|
||||||
|
+required_libraries = CodeGen Core DebugInfoDWARF ExecutionEngine Object Support TransformUtils
|
||||||
|
+
|
||||||
|
diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..c2b97dd59f3
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
|
||||||
|
@@ -0,0 +1,492 @@
|
||||||
|
+//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
|
||||||
|
+//
|
||||||
|
+// The LLVM Compiler Infrastructure
|
||||||
|
+//
|
||||||
|
+// This file is distributed under the University of Illinois Open Source
|
||||||
|
+// License. See LICENSE.TXT for details.
|
||||||
|
+//
|
||||||
|
+//===----------------------------------------------------------------------===//
|
||||||
|
+//
|
||||||
|
+// This file defines a JITEventListener object that tells perf about JITted
|
||||||
|
+// functions, including source line information.
|
||||||
|
+//
|
||||||
|
+// Documentation for perf jit integration is available at:
|
||||||
|
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
|
||||||
|
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
|
||||||
|
+//
|
||||||
|
+//===----------------------------------------------------------------------===//
|
||||||
|
+
|
||||||
|
+#include "llvm/ADT/Twine.h"
|
||||||
|
+#include "llvm/Config/config.h"
|
||||||
|
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
|
||||||
|
+#include "llvm/ExecutionEngine/JITEventListener.h"
|
||||||
|
+#include "llvm/Object/ObjectFile.h"
|
||||||
|
+#include "llvm/Object/SymbolSize.h"
|
||||||
|
+#include "llvm/Support/Debug.h"
|
||||||
|
+#include "llvm/Support/Errno.h"
|
||||||
|
+#include "llvm/Support/FileSystem.h"
|
||||||
|
+#include "llvm/Support/MemoryBuffer.h"
|
||||||
|
+#include "llvm/Support/Mutex.h"
|
||||||
|
+#include "llvm/Support/MutexGuard.h"
|
||||||
|
+#include "llvm/Support/Path.h"
|
||||||
|
+#include "llvm/Support/Process.h"
|
||||||
|
+#include "llvm/Support/Threading.h"
|
||||||
|
+#include "llvm/Support/raw_ostream.h"
|
||||||
|
+
|
||||||
|
+#include <sys/mman.h> // mmap()
|
||||||
|
+#include <sys/types.h> // getpid()
|
||||||
|
+#include <time.h> // clock_gettime(), time(), localtime_r() */
|
||||||
|
+#include <unistd.h> // for getpid(), read(), close()
|
||||||
|
+
|
||||||
|
+using namespace llvm;
|
||||||
|
+using namespace llvm::object;
|
||||||
|
+typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
|
||||||
|
+
|
||||||
|
+namespace {
|
||||||
|
+
|
||||||
|
+// language identifier (XXX: should we generate something better from debug
|
||||||
|
+// info?)
|
||||||
|
+#define JIT_LANG "llvm-IR"
|
||||||
|
+#define LLVM_PERF_JIT_MAGIC \
|
||||||
|
+ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \
|
||||||
|
+ (uint32_t)'D')
|
||||||
|
+#define LLVM_PERF_JIT_VERSION 1
|
||||||
|
+
|
||||||
|
+// bit 0: set if the jitdump file is using an architecture-specific timestamp
|
||||||
|
+// clock source
|
||||||
|
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)
|
||||||
|
+
|
||||||
|
+struct LLVMPerfJitHeader;
|
||||||
|
+
|
||||||
|
+class PerfJITEventListener : public JITEventListener {
|
||||||
|
+public:
|
||||||
|
+ PerfJITEventListener();
|
||||||
|
+ ~PerfJITEventListener() {
|
||||||
|
+ if (MarkerAddr)
|
||||||
|
+ CloseMarker();
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ void NotifyObjectEmitted(const ObjectFile &Obj,
|
||||||
|
+ const RuntimeDyld::LoadedObjectInfo &L) override;
|
||||||
|
+ void NotifyFreeingObject(const ObjectFile &Obj) override;
|
||||||
|
+
|
||||||
|
+private:
|
||||||
|
+ bool InitDebuggingDir();
|
||||||
|
+ bool OpenMarker();
|
||||||
|
+ void CloseMarker();
|
||||||
|
+ static bool FillMachine(LLVMPerfJitHeader &hdr);
|
||||||
|
+
|
||||||
|
+ void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr,
|
||||||
|
+ uint64_t CodeSize);
|
||||||
|
+ void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
|
||||||
|
+
|
||||||
|
+ // cache lookups
|
||||||
|
+ pid_t Pid;
|
||||||
|
+
|
||||||
|
+ // base directory for output data
|
||||||
|
+ std::string JitPath;
|
||||||
|
+
|
||||||
|
+ // output data stream, closed via Dumpstream
|
||||||
|
+ int DumpFd = -1;
|
||||||
|
+
|
||||||
|
+ // output data stream
|
||||||
|
+ std::unique_ptr<raw_fd_ostream> Dumpstream;
|
||||||
|
+
|
||||||
|
+ // prevent concurrent dumps from messing up the output file
|
||||||
|
+ sys::Mutex Mutex;
|
||||||
|
+
|
||||||
|
+ // perf mmap marker
|
||||||
|
+ void *MarkerAddr = NULL;
|
||||||
|
+
|
||||||
|
+ // perf support ready
|
||||||
|
+ bool SuccessfullyInitialized = false;
|
||||||
|
+
|
||||||
|
+ // identifier for functions, primarily to identify when moving them around
|
||||||
|
+ uint64_t CodeGeneration = 1;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+// The following are POD struct definitions from the perf jit specification
|
||||||
|
+
|
||||||
|
+enum LLVMPerfJitRecordType {
|
||||||
|
+ JIT_CODE_LOAD = 0,
|
||||||
|
+ JIT_CODE_MOVE = 1, // not emitted, code isn't moved
|
||||||
|
+ JIT_CODE_DEBUG_INFO = 2,
|
||||||
|
+ JIT_CODE_CLOSE = 3, // not emitted, unnecessary
|
||||||
|
+ JIT_CODE_UNWINDING_INFO = 4, // not emitted
|
||||||
|
+
|
||||||
|
+ JIT_CODE_MAX
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct LLVMPerfJitHeader {
|
||||||
|
+ uint32_t Magic; // characters "JiTD"
|
||||||
|
+ uint32_t Version; // header version
|
||||||
|
+ uint32_t TotalSize; // total size of header
|
||||||
|
+ uint32_t ElfMach; // elf mach target
|
||||||
|
+ uint32_t Pad1; // reserved
|
||||||
|
+ uint32_t Pid;
|
||||||
|
+ uint64_t Timestamp; // timestamp
|
||||||
|
+ uint64_t Flags; // flags
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+// record prefix (mandatory in each record)
|
||||||
|
+struct LLVMPerfJitRecordPrefix {
|
||||||
|
+ uint32_t Id; // record type identifier
|
||||||
|
+ uint32_t TotalSize;
|
||||||
|
+ uint64_t Timestamp;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct LLVMPerfJitRecordCodeLoad {
|
||||||
|
+ LLVMPerfJitRecordPrefix Prefix;
|
||||||
|
+
|
||||||
|
+ uint32_t Pid;
|
||||||
|
+ uint32_t Tid;
|
||||||
|
+ uint64_t Vma;
|
||||||
|
+ uint64_t CodeAddr;
|
||||||
|
+ uint64_t CodeSize;
|
||||||
|
+ uint64_t CodeIndex;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct LLVMPerfJitDebugEntry {
|
||||||
|
+ uint64_t Addr;
|
||||||
|
+ int Lineno; // source line number starting at 1
|
||||||
|
+ int Discrim; // column discriminator, 0 is default
|
||||||
|
+ // followed by null terminated filename, \xff\0 if same as previous entry
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct LLVMPerfJitRecordDebugInfo {
|
||||||
|
+ LLVMPerfJitRecordPrefix Prefix;
|
||||||
|
+
|
||||||
|
+ uint64_t CodeAddr;
|
||||||
|
+ uint64_t NrEntry;
|
||||||
|
+ // followed by NrEntry LLVMPerfJitDebugEntry records
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static inline uint64_t timespec_to_ns(const struct timespec *ts) {
|
||||||
|
+ const uint64_t NanoSecPerSec = 1000000000;
|
||||||
|
+ return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline uint64_t perf_get_timestamp(void) {
|
||||||
|
+ struct timespec ts;
|
||||||
|
+ int ret;
|
||||||
|
+
|
||||||
|
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
|
+ if (ret)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ return timespec_to_ns(&ts);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) {
|
||||||
|
+ // check if clock-source is supported
|
||||||
|
+ if (!perf_get_timestamp()) {
|
||||||
|
+ errs() << "kernel does not support CLOCK_MONOTONIC\n";
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!InitDebuggingDir()) {
|
||||||
|
+ errs() << "could not initialize debugging directory\n";
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ std::string Filename;
|
||||||
|
+ raw_string_ostream FilenameBuf(Filename);
|
||||||
|
+ FilenameBuf << JitPath << "/jit-" << Pid << ".dump";
|
||||||
|
+
|
||||||
|
+ // Need to open ourselves, because we need to hand the FD to OpenMarker() and
|
||||||
|
+ // raw_fd_ostream doesn't expose the FD.
|
||||||
|
+ using sys::fs::openFileForWrite;
|
||||||
|
+ if (auto EC =
|
||||||
|
+ openFileForWrite(FilenameBuf.str(), DumpFd, sys::fs::F_RW, 0666)) {
|
||||||
|
+ errs() << "could not open JIT dump file " << FilenameBuf.str() << ": "
|
||||||
|
+ << EC.message() << "\n";
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ Dumpstream = make_unique<raw_fd_ostream>(DumpFd, true);
|
||||||
|
+
|
||||||
|
+ LLVMPerfJitHeader Header = {0};
|
||||||
|
+ if (!FillMachine(Header))
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ // signal this process emits JIT information
|
||||||
|
+ if (!OpenMarker())
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ // emit dumpstream header
|
||||||
|
+ Header.Magic = LLVM_PERF_JIT_MAGIC;
|
||||||
|
+ Header.Version = LLVM_PERF_JIT_VERSION;
|
||||||
|
+ Header.TotalSize = sizeof(Header);
|
||||||
|
+ Header.Pid = Pid;
|
||||||
|
+ Header.Timestamp = perf_get_timestamp();
|
||||||
|
+ Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
|
||||||
|
+
|
||||||
|
+ // Everything initialized, can do profiling now.
|
||||||
|
+ if (!Dumpstream->has_error())
|
||||||
|
+ SuccessfullyInitialized = true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void PerfJITEventListener::NotifyObjectEmitted(
|
||||||
|
+ const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) {
|
||||||
|
+
|
||||||
|
+ if (!SuccessfullyInitialized)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
|
||||||
|
+ const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
|
||||||
|
+
|
||||||
|
+ // Get the address of the object image for use as a unique identifier
|
||||||
|
+ std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
|
||||||
|
+
|
||||||
|
+ // Use symbol info to iterate over functions in the object.
|
||||||
|
+ for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
|
||||||
|
+ SymbolRef Sym = P.first;
|
||||||
|
+ std::string SourceFileName;
|
||||||
|
+
|
||||||
|
+ Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
|
||||||
|
+ if (!SymTypeOrErr) {
|
||||||
|
+ // There's not much we can do with errors here
|
||||||
|
+ consumeError(SymTypeOrErr.takeError());
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ SymbolRef::Type SymType = *SymTypeOrErr;
|
||||||
|
+ if (SymType != SymbolRef::ST_Function)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ Expected<StringRef> Name = Sym.getName();
|
||||||
|
+ if (!Name) {
|
||||||
|
+ consumeError(Name.takeError());
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ Expected<uint64_t> AddrOrErr = Sym.getAddress();
|
||||||
|
+ if (!AddrOrErr) {
|
||||||
|
+ consumeError(AddrOrErr.takeError());
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ uint64_t Addr = *AddrOrErr;
|
||||||
|
+ uint64_t Size = P.second;
|
||||||
|
+
|
||||||
|
+ // According to spec debugging info has to come before loading the
|
||||||
|
+ // corresponding code load.
|
||||||
|
+ DILineInfoTable Lines = Context->getLineInfoForAddressRange(
|
||||||
|
+ Addr, Size, FileLineInfoKind::AbsoluteFilePath);
|
||||||
|
+
|
||||||
|
+ NotifyDebug(Addr, Lines);
|
||||||
|
+ NotifyCode(Name, Addr, Size);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ Dumpstream->flush();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
|
||||||
|
+ // perf currently doesn't have an interface for unloading. But munmap()ing the
|
||||||
|
+ // code section does, so that's ok.
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+bool PerfJITEventListener::InitDebuggingDir() {
|
||||||
|
+ time_t Time;
|
||||||
|
+ struct tm LocalTime;
|
||||||
|
+ char TimeBuffer[sizeof("YYYYMMDD")];
|
||||||
|
+ SmallString<64> Path;
|
||||||
|
+
|
||||||
|
+ // search for location to dump data to
|
||||||
|
+ if (const char *BaseDir = getenv("JITDUMPDIR"))
|
||||||
|
+ Path.append(BaseDir);
|
||||||
|
+ else if (!sys::path::home_directory(Path))
|
||||||
|
+ Path = ".";
|
||||||
|
+
|
||||||
|
+ // create debug directory
|
||||||
|
+ Path += "/.debug/jit/";
|
||||||
|
+ if (auto EC = sys::fs::create_directories(Path)) {
|
||||||
|
+ errs() << "could not create jit cache directory " << Path << ": "
|
||||||
|
+ << EC.message() << "\n";
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // create unique directory for dump data related to this process
|
||||||
|
+ time(&Time);
|
||||||
|
+ localtime_r(&Time, &LocalTime);
|
||||||
|
+ strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
|
||||||
|
+ Path += JIT_LANG "-jit-";
|
||||||
|
+ Path += TimeBuffer;
|
||||||
|
+
|
||||||
|
+ SmallString<128> UniqueDebugDir;
|
||||||
|
+
|
||||||
|
+ using sys::fs::createUniqueDirectory;
|
||||||
|
+ if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
|
||||||
|
+ errs() << "could not create unique jit cache directory " << UniqueDebugDir
|
||||||
|
+ << ": " << EC.message() << "\n";
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ JitPath = UniqueDebugDir.str();
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+bool PerfJITEventListener::OpenMarker() {
|
||||||
|
+ // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
|
||||||
|
+ // is captured either live (perf record running when we mmap) or in deferred
|
||||||
|
+ // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
|
||||||
|
+ // file for more meta data info about the jitted code. Perf report/annotate
|
||||||
|
+ // detect this special filename and process the jitdump file.
|
||||||
|
+ //
|
||||||
|
+ // Mapping must be PROT_EXEC to ensure it is captured by perf record
|
||||||
|
+ // even when not using -d option.
|
||||||
|
+ MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC,
|
||||||
|
+ MAP_PRIVATE, DumpFd, 0);
|
||||||
|
+
|
||||||
|
+ if (MarkerAddr == MAP_FAILED) {
|
||||||
|
+ errs() << "could not mmap JIT marker\n";
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void PerfJITEventListener::CloseMarker() {
|
||||||
|
+ if (!MarkerAddr)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ munmap(MarkerAddr, sys::Process::getPageSize());
|
||||||
|
+ MarkerAddr = nullptr;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
|
||||||
|
+ char id[16];
|
||||||
|
+ struct {
|
||||||
|
+ uint16_t e_type;
|
||||||
|
+ uint16_t e_machine;
|
||||||
|
+ } info;
|
||||||
|
+
|
||||||
|
+ size_t RequiredMemory = sizeof(id) + sizeof(info);
|
||||||
|
+
|
||||||
|
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
|
||||||
|
+ MemoryBuffer::getFileSlice("/proc/self/exe",
|
||||||
|
+ RequiredMemory,
|
||||||
|
+ 0);
|
||||||
|
+
|
||||||
|
+ // This'll not guarantee that enough data was actually read from the
|
||||||
|
+ // underlying file. Instead the trailing part of the buffer would be
|
||||||
|
+ // zeroed. Given the ELF signature check below that seems ok though,
|
||||||
|
+ // it's unlikely that the file ends just after that, and the
|
||||||
|
+ // consequence would just be that perf wouldn't recognize the
|
||||||
|
+ // signature.
|
||||||
|
+ if (auto EC = MB.getError()) {
|
||||||
|
+ errs() << "could not open /proc/self/exe: " << EC.message() << "\n";
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
|
||||||
|
+ memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
|
||||||
|
+
|
||||||
|
+ // check ELF signature
|
||||||
|
+ if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
|
||||||
|
+ errs() << "invalid elf signature\n";
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ hdr.ElfMach = info.e_machine;
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
|
||||||
|
+ uint64_t CodeAddr, uint64_t CodeSize) {
|
||||||
|
+ assert(SuccessfullyInitialized);
|
||||||
|
+
|
||||||
|
+ // 0 length functions can't have samples.
|
||||||
|
+ if (CodeSize == 0)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ LLVMPerfJitRecordCodeLoad rec;
|
||||||
|
+ rec.Prefix.Id = JIT_CODE_LOAD;
|
||||||
|
+ rec.Prefix.TotalSize = sizeof(rec) + // debug record itself
|
||||||
|
+ Symbol->size() + 1 + // symbol name
|
||||||
|
+ CodeSize; // and code
|
||||||
|
+ rec.Prefix.Timestamp = perf_get_timestamp();
|
||||||
|
+
|
||||||
|
+ rec.CodeSize = CodeSize;
|
||||||
|
+ rec.Vma = 0;
|
||||||
|
+ rec.CodeAddr = CodeAddr;
|
||||||
|
+ rec.Pid = Pid;
|
||||||
|
+ rec.Tid = get_threadid();
|
||||||
|
+
|
||||||
|
+ // avoid interspersing output
|
||||||
|
+ MutexGuard Guard(Mutex);
|
||||||
|
+
|
||||||
|
+ rec.CodeIndex = CodeGeneration++; // under lock!
|
||||||
|
+
|
||||||
|
+ Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
|
||||||
|
+ Dumpstream->write(Symbol->data(), Symbol->size() + 1);
|
||||||
|
+ Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
|
||||||
|
+ DILineInfoTable Lines) {
|
||||||
|
+ assert(SuccessfullyInitialized);
|
||||||
|
+
|
||||||
|
+ // Didn't get useful debug info.
|
||||||
|
+ if (Lines.empty())
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ LLVMPerfJitRecordDebugInfo rec;
|
||||||
|
+ rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
|
||||||
|
+ rec.Prefix.TotalSize = sizeof(rec); // will be increased further
|
||||||
|
+ rec.Prefix.Timestamp = perf_get_timestamp();
|
||||||
|
+ rec.CodeAddr = CodeAddr;
|
||||||
|
+ rec.NrEntry = Lines.size();
|
||||||
|
+
|
||||||
|
+ // compute total size of record (variable due to filenames)
|
||||||
|
+ DILineInfoTable::iterator Begin = Lines.begin();
|
||||||
|
+ DILineInfoTable::iterator End = Lines.end();
|
||||||
|
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
|
||||||
|
+ DILineInfo &line = It->second;
|
||||||
|
+ rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
|
||||||
|
+ rec.Prefix.TotalSize += line.FileName.size() + 1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // The debug_entry describes the source line information. It is defined as
|
||||||
|
+ // follows in order:
|
||||||
|
+ // * uint64_t code_addr: address of function for which the debug information
|
||||||
|
+ // is generated
|
||||||
|
+ // * uint32_t line : source file line number (starting at 1)
|
||||||
|
+ // * uint32_t discrim : column discriminator, 0 is default
|
||||||
|
+ // * char name[n] : source file name in ASCII, including null termination
|
||||||
|
+
|
||||||
|
+ // avoid interspersing output
|
||||||
|
+ MutexGuard Guard(Mutex);
|
||||||
|
+
|
||||||
|
+ Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
|
||||||
|
+
|
||||||
|
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
|
||||||
|
+ LLVMPerfJitDebugEntry LineInfo;
|
||||||
|
+ DILineInfo &Line = It->second;
|
||||||
|
+
|
||||||
|
+ LineInfo.Addr = It->first;
|
||||||
|
+ // The function re-created by perf is preceded by an ELF
|
||||||
|
+ // header. Need to adjust for that, otherwise the results are
|
||||||
|
+ // wrong.
|
||||||
|
+ LineInfo.Addr += 0x40;
|
||||||
|
+ LineInfo.Lineno = Line.Line;
|
||||||
|
+ LineInfo.Discrim = Line.Discriminator;
|
||||||
|
+
|
||||||
|
+ Dumpstream->write(reinterpret_cast<const char *>(&LineInfo),
|
||||||
|
+ sizeof(LineInfo));
|
||||||
|
+ Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// There should be only a single event listener per process, otherwise perf gets
|
||||||
|
+// confused.
|
||||||
|
+llvm::ManagedStatic<PerfJITEventListener> PerfListener;
|
||||||
|
+
|
||||||
|
+} // end anonymous namespace
|
||||||
|
+
|
||||||
|
+namespace llvm {
|
||||||
|
+JITEventListener *JITEventListener::createPerfJITEventListener() {
|
||||||
|
+ return &*PerfListener;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+} // namespace llvm
|
||||||
|
+
|
||||||
|
--
|
||||||
|
2.17.1
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
Index: lib/Analysis/LoopInfo.cpp
|
||||||
|
===================================================================
|
||||||
|
--- a/lib/Analysis/LoopInfo.cpp
|
||||||
|
+++ b/lib/Analysis/LoopInfo.cpp
|
||||||
|
@@ -223,15 +223,14 @@
|
||||||
|
BasicBlock *H = getHeader();
|
||||||
|
for (BasicBlock *BB : this->blocks()) {
|
||||||
|
TerminatorInst *TI = BB->getTerminator();
|
||||||
|
- MDNode *MD = nullptr;
|
||||||
|
|
||||||
|
// Check if this terminator branches to the loop header.
|
||||||
|
- for (BasicBlock *Successor : TI->successors()) {
|
||||||
|
- if (Successor == H) {
|
||||||
|
- MD = TI->getMetadata(LLVMContext::MD_loop);
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
+ bool IsPredecessor = any_of(TI->successors(),
|
||||||
|
+ [=](BasicBlock *Successor) { return Successor == H; });
|
||||||
|
+ if (!IsPredecessor)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ MDNode *MD = TI->getMetadata(LLVMContext::MD_loop);
|
||||||
|
if (!MD)
|
||||||
|
return nullptr;
|
||||||
|
|
|
@ -0,0 +1,187 @@
|
||||||
|
commit 98592fcc61307968f7df1362771534595a1e1c21
|
||||||
|
Author: Keno Fischer <keno@juliacomputing.com>
|
||||||
|
Date: Wed Jul 25 19:29:02 2018 -0400
|
||||||
|
|
||||||
|
[SCEV] Don't expand Wrap predicate using inttoptr in ni addrspaces
|
||||||
|
|
||||||
|
Summary:
|
||||||
|
In non-integral address spaces, we're not allowed to introduce inttoptr/ptrtoint
|
||||||
|
intrinsics. Instead, we need to expand any pointer arithmetic as geps on the
|
||||||
|
base pointer. Luckily this is a common task for SCEV, so all we have to do here
|
||||||
|
is hook up the corresponding helper function and add a test case.
|
||||||
|
|
||||||
|
Fixes PR38290
|
||||||
|
|
||||||
|
Reviewers: reames, sanjoy
|
||||||
|
|
||||||
|
Subscribers: javed.absar, llvm-commits
|
||||||
|
|
||||||
|
Differential Revision: https://reviews.llvm.org/D49832
|
||||||
|
|
||||||
|
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
|
||||||
|
index 7f76f057216..f441a3647fb 100644
|
||||||
|
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
|
||||||
|
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
|
||||||
|
@@ -2157,8 +2157,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
|
||||||
|
const SCEV *Step = AR->getStepRecurrence(SE);
|
||||||
|
const SCEV *Start = AR->getStart();
|
||||||
|
|
||||||
|
+ Type *ARTy = AR->getType();
|
||||||
|
unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
|
||||||
|
- unsigned DstBits = SE.getTypeSizeInBits(AR->getType());
|
||||||
|
+ unsigned DstBits = SE.getTypeSizeInBits(ARTy);
|
||||||
|
|
||||||
|
// The expression {Start,+,Step} has nusw/nssw if
|
||||||
|
// Step < 0, Start - |Step| * Backedge <= Start
|
||||||
|
@@ -2170,11 +2171,12 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
|
||||||
|
Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);
|
||||||
|
|
||||||
|
IntegerType *Ty =
|
||||||
|
- IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(AR->getType()));
|
||||||
|
+ IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
|
||||||
|
+ Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
|
||||||
|
|
||||||
|
Value *StepValue = expandCodeFor(Step, Ty, Loc);
|
||||||
|
Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
|
||||||
|
- Value *StartValue = expandCodeFor(Start, Ty, Loc);
|
||||||
|
+ Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);
|
||||||
|
|
||||||
|
ConstantInt *Zero =
|
||||||
|
ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
|
||||||
|
@@ -2197,8 +2199,21 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
|
||||||
|
// Compute:
|
||||||
|
// Start + |Step| * Backedge < Start
|
||||||
|
// Start - |Step| * Backedge > Start
|
||||||
|
- Value *Add = Builder.CreateAdd(StartValue, MulV);
|
||||||
|
- Value *Sub = Builder.CreateSub(StartValue, MulV);
|
||||||
|
+ Value *Add = nullptr, *Sub = nullptr;
|
||||||
|
+ if (ARExpandTy->isPointerTy()) {
|
||||||
|
+ PointerType *ARPtrTy = cast<PointerType>(ARExpandTy);
|
||||||
|
+ const SCEV *MulS = SE.getSCEV(MulV);
|
||||||
|
+ const SCEV *const StepArray[2] = {MulS, SE.getNegativeSCEV(MulS)};
|
||||||
|
+ Add = Builder.CreateBitCast(
|
||||||
|
+ expandAddToGEP(&StepArray[0], &StepArray[1], ARPtrTy, Ty, StartValue),
|
||||||
|
+ ARPtrTy);
|
||||||
|
+ Sub = Builder.CreateBitCast(
|
||||||
|
+ expandAddToGEP(&StepArray[1], &StepArray[2], ARPtrTy, Ty, StartValue),
|
||||||
|
+ ARPtrTy);
|
||||||
|
+ } else {
|
||||||
|
+ Add = Builder.CreateAdd(StartValue, MulV);
|
||||||
|
+ Sub = Builder.CreateSub(StartValue, MulV);
|
||||||
|
+ }
|
||||||
|
|
||||||
|
Value *EndCompareGT = Builder.CreateICmp(
|
||||||
|
Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
|
||||||
|
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..ddcf5e1a195
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
|
||||||
|
@@ -0,0 +1,73 @@
|
||||||
|
+; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV
|
||||||
|
+
|
||||||
|
+; NB: addrspaces 10-13 are non-integral
|
||||||
|
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
|
||||||
|
+
|
||||||
|
+; This matches the test case from PR38290
|
||||||
|
+; Check that we expand the SCEV predicate check using GEP, rather
|
||||||
|
+; than ptrtoint.
|
||||||
|
+
|
||||||
|
+%jl_value_t = type opaque
|
||||||
|
+%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 }
|
||||||
|
+
|
||||||
|
+declare i64 @julia_steprange_last_4949()
|
||||||
|
+
|
||||||
|
+define void @"japi1_align!_9477"(%jl_value_t addrspace(10)**) #0 {
|
||||||
|
+; LV-LAVEL: L26.lver.check
|
||||||
|
+; LV: [[OFMul:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[Step:%[^ ]*]])
|
||||||
|
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
|
||||||
|
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
|
||||||
|
+; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base:%[^ ]*]], i64 [[Step]]
|
||||||
|
+; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base]], i64 [[NegStep:%[^ ]*]]
|
||||||
|
+; LV-NEXT: icmp ugt i32 addrspace(13)* [[NegGEP]], [[Base]]
|
||||||
|
+; LV-NEXT: icmp ult i32 addrspace(13)* [[PosGEP]], [[Base]]
|
||||||
|
+; LV-NOT: inttoptr
|
||||||
|
+; LV-NOT: ptrtoint
|
||||||
|
+top:
|
||||||
|
+ %1 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8, !nonnull !1, !dereferenceable !2, !align !3
|
||||||
|
+ %2 = load i32, i32* inttoptr (i64 12 to i32*), align 4, !tbaa !4
|
||||||
|
+ %3 = sub i32 0, %2
|
||||||
|
+ %4 = call i64 @julia_steprange_last_4949()
|
||||||
|
+ %5 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)*
|
||||||
|
+ %6 = bitcast %jl_value_t addrspace(11)* %5 to %jl_value_t addrspace(10)* addrspace(11)*
|
||||||
|
+ %7 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %6, align 8, !tbaa !4, !nonnull !1, !dereferenceable !9, !align !2
|
||||||
|
+ %8 = addrspacecast %jl_value_t addrspace(10)* %7 to %jl_value_t addrspace(11)*
|
||||||
|
+ %9 = bitcast %jl_value_t addrspace(11)* %8 to i32 addrspace(13)* addrspace(11)*
|
||||||
|
+ %10 = load i32 addrspace(13)*, i32 addrspace(13)* addrspace(11)* %9, align 8, !tbaa !10, !nonnull !1
|
||||||
|
+ %11 = sext i32 %3 to i64
|
||||||
|
+ br label %L26
|
||||||
|
+
|
||||||
|
+L26: ; preds = %L26, %top
|
||||||
|
+ %value_phi3 = phi i64 [ 0, %top ], [ %12, %L26 ]
|
||||||
|
+ %12 = add i64 %value_phi3, -1
|
||||||
|
+ %13 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %12
|
||||||
|
+ %14 = load i32, i32 addrspace(13)* %13, align 4, !tbaa !13
|
||||||
|
+ %15 = add i64 %12, %11
|
||||||
|
+ %16 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %15
|
||||||
|
+ store i32 %14, i32 addrspace(13)* %16, align 4, !tbaa !13
|
||||||
|
+ %17 = icmp eq i64 %value_phi3, %4
|
||||||
|
+ br i1 %17, label %L45, label %L26
|
||||||
|
+
|
||||||
|
+L45: ; preds = %L26
|
||||||
|
+ ret void
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+attributes #0 = { "thunk" }
|
||||||
|
+
|
||||||
|
+!llvm.module.flags = !{!0}
|
||||||
|
+
|
||||||
|
+!0 = !{i32 1, !"Debug Info Version", i32 3}
|
||||||
|
+!1 = !{}
|
||||||
|
+!2 = !{i64 16}
|
||||||
|
+!3 = !{i64 8}
|
||||||
|
+!4 = !{!5, !5, i64 0}
|
||||||
|
+!5 = !{!"jtbaa_mutab", !6, i64 0}
|
||||||
|
+!6 = !{!"jtbaa_value", !7, i64 0}
|
||||||
|
+!7 = !{!"jtbaa_data", !8, i64 0}
|
||||||
|
+!8 = !{!"jtbaa"}
|
||||||
|
+!9 = !{i64 40}
|
||||||
|
+!10 = !{!11, !11, i64 0}
|
||||||
|
+!11 = !{!"jtbaa_arrayptr", !12, i64 0}
|
||||||
|
+!12 = !{!"jtbaa_array", !8, i64 0}
|
||||||
|
+!13 = !{!14, !14, i64 0}
|
||||||
|
+!14 = !{!"jtbaa_arraybuf", !7, i64 0}
|
||||||
|
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
|
||||||
|
index a7e5bce7445..fa6fccecbf1 100644
|
||||||
|
--- a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
|
||||||
|
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
|
||||||
|
@@ -58,10 +58,10 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
|
; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
|
||||||
|
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
|
||||||
|
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
|
||||||
|
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
|
||||||
|
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
|
||||||
|
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
|
||||||
|
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
|
||||||
|
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
|
||||||
|
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
|
||||||
|
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
|
||||||
|
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
|
||||||
|
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
|
||||||
|
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
|
||||||
|
|
||||||
|
@@ -233,10 +233,10 @@ for.end: ; preds = %for.body
|
||||||
|
; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
|
||||||
|
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
|
||||||
|
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
|
||||||
|
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
|
||||||
|
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
|
||||||
|
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
|
||||||
|
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
|
||||||
|
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
|
||||||
|
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
|
||||||
|
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
|
||||||
|
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
|
||||||
|
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
|
||||||
|
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
|
||||||
|
|
|
@ -0,0 +1,89 @@
|
||||||
|
commit 8eb2b102a203d83fb713f3bf79acf235dabdd8cd
|
||||||
|
Author: Keno Fischer <keno@juliacomputing.com>
|
||||||
|
Date: Mon Jul 30 16:59:08 2018 -0400
|
||||||
|
|
||||||
|
[VNCoercion] Disallow coercion between different ni addrspaces
|
||||||
|
|
||||||
|
Summary:
|
||||||
|
I'm not sure if it would be legal by the IR reference to introduce
|
||||||
|
an addrspacecast here, since the IR reference is a bit vague on
|
||||||
|
the exact semantics, but at least for our usage of it (and I
|
||||||
|
suspect for many others' usage) it is not. For us, addrspacecasts
|
||||||
|
between non-integral address spaces carry frontend information that the
|
||||||
|
optimizer cannot deduce afterwards in a generic way (though we
|
||||||
|
have frontend specific passes in our pipeline that do propagate
|
||||||
|
these). In any case, I'm sure nobody is using it this way at
|
||||||
|
the moment, since it would have introduced inttoptrs, which
|
||||||
|
are definitely illegal.
|
||||||
|
|
||||||
|
Fixes PR38375
|
||||||
|
|
||||||
|
Reviewers: sanjoy, reames, dberlin
|
||||||
|
|
||||||
|
Subscribers: llvm-commits
|
||||||
|
|
||||||
|
Differential Revision: https://reviews.llvm.org/D50010
|
||||||
|
|
||||||
|
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
|
||||||
|
index c3feea6a0a4..735d1e7b792 100644
|
||||||
|
--- a/lib/Transforms/Utils/VNCoercion.cpp
|
||||||
|
+++ b/lib/Transforms/Utils/VNCoercion.cpp
|
||||||
|
@@ -20,14 +20,21 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
|
||||||
|
StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
+ Type *StoredValTy = StoredVal->getType();
|
||||||
|
+
|
||||||
|
// The store has to be at least as big as the load.
|
||||||
|
if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
- // Don't coerce non-integral pointers to integers or vice versa.
|
||||||
|
- if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
|
||||||
|
- DL.isNonIntegralPointerType(LoadTy))
|
||||||
|
+ bool StoredNI = DL.isNonIntegralPointerType(StoredValTy);
|
||||||
|
+ bool LoadNI = DL.isNonIntegralPointerType(LoadTy);
|
||||||
|
+ if (StoredNI != LoadNI) {
|
||||||
|
return false;
|
||||||
|
+ } else if (StoredNI && LoadNI &&
|
||||||
|
+ cast<PointerType>(StoredValTy)->getAddressSpace() !=
|
||||||
|
+ cast<PointerType>(LoadTy)->getAddressSpace()) {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
|
||||||
|
index 9ae4132231d..5217fc1a06a 100644
|
||||||
|
--- a/test/Transforms/GVN/non-integral-pointers.ll
|
||||||
|
+++ b/test/Transforms/GVN/non-integral-pointers.ll
|
||||||
|
@@ -1,6 +1,6 @@
|
||||||
|
; RUN: opt -gvn -S < %s | FileCheck %s
|
||||||
|
|
||||||
|
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
|
||||||
|
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
|
||||||
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
|
||||||
|
@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
|
||||||
|
alwaysTaken:
|
||||||
|
ret i64 42
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
|
||||||
|
+ ; CHECK-LABEL: @multini(
|
||||||
|
+ ; CHECK-NOT: inttoptr
|
||||||
|
+ ; CHECK-NOT: ptrtoint
|
||||||
|
+ ; CHECK-NOT: addrspacecast
|
||||||
|
+ entry:
|
||||||
|
+ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
|
||||||
|
+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
|
||||||
|
+
|
||||||
|
+ neverTaken:
|
||||||
|
+ %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
|
||||||
|
+ %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc
|
||||||
|
+ ret i8 addrspace(5)* %differentas
|
||||||
|
+
|
||||||
|
+ alwaysTaken:
|
||||||
|
+ ret i8 addrspace(5)* null
|
||||||
|
+ }
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,48 @@
|
||||||
|
commit 4840cf7299bb312125d41fc84733c15c2370f18e
|
||||||
|
Author: DokFaust <rodia@autistici.org>
|
||||||
|
Date: Fri Jun 8 19:23:01 2018 +0200
|
||||||
|
|
||||||
|
Add debug line-level code information to OProfile module
|
||||||
|
|
||||||
|
diff --git a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
|
||||||
|
index 7d5550046a5..ea100286318 100644
|
||||||
|
--- a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
|
||||||
|
+++ b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
|
||||||
|
@@ -24 +24 @@ parent = ExecutionEngine
|
||||||
|
-required_libraries = Support Object ExecutionEngine
|
||||||
|
+required_libraries = DebugInfoDWARF Support Object ExecutionEngine
|
||||||
|
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
|
||||||
|
index 3581d645839..045ecb82853 100644
|
||||||
|
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
|
||||||
|
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
|
||||||
|
@@ -26,0 +27,2 @@
|
||||||
|
+#include "llvm/DebugInfo/DIContext.h"
|
||||||
|
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
|
||||||
|
@@ -86,0 +89,2 @@ void OProfileJITEventListener::NotifyObjectEmitted(
|
||||||
|
+ std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
|
||||||
|
+ std::string SourceFileName;
|
||||||
|
@@ -111 +115,23 @@ void OProfileJITEventListener::NotifyObjectEmitted(
|
||||||
|
- // TODO: support line number info (similar to IntelJITEventListener.cpp)
|
||||||
|
+ DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
|
||||||
|
+ DILineInfoTable::iterator Begin = Lines.begin();
|
||||||
|
+ DILineInfoTable::iterator End = Lines.end();
|
||||||
|
+ size_t i = 0;
|
||||||
|
+
|
||||||
|
+ size_t num_entries = std::distance(Begin, End);
|
||||||
|
+ static struct debug_line_info* debug_line;
|
||||||
|
+ debug_line = (struct debug_line_info * )calloc(num_entries, sizeof(struct debug_line_info));
|
||||||
|
+
|
||||||
|
+ for(DILineInfoTable::iterator It=Begin; It != End; ++It){
|
||||||
|
+ i = std::distance(Begin,It);
|
||||||
|
+ debug_line[i].vma = (unsigned long) It->first;
|
||||||
|
+ debug_line[i].lineno = It->second.Line;
|
||||||
|
+ SourceFileName = Lines.front().second.FileName;
|
||||||
|
+ debug_line[i].filename = const_cast<char *>(SourceFileName.c_str());
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if(Wrapper->op_write_debug_line_info((void*) Addr, num_entries, debug_line) == -1) {
|
||||||
|
+ DEBUG(dbgs() << "Failed to tell OProfiler about debug object at ["
|
||||||
|
+ << (void*) Addr << "-" << ((char *) Addr + Size)
|
||||||
|
+ << "]\n");
|
||||||
|
+ continue;
|
||||||
|
+ }
|
|
@ -0,0 +1,29 @@
|
||||||
|
From 15899eaab58e96bb7bbe7a14099674e255656a50 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Valentin Churavy <v.churavy@gmail.com>
|
||||||
|
Date: Fri, 23 Feb 2018 14:41:20 -0500
|
||||||
|
Subject: [PATCH] Make AddrSpaceCast noops on PPC
|
||||||
|
|
||||||
|
PPC, like AArch64, doesn't have address spaces, so we can drop them in the backend
|
||||||
|
---
|
||||||
|
lib/Target/PowerPC/PPCISelLowering.h | 5 +++++
|
||||||
|
1 file changed, 5 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
|
||||||
|
index e60504507d3..c9b89773968 100644
|
||||||
|
--- a/lib/Target/PowerPC/PPCISelLowering.h
|
||||||
|
+++ b/lib/Target/PowerPC/PPCISelLowering.h
|
||||||
|
@@ -761,6 +761,11 @@ namespace llvm {
|
||||||
|
ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
|
||||||
|
+ // Addrspacecasts are always noops.
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
|
||||||
|
SelectionDAG &DAG,
|
||||||
|
ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
|
||||||
|
--
|
||||||
|
2.16.2
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
commit ab60b05a472e8651cbe53c19513b7e62b9ff32df
|
||||||
|
Author: Mikael Holmen <mikael.holmen@ericsson.com>
|
||||||
|
Date: Thu Feb 1 06:38:34 2018 +0000
|
||||||
|
|
||||||
|
[LSR] Don't force bases of foldable formulae to the final type.
|
||||||
|
|
||||||
|
Summary:
|
||||||
|
Before emitting code for scaled registers, we prevent
|
||||||
|
SCEVExpander from hoisting any scaled addressing mode
|
||||||
|
by emitting all the bases first. However, these bases
|
||||||
|
are being forced to the final type, resulting in some
|
||||||
|
odd code.
|
||||||
|
|
||||||
|
For example, if the type of the base is an integer and
|
||||||
|
the final type is a pointer, we will emit an inttoptr
|
||||||
|
for the base, a ptrtoint for the scale, and then a
|
||||||
|
'reverse' GEP where the GEP pointer is actually the base
|
||||||
|
integer and the index is the pointer. It's more intuitive
|
||||||
|
to use the pointer as a pointer and the integer as index.
|
||||||
|
|
||||||
|
Patch by: Bevin Hansson
|
||||||
|
|
||||||
|
Reviewers: atrick, qcolombet, sanjoy
|
||||||
|
|
||||||
|
Reviewed By: qcolombet
|
||||||
|
|
||||||
|
Subscribers: llvm-commits
|
||||||
|
|
||||||
|
Differential Revision: https://reviews.llvm.org/D42103
|
||||||
|
|
||||||
|
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323946 91177308-0d34-0410-b5e6-96231b3b80d8
|
||||||
|
|
||||||
|
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
|
||||||
|
index 332c074a1df..4b8e2286ed9 100644
|
||||||
|
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
|
||||||
|
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
|
||||||
|
@@ -4993,7 +4993,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
|
||||||
|
// Unless the addressing mode will not be folded.
|
||||||
|
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
|
||||||
|
isAMCompletelyFolded(TTI, LU, F)) {
|
||||||
|
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
|
||||||
|
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
|
||||||
|
Ops.clear();
|
||||||
|
Ops.push_back(SE.getUnknown(FullV));
|
||||||
|
}
|
|
@ -0,0 +1,301 @@
|
||||||
|
commit b398d8e1fa5a5a914957fa22d0a64db97f6c265e
|
||||||
|
Author: Craig Topper <craig.topper@intel.com>
|
||||||
|
Date: Thu Mar 8 00:21:17 2018 +0000
|
||||||
|
|
||||||
|
[X86] Fix some isel patterns that used aligned vector load instructions with unaligned predicates.
|
||||||
|
|
||||||
|
These patterns weren't checking the alignment of the load, but were using the aligned instructions. This will cause a GP fault if the data isn't aligned.
|
||||||
|
|
||||||
|
I believe these were introduced in r312450.
|
||||||
|
|
||||||
|
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326967 91177308-0d34-0410-b5e6-96231b3b80d8
|
||||||
|
|
||||||
|
diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td
|
||||||
|
index db3dfe56531..50c7763a2c3 100644
|
||||||
|
--- a/lib/Target/X86/X86InstrVecCompiler.td
|
||||||
|
+++ b/lib/Target/X86/X86InstrVecCompiler.td
|
||||||
|
@@ -261,10 +261,10 @@ let Predicates = [HasVLX] in {
|
||||||
|
// will zero the upper bits.
|
||||||
|
// TODO: Is there a safe way to detect whether the producing instruction
|
||||||
|
// already zeroed the upper bits?
|
||||||
|
-multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
|
||||||
|
- ValueType DstTy, ValueType SrcTy,
|
||||||
|
- ValueType ZeroTy, PatFrag memop,
|
||||||
|
- SubRegIndex SubIdx> {
|
||||||
|
+multiclass subvector_zero_lowering<string MoveStr, string LoadStr,
|
||||||
|
+ RegisterClass RC, ValueType DstTy,
|
||||||
|
+ ValueType SrcTy, ValueType ZeroTy,
|
||||||
|
+ PatFrag memop, SubRegIndex SubIdx> {
|
||||||
|
def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
|
||||||
|
(SrcTy RC:$src), (iPTR 0))),
|
||||||
|
(SUBREG_TO_REG (i64 0),
|
||||||
|
@@ -274,91 +274,91 @@ multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
|
||||||
|
(SrcTy (bitconvert (memop addr:$src))),
|
||||||
|
(iPTR 0))),
|
||||||
|
(SUBREG_TO_REG (i64 0),
|
||||||
|
- (!cast<Instruction>("VMOV"#MoveStr#"rm") addr:$src), SubIdx)>;
|
||||||
|
+ (!cast<Instruction>("VMOV"#LoadStr#"rm") addr:$src), SubIdx)>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Predicates = [HasAVX, NoVLX] in {
|
||||||
|
- defm : subvector_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, loadv2f64,
|
||||||
|
- sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, loadv4f32,
|
||||||
|
- sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, loadv2i64,
|
||||||
|
- sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, loadv2i64,
|
||||||
|
- sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, loadv2i64,
|
||||||
|
- sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, loadv2i64,
|
||||||
|
- sub_xmm>;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-let Predicates = [HasVLX] in {
|
||||||
|
- defm : subvector_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32,
|
||||||
|
+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v4f64, v2f64, v8i32,
|
||||||
|
loadv2f64, sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32,
|
||||||
|
+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v8f32, v4f32, v8i32,
|
||||||
|
loadv4f32, sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32,
|
||||||
|
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v4i64, v2i64, v8i32,
|
||||||
|
loadv2i64, sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32,
|
||||||
|
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i32, v4i32, v8i32,
|
||||||
|
loadv2i64, sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32,
|
||||||
|
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i16, v8i16, v8i32,
|
||||||
|
loadv2i64, sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32,
|
||||||
|
- loadv2i64, sub_xmm>;
|
||||||
|
-
|
||||||
|
- defm : subvector_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32,
|
||||||
|
- loadv2f64, sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32,
|
||||||
|
- loadv4f32, sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32,
|
||||||
|
- loadv2i64, sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32,
|
||||||
|
- loadv2i64, sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32,
|
||||||
|
- loadv2i64, sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32,
|
||||||
|
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i8, v16i8, v8i32,
|
||||||
|
loadv2i64, sub_xmm>;
|
||||||
|
+}
|
||||||
|
|
||||||
|
- defm : subvector_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32,
|
||||||
|
- loadv4f64, sub_ymm>;
|
||||||
|
- defm : subvector_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32,
|
||||||
|
- loadv8f32, sub_ymm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32,
|
||||||
|
- loadv4i64, sub_ymm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32,
|
||||||
|
- loadv4i64, sub_ymm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32,
|
||||||
|
- loadv4i64, sub_ymm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32,
|
||||||
|
- loadv4i64, sub_ymm>;
|
||||||
|
+let Predicates = [HasVLX] in {
|
||||||
|
+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v4f64,
|
||||||
|
+ v2f64, v8i32, loadv2f64, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v8f32,
|
||||||
|
+ v4f32, v8i32, loadv4f32, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v4i64,
|
||||||
|
+ v2i64, v8i32, loadv2i64, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i32,
|
||||||
|
+ v4i32, v8i32, loadv2i64, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i16,
|
||||||
|
+ v8i16, v8i32, loadv2i64, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i8,
|
||||||
|
+ v16i8, v8i32, loadv2i64, sub_xmm>;
|
||||||
|
+
|
||||||
|
+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v8f64,
|
||||||
|
+ v2f64, v16i32, loadv2f64, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v16f32,
|
||||||
|
+ v4f32, v16i32, loadv4f32, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i64,
|
||||||
|
+ v2i64, v16i32, loadv2i64, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i32,
|
||||||
|
+ v4i32, v16i32, loadv2i64, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i16,
|
||||||
|
+ v8i16, v16i32, loadv2i64, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v64i8,
|
||||||
|
+ v16i8, v16i32, loadv2i64, sub_xmm>;
|
||||||
|
+
|
||||||
|
+ defm : subvector_zero_lowering<"APDZ256", "UPDZ256", VR256X, v8f64,
|
||||||
|
+ v4f64, v16i32, loadv4f64, sub_ymm>;
|
||||||
|
+ defm : subvector_zero_lowering<"APSZ256", "UPSZ256", VR256X, v16f32,
|
||||||
|
+ v8f32, v16i32, loadv8f32, sub_ymm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v8i64,
|
||||||
|
+ v4i64, v16i32, loadv4i64, sub_ymm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v16i32,
|
||||||
|
+ v8i32, v16i32, loadv4i64, sub_ymm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v32i16,
|
||||||
|
+ v16i16, v16i32, loadv4i64, sub_ymm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v64i8,
|
||||||
|
+ v32i8, v16i32, loadv4i64, sub_ymm>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Predicates = [HasAVX512, NoVLX] in {
|
||||||
|
- defm : subvector_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, loadv2f64,
|
||||||
|
- sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, loadv4f32,
|
||||||
|
- sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, loadv2i64,
|
||||||
|
- sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, loadv2i64,
|
||||||
|
- sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, loadv2i64,
|
||||||
|
- sub_xmm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, loadv2i64,
|
||||||
|
- sub_xmm>;
|
||||||
|
-
|
||||||
|
- defm : subvector_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32,
|
||||||
|
- loadv4f64, sub_ymm>;
|
||||||
|
- defm : subvector_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32,
|
||||||
|
- loadv8f32, sub_ymm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32,
|
||||||
|
- loadv4i64, sub_ymm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32,
|
||||||
|
- loadv4i64, sub_ymm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32,
|
||||||
|
- loadv4i64, sub_ymm>;
|
||||||
|
- defm : subvector_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32,
|
||||||
|
- loadv4i64, sub_ymm>;
|
||||||
|
+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v8f64, v2f64,
|
||||||
|
+ v16i32,loadv2f64, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v16f32, v4f32,
|
||||||
|
+ v16i32, loadv4f32, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i64, v2i64,
|
||||||
|
+ v16i32, loadv2i64, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i32, v4i32,
|
||||||
|
+ v16i32, loadv2i64, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i16, v8i16,
|
||||||
|
+ v16i32, loadv2i64, sub_xmm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v64i8, v16i8,
|
||||||
|
+ v16i32, loadv2i64, sub_xmm>;
|
||||||
|
+
|
||||||
|
+ defm : subvector_zero_lowering<"APDY", "UPDY", VR256, v8f64, v4f64,
|
||||||
|
+ v16i32, loadv4f64, sub_ymm>;
|
||||||
|
+ defm : subvector_zero_lowering<"APSY", "UPSY", VR256, v16f32, v8f32,
|
||||||
|
+ v16i32, loadv8f32, sub_ymm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v8i64, v4i64,
|
||||||
|
+ v16i32, loadv4i64, sub_ymm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v16i32, v8i32,
|
||||||
|
+ v16i32, loadv4i64, sub_ymm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v32i16, v16i16,
|
||||||
|
+ v16i32, loadv4i64, sub_ymm>;
|
||||||
|
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v64i8, v32i8,
|
||||||
|
+ v16i32, loadv4i64, sub_ymm>;
|
||||||
|
}
|
||||||
|
|
||||||
|
// List of opcodes that guaranteed to zero the upper elements of vector regs.
|
||||||
|
diff --git a/test/CodeGen/X86/merge-consecutive-loads-256.ll b/test/CodeGen/X86/merge-consecutive-loads-256.ll
|
||||||
|
index 6ecd8116443..0f2cf594b1c 100644
|
||||||
|
--- a/test/CodeGen/X86/merge-consecutive-loads-256.ll
|
||||||
|
+++ b/test/CodeGen/X86/merge-consecutive-loads-256.ll
|
||||||
|
@@ -28,13 +28,13 @@ define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noi
|
||||||
|
define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
|
||||||
|
; AVX-LABEL: merge_4f64_2f64_2z:
|
||||||
|
; AVX: # %bb.0:
|
||||||
|
-; AVX-NEXT: vmovaps 32(%rdi), %xmm0
|
||||||
|
+; AVX-NEXT: vmovups 32(%rdi), %xmm0
|
||||||
|
; AVX-NEXT: retq
|
||||||
|
;
|
||||||
|
; X32-AVX-LABEL: merge_4f64_2f64_2z:
|
||||||
|
; X32-AVX: # %bb.0:
|
||||||
|
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0
|
||||||
|
+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0
|
||||||
|
; X32-AVX-NEXT: retl
|
||||||
|
%ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
|
||||||
|
%val0 = load <2 x double>, <2 x double>* %ptr0
|
||||||
|
@@ -109,13 +109,13 @@ define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline
|
||||||
|
define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp {
|
||||||
|
; AVX-LABEL: merge_4f64_f64_45zz:
|
||||||
|
; AVX: # %bb.0:
|
||||||
|
-; AVX-NEXT: vmovaps 32(%rdi), %xmm0
|
||||||
|
+; AVX-NEXT: vmovups 32(%rdi), %xmm0
|
||||||
|
; AVX-NEXT: retq
|
||||||
|
;
|
||||||
|
; X32-AVX-LABEL: merge_4f64_f64_45zz:
|
||||||
|
; X32-AVX: # %bb.0:
|
||||||
|
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0
|
||||||
|
+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0
|
||||||
|
; X32-AVX-NEXT: retl
|
||||||
|
%ptr0 = getelementptr inbounds double, double* %ptr, i64 4
|
||||||
|
%ptr1 = getelementptr inbounds double, double* %ptr, i64 5
|
||||||
|
@@ -155,13 +155,13 @@ define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline
|
||||||
|
define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp {
|
||||||
|
; AVX-LABEL: merge_4i64_2i64_3z:
|
||||||
|
; AVX: # %bb.0:
|
||||||
|
-; AVX-NEXT: vmovaps 48(%rdi), %xmm0
|
||||||
|
+; AVX-NEXT: vmovups 48(%rdi), %xmm0
|
||||||
|
; AVX-NEXT: retq
|
||||||
|
;
|
||||||
|
; X32-AVX-LABEL: merge_4i64_2i64_3z:
|
||||||
|
; X32-AVX: # %bb.0:
|
||||||
|
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
-; X32-AVX-NEXT: vmovaps 48(%eax), %xmm0
|
||||||
|
+; X32-AVX-NEXT: vmovups 48(%eax), %xmm0
|
||||||
|
; X32-AVX-NEXT: retl
|
||||||
|
%ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3
|
||||||
|
%val0 = load <2 x i64>, <2 x i64>* %ptr0
|
||||||
|
@@ -217,13 +217,13 @@ define <4 x i64> @merge_4i64_i64_1zzu(i64* %ptr) nounwind uwtable noinline ssp {
|
||||||
|
define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp {
|
||||||
|
; AVX-LABEL: merge_4i64_i64_23zz:
|
||||||
|
; AVX: # %bb.0:
|
||||||
|
-; AVX-NEXT: vmovaps 16(%rdi), %xmm0
|
||||||
|
+; AVX-NEXT: vmovups 16(%rdi), %xmm0
|
||||||
|
; AVX-NEXT: retq
|
||||||
|
;
|
||||||
|
; X32-AVX-LABEL: merge_4i64_i64_23zz:
|
||||||
|
; X32-AVX: # %bb.0:
|
||||||
|
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
-; X32-AVX-NEXT: vmovaps 16(%eax), %xmm0
|
||||||
|
+; X32-AVX-NEXT: vmovups 16(%eax), %xmm0
|
||||||
|
; X32-AVX-NEXT: retl
|
||||||
|
%ptr0 = getelementptr inbounds i64, i64* %ptr, i64 2
|
||||||
|
%ptr1 = getelementptr inbounds i64, i64* %ptr, i64 3
|
||||||
|
diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll
|
||||||
|
index 62102eb382c..3c6eaf65292 100644
|
||||||
|
--- a/test/CodeGen/X86/merge-consecutive-loads-512.ll
|
||||||
|
+++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll
|
||||||
|
@@ -106,13 +106,13 @@ define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noin
|
||||||
|
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
|
||||||
|
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
|
||||||
|
; ALL: # %bb.0:
|
||||||
|
-; ALL-NEXT: vmovaps 8(%rdi), %xmm0
|
||||||
|
+; ALL-NEXT: vmovups 8(%rdi), %xmm0
|
||||||
|
; ALL-NEXT: retq
|
||||||
|
;
|
||||||
|
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
|
||||||
|
; X32-AVX512F: # %bb.0:
|
||||||
|
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
-; X32-AVX512F-NEXT: vmovaps 8(%eax), %xmm0
|
||||||
|
+; X32-AVX512F-NEXT: vmovups 8(%eax), %xmm0
|
||||||
|
; X32-AVX512F-NEXT: retl
|
||||||
|
%ptr0 = getelementptr inbounds double, double* %ptr, i64 1
|
||||||
|
%ptr1 = getelementptr inbounds double, double* %ptr, i64 2
|
||||||
|
@@ -190,7 +190,7 @@ define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline
|
||||||
|
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
|
||||||
|
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
|
||||||
|
; ALL: # %bb.0:
|
||||||
|
-; ALL-NEXT: vmovaps 40(%rdi), %xmm0
|
||||||
|
+; ALL-NEXT: vmovups 40(%rdi), %xmm0
|
||||||
|
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||||
|
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
|
||||||
|
; ALL-NEXT: retq
|
||||||
|
@@ -198,7 +198,7 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s
|
||||||
|
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
|
||||||
|
; X32-AVX512F: # %bb.0:
|
||||||
|
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
-; X32-AVX512F-NEXT: vmovaps 40(%eax), %xmm0
|
||||||
|
+; X32-AVX512F-NEXT: vmovups 40(%eax), %xmm0
|
||||||
|
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||||
|
; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
|
||||||
|
; X32-AVX512F-NEXT: retl
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue