[BUILD] Restored wheels workflow (#1146)

- Dependent CUDA files (ptxas, cuda.h, libdevice.10.bc) are now packaged in
`triton/third_party/cuda`. `ptxas` is downloaded from the conda repo at
install time.
- Can now be built with an old glibc (such as the one used by manylinux2014)
This commit is contained in:
Philippe Tillet
2023-02-03 16:22:10 -08:00
committed by GitHub
parent f9e26deb05
commit 43798ab27e
10 changed files with 19411 additions and 44 deletions

View File

@@ -55,8 +55,8 @@ jobs:
if: ${{ matrix.runner != 'macos-10.15' }}
run: |
pip install clang-format
find . -regex '.*\.\(cpp\|hpp\|h\|cc\)' -not -path "./python/build/*" -not -path "./include/triton/external/*" -print0 | xargs -0 -n1 clang-format -style=file --dry-run -Werror -i ||
(echo '::error title=Style issues:: Please run `find . -regex ".*\.\(cpp\|hpp\|h\|cc\)" -not -path "./python/build/*" -not -path "./include/triton/external/*" -print0 | xargs -0 -n1 clang-format -style=file -i`' ; exit 1)
find . -regex '.*\.\(cpp\|hpp\|h\|cc\)' -not -path "./python/triton/*" -not -path "./python/build/*" -not -path "./include/triton/external/*" -print0 | xargs -0 -n1 clang-format -style=file --dry-run -Werror -i ||
(echo '::error title=Style issues:: Please run `find . -regex ".*\.\(cpp\|hpp\|h\|cc\)" -not -path "./python/triton/*" -not -path "./python/build/*" -not -path "./include/triton/external/*" -print0 | xargs -0 -n1 clang-format -style=file -i`' ; exit 1)
- name: Flake8
if: ${{ matrix.runner != 'macos-10.15' }}

View File

@@ -1,8 +1,8 @@
name: Wheels
on:
workflow_dispatch:
schedule:
- cron: "0 0 * * *"
#schedule:
# - cron: "0 0 * * *"
jobs:
@@ -26,10 +26,9 @@ jobs:
- name: Build wheels
run: |
export CIBW_MANYLINUX_X86_64_IMAGE="manylinux2014"
export CIBW_MANYLINUX_PYPY_X86_64_IMAGE="manylinux2014"
export CIBW_BEFORE_BUILD="pip install cmake;\
yum install -y llvm11 llvm11-devel llvm11-static llvm11-libs zlib-devel;"
export CIBW_MANYLINUX_X86_64_IMAGE="quay.io/pypa/manylinux2014_x86_64:latest"
#export CIBW_MANYLINUX_PYPY_X86_64_IMAGE="quay.io/pypa/manylinux2014_x86_64:latest"
export CIBW_BEFORE_BUILD="pip install cmake;"
export CIBW_SKIP="{cp,pp}35-*"
export CIBW_BUILD="{cp,pp}3*-manylinux_x86_64"
python3 -m cibuildwheel python --output-dir wheelhouse
@@ -37,4 +36,4 @@ jobs:
- name: Upload wheels to PyPI
run: |
python3 -m twine upload wheelhouse/* --skip-existing
python3 -m twine upload wheelhouse/* -u __token__ -p ${{ secrets.PYPY_API_TOKEN }}

View File

@@ -17,7 +17,6 @@ option(TRITON_BUILD_PYTHON_MODULE "Build Python Triton bindings" OFF)
# Ensure Python3 vars are set correctly
# used conditionally in this file and by lit tests
find_package(Python3 REQUIRED COMPONENTS Development Interpreter)
# Customized release build type with assertions: TritonRelBuildWithAsserts
set(CMAKE_C_FLAGS_TRITONRELBUILDWITHASSERTS "-O2 -g")
@@ -148,6 +147,7 @@ if(TRITON_BUILD_PYTHON_MODULE)
if (PYTHON_INCLUDE_DIRS)
include_directories(${PYTHON_INCLUDE_DIRS})
else()
find_package(Python3 REQUIRED COMPONENTS Development Interpreter)
include_directories(${Python3_INCLUDE_DIRS})
link_directories(${Python3_LIBRARY_DIRS})
link_libraries(${Python3_LIBRARIES})

View File

@@ -128,7 +128,8 @@ static std::map<std::string, std::string> getExternLibs(mlir::ModuleOp module) {
.parent_path()
.parent_path()
.parent_path() /
"python" / "triton" / "language" /
"python" / "triton" /
"third_party" / "cuda" / "lib" /
"libdevice.10.bc";
externLibs.try_emplace(libdevice, path.string());
}

View File

@@ -6,6 +6,7 @@ import shutil
import subprocess
import sys
import tarfile
import tempfile
import urllib.request
from distutils.version import LooseVersion
from typing import NamedTuple
@@ -30,9 +31,9 @@ def get_build_type():
# TODO: change to release when stable enough
return "TritonRelBuildWithAsserts"
# --- third party packages -----
class Package(NamedTuple):
package: str
name: str
@@ -42,24 +43,33 @@ class Package(NamedTuple):
lib_flag: str
syspath_var_name: str
# pybind11
def get_pybind11_package_info():
    """Return the ``Package`` record for the pinned pybind11 2.10.0 source archive.

    The record carries the extracted directory name, the GitHub tarball URL,
    the header path ``include/pybind11/pybind11.h``, the
    ``PYBIND11_INCLUDE_DIR`` flag name, an empty library flag, and the
    ``PYBIND11_SYSPATH`` variable name.
    """
    archive_name = "pybind11-2.10.0"
    tarball_url = "https://github.com/pybind/pybind11/archive/refs/tags/v2.10.0.tar.gz"
    return Package(
        "pybind11",
        archive_name,
        tarball_url,
        "include/pybind11/pybind11.h",
        "PYBIND11_INCLUDE_DIR",
        "",
        "PYBIND11_SYSPATH",
    )
# llvm
def get_llvm_package_info():
# download if nothing is installed
system = platform.system()
system_suffix = {"Linux": "linux-gnu-ubuntu-18.04", "Darwin": "apple-darwin"}[system]
use_assert_enabled_llvm = check_env_flag("TRITON_USE_ASSERT_ENABLED_LLVM", "False")
if use_assert_enabled_llvm:
name = 'llvm+mlir-14.0.0-x86_64-{}-assert'.format(system_suffix)
url = "https://github.com/shintaro-iwasaki/llvm-releases/releases/download/llvm-14.0.0-329fda39c507/{}.tar.xz".format(name)
if system == "Darwin":
system_suffix = "apple-darwin"
elif system == "Linux":
vglibc = tuple(map(int, platform.libc_ver()[1].split('.')))
vglibc = vglibc[0] * 100 + vglibc[1]
linux_suffix = 'ubuntu-18.04' if vglibc > 217 else 'centos-7'
system_suffix = f"linux-gnu-{linux_suffix}"
else:
name = 'clang+llvm-14.0.0-x86_64-{}'.format(system_suffix)
url = "https://github.com/llvm/llvm-project/releases/download/llvmorg-14.0.0/{}.tar.xz".format(name)
raise RuntimeError(f"unsupported system: {system}")
use_assert_enabled_llvm = check_env_flag("TRITON_USE_ASSERT_ENABLED_LLVM", "False")
release_suffix = "assert" if use_assert_enabled_llvm else "release"
name = f'llvm+mlir-14.0.6-x86_64-{system_suffix}-{release_suffix}'
url = f"https://github.com/ptillet/triton-llvm-releases/releases/download/llvm-14.0.6-f28c006a5895/{name}.tar.xz"
return Package("llvm", name, url, "lib", "LLVM_INCLUDE_DIRS", "LLVM_LIBRARY_DIR", "LLVM_SYSPATH")
@@ -78,16 +88,38 @@ def get_thirdparty_packages(triton_cache_path):
except Exception:
pass
os.makedirs(package_root_dir, exist_ok=True)
print('downloading and extracting {} ...'.format(p.url))
print(f'downloading and extracting {p.url} ...')
ftpstream = urllib.request.urlopen(p.url)
file = tarfile.open(fileobj=ftpstream, mode="r|*")
file.extractall(path=package_root_dir)
if p.include_flag:
thirdparty_cmake_args.append("-D{}={}/include".format(p.include_flag, package_dir))
thirdparty_cmake_args.append(f"-D{p.include_flag}={package_dir}/include")
if p.lib_flag:
thirdparty_cmake_args.append("-D{}={}/lib".format(p.lib_flag, package_dir))
thirdparty_cmake_args.append(f"-D{p.lib_flag}={package_dir}/lib")
return thirdparty_cmake_args
# ---- package data ---
def download_and_copy_ptxas():
    """Ensure a bundled ``ptxas`` binary exists under ``triton/third_party/cuda``.

    When the destination file is missing, the pinned ``cuda-nvcc`` conda
    archive is downloaded, unpacked into a temporary directory, and its
    ``bin/ptxas`` is copied next to this file under
    ``triton/third_party/cuda/bin/ptxas``. Nothing is downloaded when the
    destination already exists.

    Returns:
        The destination path relative to the ``triton`` package directory
        (``third_party/cuda/bin/ptxas``).
    """
    base_dir = os.path.dirname(__file__)
    src_path = "bin/ptxas"
    url = "https://conda.anaconda.org/nvidia/label/cuda-12.0.0/linux-64/cuda-nvcc-12.0.76-0.tar.bz2"
    dst_prefix = os.path.join(base_dir, "triton")
    dst_suffix = os.path.join("third_party", "cuda", src_path)
    dst_path = os.path.join(dst_prefix, dst_suffix)
    if not os.path.exists(dst_path):
        print(f'downloading and extracting {url} ...')
        with tempfile.TemporaryDirectory() as temp_dir:
            # Context managers close the HTTP response and the tar stream
            # even when the download or extraction fails part-way through.
            with urllib.request.urlopen(url) as ftpstream, \
                    tarfile.open(fileobj=ftpstream, mode="r|*") as archive:
                archive.extractall(path=temp_dir)
            os.makedirs(os.path.dirname(dst_path), exist_ok=True)
            # Copy before the temporary directory is removed on exit.
            shutil.copy(os.path.join(temp_dir, src_path), dst_path)
    return dst_suffix
# ---- cmake extension ----
@@ -154,7 +186,7 @@ class CMakeBuild(build_ext):
build_args = ["--config", cfg]
if platform.system() == "Windows":
cmake_args += ["-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir)]
cmake_args += [f"-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"]
if sys.maxsize > 2**32:
cmake_args += ["-A", "x64"]
build_args += ["--", "/m"]
@@ -174,15 +206,7 @@ package_data = {
"triton/language": ["*.bc"],
}
if os.getenv("TRITION_PACKAGE_CUDA_DEPS"):
base_dir = os.path.dirname(__file__)
cuda_dir = os.getenv("CUDA_HOME", "/usr/local/cuda")
triton_dir = os.path.join(base_dir, "triton")
os.makedirs(os.path.join(triton_dir, "include"), exist_ok=True)
os.makedirs(os.path.join(triton_dir, "bin"), exist_ok=True)
shutil.copy(os.path.join(cuda_dir, "include", "cuda.h"), os.path.join(triton_dir, "include"))
shutil.copy(os.path.join(cuda_dir, "bin", "ptxas"), os.path.join(triton_dir, "bin"))
package_data["triton"] = ["include/cuda.h", "bin/ptxas"]
download_and_copy_ptxas()
setup(
name="triton",
@@ -198,7 +222,7 @@ setup(
"torch",
"lit",
],
package_data=package_data,
package_data={"triton": ["third_party/*"]},
include_package_data=True,
ext_modules=[CMakeExtension("triton", "triton/_C/")],
cmdclass={"build_ext": CMakeBuild},

View File

@@ -1061,17 +1061,13 @@ def ptx_get_version(cuda_version) -> int:
def path_to_ptxas():
prefixes = [
base_dir = os.path.dirname(__file__)
paths = [
os.environ.get("TRITON_PTXAS_PATH", ""),
"",
"/usr",
os.environ.get('CUDA_PATH', default_cuda_dir())
os.path.join(base_dir, "third_party", "cuda", "bin", "ptxas")
]
if not os.getenv("TRITON_IGNORE_BUNDLED_PTXAS"):
prefixes.insert(0, os.path.dirname(__file__))
for prefix in prefixes:
ptxas = os.path.join(prefix, "bin", "ptxas")
for ptxas in paths:
if os.path.exists(ptxas):
result = subprocess.check_output([ptxas, "--version"], stderr=subprocess.STDOUT)
if result is not None:

View File

@@ -3,8 +3,7 @@ import os
from .. import impl
from . import core, extern
LIBDEVICE_PATH = os.path.dirname(
os.path.abspath(__file__)) + "/libdevice.10.bc"
LIBDEVICE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "third_party", "cuda", "lib", "libdevice.10.bc")
@impl.extern

19348
python/triton/third_party/cuda/include/cuda.h vendored Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -289,7 +289,7 @@ class Libdevice(ExternLibrary):
# return extern.dispatch("libdevice", <path>, <args>, <arg_type_symbol_dict>, _builder)
import_str = "from . import core, extern\n"
import_str += "import os\n"
header_str = "LIBDEVICE_PATH = os.path.dirname(\n\tos.path.abspath(__file__)) + \"/libdevice.10.bc\"\n"
header_str = "LIBDEVICE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), \"..\", \"third_party\", \"cuda\", \"lib\", \"libdevice.10.bc\")"
func_str = ""
for symbols in self._symbol_groups.values():
func_str += "@extern.extern\n"