# Nightly GPU Benchmarks #41
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Copyright 2025 The OpenXLA Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
name: Nightly GPU Benchmarks

# Limit the workflow token to read-only access to repository contents.
permissions:
  contents: read

# Triggers: on demand from the Actions UI/API, and once a day on a schedule.
on:
  workflow_dispatch:  # Allows manual triggering
  schedule:
    - cron: "0 0 * * *"  # Run at midnight (UTC) every day
jobs:
  # One job instance per matrix entry: check out XLA, download the Gemma HLO
  # modules, configure and build for CUDA, run the HLO benchmarks with
  # profiling enabled, then publish the text results to GCS and the XSpace
  # profiles as workflow artifacts.
  Tests:
    strategy:
      # Don't fail fast - want to see results for all builds even if one fails.
      fail-fast: false
      matrix:
        # Each entry supplies the runner label (os), the container image, and
        # a display name. pretty_name is also used to form the --build
        # argument passed to build.py ("<pretty_name>_github_actions"), so
        # renaming an entry changes that argument as well.
        job_info:
          - os: "linux-x86-g2-48-l4-4gpu"
            container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest"
            pretty_name: "XLA Linux x86 GPU L4 48 vcpu Presubmit"
          - os: "linux-x86-g2-16-l4-1gpu"
            container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest"
            pretty_name: "XLA Linux x86 GPU L4 16 vcpu Presubmit"
          - os: "linux-x86-a4-224-b200-1gpu"
            container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build-cuda12.8-cudnn9.8:latest"
            pretty_name: "XLA Linux x86 GPU A4 224 vcpu Presubmit"
          # Expect more GPU types in the future.
    name: ${{ matrix.job_info.pretty_name }}
    runs-on: ${{ matrix.job_info.os }}
    container: ${{ matrix.job_info.container }}
    defaults:
      run:
        shell: bash
    timeout-minutes: 240
    env:
      # Directory that receives the benchmark text output and XSpace profiles;
      # read by the benchmark, GCS upload, and artifact upload steps.
      OUTPUT_DIR: ${{ github.workspace }}/output
    steps:
      - name: Checkout XLA
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11  # v4.1.1

      - name: Download Gemma Hlo Files
        run: |
          # Fetch the HLO modules into tmp_hlo/, where the benchmark step
          # expects to find them.
          mkdir -p tmp_hlo
          cd tmp_hlo
          wget https://storage.googleapis.com/xla-benchmarking-temp/gemma2_2b_keras_jax.hlo
          wget https://storage.googleapis.com/xla-benchmarking-temp/gemma3_1b_flax_call.hlo
          wget https://storage.googleapis.com/xla-benchmarking-temp/gemma3_1b_flax_sample_loop.hlo
          cd ..

      - name: Configure GPU backend
        run: |
          ./configure.py --backend=CUDA --cuda_compiler=nvcc

      - name: "Run build.py"
        run: |
          ./build_tools/ci/build.py --build="${{ matrix.job_info.pretty_name }}_github_actions"

      - name: Run HLO tests and collect data
        run: |
          binary_dir="./bazel-out/k8-opt/bin/xla/tools"
          mkdir -p "$OUTPUT_DIR"

          # Runs a single HLO module through hlo_runner_main_gpu with profiling
          # enabled, appends the compute_xspace_stats_main_gpu summary to the
          # same text file, and echoes the combined file into the job log.
          #   $1: path to the .hlo file. Its basename (without ".hlo") names
          #       the outputs: $OUTPUT_DIR/<name>.txt and
          #       $OUTPUT_DIR/<name>_xspace.pb.
          run_benchmark() {
            local hlo_file="$1"
            local base_name
            base_name="$(basename "$hlo_file" .hlo)"
            local output_prefix="$OUTPUT_DIR/$base_name"

            echo "Running GPU test: $hlo_file"
            "$binary_dir/multihost_hlo_runner/hlo_runner_main_gpu" \
              --device_type=gpu \
              --num_repeats=5 \
              --use_spmd_partitioning \
              --profile_execution=True \
              --xla_gpu_dump_xspace_to="${output_prefix}_xspace.pb" \
              "$hlo_file" \
              > "${output_prefix}.txt"

            "$binary_dir/compute_xspace_stats_main_gpu" \
              --input="${output_prefix}_xspace.pb" \
              --device_type=GPU \
              >> "${output_prefix}.txt"

            cat "${output_prefix}.txt"
            echo "Output written to: ${output_prefix}.txt"
          }

          # HLO module checked into the repository.
          run_benchmark "xla/tools/hlo_opt/tests/gpu_hlo_backend.hlo"
          # HLO modules fetched by the "Download Gemma Hlo Files" step.
          run_benchmark "tmp_hlo/gemma2_2b_keras_jax.hlo"
          run_benchmark "tmp_hlo/gemma3_1b_flax_call.hlo"
          run_benchmark "tmp_hlo/gemma3_1b_flax_sample_loop.hlo"

      - name: Upload HLO test output to GCS
        run: |
          GCS_BUCKET="gs://openxla-nightly-transient"
          TIMESTAMP=$(date +%Y%m%d_%H%M%S)
          DATE_FOLDER=$(date +%Y%m%d)
          COMMIT_SHA="${{ github.sha }}"
          RUN_ID="${{ github.run_id }}"

          # Copies $OUTPUT_DIR/<base_name>.txt into a per-day folder of the
          # bucket. The object name embeds the timestamp, runner label, run id
          # and commit SHA.
          #   $1: benchmark base name (file name without the ".txt" suffix).
          upload_to_gcs() {
            local base_name="$1"
            local gcs_file_name="${DATE_FOLDER}/${TIMESTAMP}_${{ matrix.job_info.os }}_run_${RUN_ID}_commit_${COMMIT_SHA}_${base_name}.txt"
            echo "Uploading $OUTPUT_DIR/${base_name}.txt to $GCS_BUCKET/$gcs_file_name"
            gsutil cp "$OUTPUT_DIR/${base_name}.txt" "$GCS_BUCKET/$gcs_file_name"
          }

          # Upload the text output of every benchmark produced by the
          # "Run HLO tests and collect data" step.
          for base_name in \
            gpu_hlo_backend \
            gemma2_2b_keras_jax \
            gemma3_1b_flax_call \
            gemma3_1b_flax_sample_loop; do
            upload_to_gcs "$base_name"
          done

      - name: Upload XSpace artifacts
        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1  # v4.6.1
        with:
          name: gpu-xla-benchmarks-xspace-${{ matrix.job_info.os }}
          path: ${{ github.workspace }}/output/*_xspace.pb