Nightly GPU Benchmarks #41

Workflow file for this run

.github/workflows/gpu_benchmarks_nightly.yml at 5b08d10

	# Copyright 2025 The OpenXLA Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	#     http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ============================================================================
	name: Nightly GPU Benchmarks

	# Restrict the workflow token to read-only access to repository contents.
	permissions:
	  contents: read

	on:
	  workflow_dispatch:  # Allows manual triggering
	  schedule:
	    # GitHub evaluates schedule expressions in UTC.
	    - cron: "0 0 * * *"  # Run at midnight every day


	jobs:
	  Tests:
	    strategy:
	      # Don't fail fast - want to see results for all builds even if one fails.
	      fail-fast: false
	      matrix:
	        # One entry per GPU runner type:
	        #   os:          runner label the job is scheduled on (`runs-on`).
	        #   container:   image the job's steps run in.
	        #   pretty_name: job display name; the "Run build.py" step also passes it
	        #                (with a "_github_actions" suffix) as the --build argument.
	        job_info:
	          - os: "linux-x86-g2-48-l4-4gpu"
	            container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest"
	            pretty_name: "XLA Linux x86 GPU L4 48 vcpu Presubmit"
	          - os: "linux-x86-g2-16-l4-1gpu"
	            container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest"
	            pretty_name: "XLA Linux x86 GPU L4 16 vcpu Presubmit"
	          - os: "linux-x86-a4-224-b200-1gpu"
	            container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build-cuda12.8-cudnn9.8:latest"
	            pretty_name: "XLA Linux x86 GPU A4 224 vcpu Presubmit"
	          # Expect more GPU types in the future.
	    name: ${{ matrix.job_info.pretty_name }}
	    runs-on: ${{ matrix.job_info.os }}
	    container: ${{ matrix.job_info.container }}
	    defaults:
	      run:
	        shell: bash
	    timeout-minutes: 240
	    env:
	      # Directory that receives the benchmark reports and XSpace dumps; read by
	      # the run, GCS upload and artifact upload steps below.
	      OUTPUT_DIR: ${{ github.workspace }}/output
	    steps:
	      - name: Checkout XLA
	        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11  # v4.1.1

	      # Fetch the Gemma HLO modules into tmp_hlo/, where the benchmark step
	      # expects them.
	      - name: Download Gemma Hlo Files
	        run: |
	          mkdir -p tmp_hlo
	          cd tmp_hlo
	          wget https://storage.googleapis.com/xla-benchmarking-temp/gemma2_2b_keras_jax.hlo
	          wget https://storage.googleapis.com/xla-benchmarking-temp/gemma3_1b_flax_call.hlo
	          wget https://storage.googleapis.com/xla-benchmarking-temp/gemma3_1b_flax_sample_loop.hlo
	          cd ..

	      - name: Configure GPU backend
	        run: |
	          ./configure.py --backend=CUDA --cuda_compiler=nvcc

	      - name: "Run build.py"
	        run: |
	          ./build_tools/ci/build.py --build="${{ matrix.job_info.pretty_name }}_github_actions"

	      - name: Run HLO tests and collect data
	        run: |
	          binary_dir="./bazel-out/k8-opt/bin/xla/tools"
	          mkdir -p "$OUTPUT_DIR"

	          # Runs one HLO module through hlo_runner_main_gpu with execution
	          # profiling on, appends the output of compute_xspace_stats_main_gpu
	          # for the dumped XSpace to the same report, then prints the report.
	          #   $1: path to the .hlo file to run
	          #   $2: base name of the files written under $OUTPUT_DIR
	          #       (<base>.txt report and <base>_xspace.pb dump)
	          run_benchmark() {
	            local hlo_file="$1"
	            local output_prefix="$OUTPUT_DIR/$2"

	            echo "Running GPU test: $hlo_file"
	            "$binary_dir/multihost_hlo_runner/hlo_runner_main_gpu" \
	              --device_type=gpu \
	              --num_repeats=5 \
	              --use_spmd_partitioning \
	              --profile_execution=True \
	              --xla_gpu_dump_xspace_to="${output_prefix}_xspace.pb" \
	              "$hlo_file" \
	              > "${output_prefix}.txt"

	            "$binary_dir/compute_xspace_stats_main_gpu" \
	              --input="${output_prefix}_xspace.pb" \
	              --device_type=GPU \
	              >> "${output_prefix}.txt"

	            cat "${output_prefix}.txt"

	            echo "Output written to: ${output_prefix}.txt"
	          }

	          run_benchmark "xla/tools/hlo_opt/tests/gpu_hlo_backend.hlo" "gpu_hlo_backend"
	          run_benchmark "tmp_hlo/gemma2_2b_keras_jax.hlo" "gemma2_2b_keras_jax"
	          run_benchmark "tmp_hlo/gemma3_1b_flax_call.hlo" "gemma3_1b_flax_call"
	          run_benchmark "tmp_hlo/gemma3_1b_flax_sample_loop.hlo" "gemma3_1b_flax_sample_loop"

	      - name: Upload HLO test output to GCS
	        run: |
	          GCS_BUCKET="gs://openxla-nightly-transient"
	          TIMESTAMP=$(date +%Y%m%d_%H%M%S)
	          # Take the date part of TIMESTAMP instead of calling `date` again, so
	          # the folder and the file-name prefix always name the same day.
	          DATE_FOLDER="${TIMESTAMP%%_*}"
	          COMMIT_SHA="${{ github.sha }}"
	          RUN_ID="${{ github.run_id }}"

	          # Copies $OUTPUT_DIR/<base>.txt to the bucket under the date folder,
	          # with the timestamp, runner label, run id and commit in the object name.
	          #   $1: report base name (without the .txt extension)
	          upload_to_gcs() {
	            local base_name="$1"
	            local gcs_file_name="${DATE_FOLDER}/${TIMESTAMP}_${{ matrix.job_info.os }}_run_${RUN_ID}_commit_${COMMIT_SHA}_${base_name}.txt"
	            echo "Uploading $OUTPUT_DIR/${base_name}.txt to $GCS_BUCKET/$gcs_file_name"
	            gsutil cp "$OUTPUT_DIR/${base_name}.txt" "$GCS_BUCKET/$gcs_file_name"
	          }

	          # Same base names, in the same order, as the benchmark step.
	          for base_name in \
	            gpu_hlo_backend \
	            gemma2_2b_keras_jax \
	            gemma3_1b_flax_call \
	            gemma3_1b_flax_sample_loop; do
	            upload_to_gcs "$base_name"
	          done

	      # The artifact name includes the runner label, so each matrix entry
	      # uploads under a distinct name.
	      - name: Upload XSpace artifacts
	        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1  # v4.6.1
	        with:
	          name: gpu-xla-benchmarks-xspace-${{ matrix.job_info.os }}
	          path: ${{ github.workspace }}/output/*_xspace.pb

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Nightly GPU Benchmarks #41

Workflow file

Nightly GPU Benchmarks #41

Uh oh!

Jobs

Run details

Workflow file for this run