
Commit 19b0c63

Merge branch 'pytorch:main' into remove_buck_out_on_clean

2 parents: d4c9aa8 + a6e104c

344 files changed: +5894 −7538 lines

.ci/scripts/build-mediatek-sdk.sh

Lines changed: 22 additions & 0 deletions

@@ -0,0 +1,22 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -eux
+
+build_neuron_backend() {
+  echo "Start building neuron backend."
+  export ANDROID_NDK=/opt/ndk
+  export MEDIATEK_SDK_ROOT=/tmp/neuropilot
+  export NEURON_BUFFER_ALLOCATOR_LIB=${MEDIATEK_SDK_ROOT}/libneuron_buffer_allocator.so
+  export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
+
+
+  cd ${EXECUTORCH_ROOT}
+  ./backends/mediatek/scripts/mtk_build.sh
+}
+
+build_neuron_backend

.ci/scripts/gather_benchmark_configs.py

Lines changed: 9 additions & 8 deletions

@@ -32,7 +32,8 @@
 BENCHMARK_CONFIGS = {
     "xplat": [
         "xnnpack_q8",
-        "hf_xnnpack_fp32",
+        "hf_xnnpack_custom_spda_kv_cache_8da4w",
+        "et_xnnpack_custom_spda_kv_cache_8da4w",
         "llama3_fb16",
         "llama3_spinquant",
         "llama3_qlora",

@@ -129,25 +130,25 @@ def generate_compatible_configs(model_name: str, target_os=None) -> List[str]:
     """
     configs = []
     if is_valid_huggingface_model_id(model_name):
+        configs.append("hf_xnnpack_custom_spda_kv_cache_8da4w")
         if model_name.startswith("meta-llama/"):
-            # LLaMA models
+            # etLLM recipes for Llama
             repo_name = model_name.split("meta-llama/")[1]
             if "qlora" in repo_name.lower():
-                configs.append("llama3_qlora")
+                configs = ["llama3_qlora"]
             elif "spinquant" in repo_name.lower():
-                configs.append("llama3_spinquant")
+                configs = ["llama3_spinquant"]
             else:
-                configs.append("llama3_fb16")
+                configs.extend(["llama3_fb16", "et_xnnpack_custom_spda_kv_cache_8da4w"])
             configs.extend(
                 [
                     config
                     for config in BENCHMARK_CONFIGS.get(target_os, [])
                     if config.startswith("llama")
                 ]
            )
-        else:
-            # Non-LLaMA models
-            configs.append("hf_xnnpack_fp32")
+        if model_name.startswith("Qwen/Qwen3"):
+            configs.append("et_xnnpack_custom_spda_kv_cache_8da4w")
     elif model_name in MODEL_NAME_TO_MODEL:
         # ExecuTorch in-tree non-GenAI models
         configs.append("xnnpack_q8")
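Net effect of this change: every valid Hugging Face model id now picks up the hf_xnnpack_custom_spda_kv_cache_8da4w recipe, Llama models either collapse to their single quantized recipe (qlora/spinquant) or add the etLLM fb16 and et_xnnpack recipes, and Qwen3 models also get the et_xnnpack recipe. A quick, hypothetical spot-check of the new logic (run from .ci/scripts; needs network access, since is_valid_huggingface_model_id checks the Hugging Face Hub):

    cd .ci/scripts
    python3 -c "from gather_benchmark_configs import generate_compatible_configs as g; print(g('Qwen/Qwen3-0.6B', 'android'))"
    # expected, per the diff above:
    # ['hf_xnnpack_custom_spda_kv_cache_8da4w', 'et_xnnpack_custom_spda_kv_cache_8da4w']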

.ci/scripts/setup-mediatek-deps.sh

Lines changed: 42 additions & 0 deletions

@@ -0,0 +1,42 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -eux
+
+MEDIATEK_INSTALLATION_DIR=/tmp/neuropilot
+EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
+
+install_neuropilot() {
+  echo "Start installing neuropilot."
+  mkdir -p "${MEDIATEK_INSTALLATION_DIR}"
+
+  curl -Lo /tmp/neuropilot-express.tar.gz "https://s3.ap-southeast-1.amazonaws.com/mediatek.neuropilot.com/06302508-4c94-4bf2-9789-b0ee44e83e27.gz"
+  echo "Finishing downloading neuropilot sdk."
+  tar zxvf /tmp/neuropilot-express.tar.gz --strip-components=1 --directory "${MEDIATEK_INSTALLATION_DIR}"
+  echo "Finishing unzip neuropilot sdk."
+
+  # Copy NP header
+  cp ${MEDIATEK_INSTALLATION_DIR}/api/NeuronAdapter.h ${EXECUTORCH_ROOT}/backends/mediatek/runtime/include/api/
+
+  # Print the content for manual verification
+  ls -lah "${MEDIATEK_INSTALLATION_DIR}"
+}
+
+setup_neuropilot() {
+  pip3 install -r ${EXECUTORCH_ROOT}/backends/mediatek/requirements.txt
+  pip3 install ${MEDIATEK_INSTALLATION_DIR}/mtk_neuron-8.2.19-py3-none-linux_x86_64.whl
+  pip3 install ${MEDIATEK_INSTALLATION_DIR}/mtk_converter-8.13.0_public_packages/mtk_converter-8.13.0+public-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
+}
+
+setup_calibration_data() {
+  curl -Lo /tmp/imagenette2-160.tgz https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz
+  tar zxvf /tmp/imagenette2-160.tgz --strip-components=1 --directory "${MEDIATEK_INSTALLATION_DIR}"
+}
+
+install_neuropilot
+setup_neuropilot
+setup_calibration_data
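Together with build-mediatek-sdk.sh above, the intended CI order appears to be: install the SDK, its Python wheels, and the calibration data first, then build the Neuron backend against them. A minimal local sketch, assuming an x86_64 Linux host with the Android NDK at /opt/ndk (the path both scripts hard-code):

    # Download the NeuroPilot SDK, install its Python wheels, and fetch calibration data
    bash .ci/scripts/setup-mediatek-deps.sh

    # Build the MediaTek Neuron backend against the SDK installed in /tmp/neuropilot
    bash .ci/scripts/build-mediatek-sdk.sh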

.ci/scripts/test_llama.sh

Lines changed: 1 addition & 2 deletions

@@ -156,8 +156,7 @@ cmake_install_executorch_libraries() {
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
         -DEXECUTORCH_BUILD_QNN="$QNN" \
-        -DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
-        -Bcmake-out .
+        -DQNN_SDK_ROOT="$QNN_SDK_ROOT"
     cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
 }

.ci/scripts/test_llava.sh

Lines changed: 1 addition & 1 deletion

@@ -147,7 +147,7 @@ run_and_verify() {

     # verify result.txt
     RESULT=$(cat result.txt)
-    EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. "
+    EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with"

     if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
         echo "Expected result prefix: ${EXPECTED_PREFIX}"

.ci/scripts/test_model.sh

Lines changed: 24 additions & 0 deletions

@@ -244,6 +244,24 @@ test_model_with_mps() {
     EXPORTED_MODEL=$(find "." -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }

+test_model_with_mediatek() {
+    if [[ "${MODEL_NAME}" == "dl3" ]]; then
+        EXPORT_SCRIPT=deeplab_v3
+    elif [[ "${MODEL_NAME}" == "mv3" ]]; then
+        EXPORT_SCRIPT=mobilenet_v3
+    elif [[ "${MODEL_NAME}" == "mv2" ]]; then
+        EXPORT_SCRIPT=mobilenet_v2
+    elif [[ "${MODEL_NAME}" == "ic4" ]]; then
+        EXPORT_SCRIPT=inception_v4
+    elif [[ "${MODEL_NAME}" == "ic3" ]]; then
+        EXPORT_SCRIPT=inception_v3
+    fi
+
+    PYTHONPATH=examples/mediatek/ "${PYTHON_EXECUTABLE}" -m examples.mediatek.model_export_scripts.${EXPORT_SCRIPT} -d /tmp/neuropilot/train -a ${EXPORT_SCRIPT}
+    EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "*.pte" -print -quit)
+}
+
+
 if [[ "${BACKEND}" == "portable" ]]; then
     echo "Testing ${MODEL_NAME} with portable kernels..."
     test_model

@@ -281,6 +299,12 @@ elif [[ "${BACKEND}" == *"xnnpack"* ]]; then
     if [[ $? -eq 0 ]]; then
         prepare_artifacts_upload
     fi
+elif [[ "${BACKEND}" == "mediatek" ]]; then
+    echo "Testing ${MODEL_NAME} with mediatek..."
+    test_model_with_mediatek
+    if [[ $? -eq 0 ]]; then
+        prepare_artifacts_upload
+    fi
 else
     set +e
     if [[ "${BACKEND}" == *"quantization"* ]]; then

.ci/scripts/tests/test_gather_benchmark_configs.py

Lines changed: 13 additions & 4 deletions

@@ -112,15 +112,24 @@ def test_generate_compatible_configs_llama_model(self):
         result = self.gather_benchmark_configs.generate_compatible_configs(
             model_name, target_os
         )
-        expected = ["llama3_fb16", "llama3_coreml_ane"]
-        self.assertEqual(result, expected)
+        expected = [
+            "llama3_fb16",
+            "llama3_coreml_ane",
+            "et_xnnpack_custom_spda_kv_cache_8da4w",
+            "hf_xnnpack_custom_spda_kv_cache_8da4w",
+        ]
+        self.assertCountEqual(result, expected)

         target_os = "android"
         result = self.gather_benchmark_configs.generate_compatible_configs(
             model_name, target_os
         )
-        expected = ["llama3_fb16"]
-        self.assertEqual(result, expected)
+        expected = [
+            "llama3_fb16",
+            "et_xnnpack_custom_spda_kv_cache_8da4w",
+            "hf_xnnpack_custom_spda_kv_cache_8da4w",
+        ]
+        self.assertCountEqual(result, expected)

     def test_generate_compatible_configs_quantized_llama_model(self):
         model_name = "meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8"
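assertCountEqual replaces assertEqual because configs are now accumulated from several independent branches, so only the set of recipes is stable, not their order. To run just this suite (assuming pytest and the repo's Python dependencies are installed):

    python3 -m pytest .ci/scripts/tests/test_gather_benchmark_configs.py -v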

.ci/scripts/unittest-buck2.sh

Lines changed: 6 additions & 2 deletions

@@ -15,8 +15,10 @@ buck2 query "//backends/apple/... + //backends/example/... + \
 //kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
 //kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."

+# TODO: optimized ops are unbuildable because they now use ATen; put
+# them back after we can use PyTorch in OSS buck.
 UNBUILDABLE_OPTIMIZED_OPS_REGEX="_elu|gelu|fft|log_softmax"
-BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
+BUILDABLE_OPTIMIZED_OPS= #$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)

 # TODO: build prim_ops_test_cpp again once supported_features works in
 # OSS buck.

@@ -25,7 +27,9 @@ BUILDABLE_KERNELS_PRIM_OPS_TARGETS=$(buck2 query //kernels/prim_ops/... | grep -
 # //runtime/kernel/... is failing because //third-party:torchgen_files's shell script can't find python on PATH.
 # //runtime/test/... requires Python torch, which we don't have in our OSS buck setup.
 for op in "build" "test"; do
-  buck2 $op $BUILDABLE_OPTIMIZED_OPS //kernels/portable/... \
+  buck2 $op $BUILDABLE_OPTIMIZED_OPS \
+    //examples/selective_build:select_all_dtype_selective_lib_portable_lib \
+    //kernels/portable/... \
     $BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
     //runtime/executor: //runtime/kernel/... //runtime/platform/...
 done
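With BUILDABLE_OPTIMIZED_OPS now empty, each loop iteration effectively reduces to the following (reconstructed from the script as patched; the target list is verbatim):

    buck2 build //examples/selective_build:select_all_dtype_selective_lib_portable_lib \
      //kernels/portable/... \
      $BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
      //runtime/executor: //runtime/kernel/... //runtime/platform/...
    # ...then the same target list again with `buck2 test`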

.github/workflows/android-perf-private-device-experiment.yml

Lines changed: 3 additions & 3 deletions

@@ -18,7 +18,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8
+        default: Qwen/Qwen3-0.6B
       devices:
         description: Target devices to run benchmark
         required: false

@@ -34,7 +34,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8
+        default: Qwen/Qwen3-0.6B
       devices:
         description: Target devices to run benchmark
         required: false

@@ -57,6 +57,6 @@ jobs:
       id-token: write
       contents: read
     with:
-      models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' }}
+      models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
       devices: samsung_galaxy_s22_private
       benchmark_configs: ${{ inputs.benchmark_configs }}
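In GitHub Actions expressions, && binds tighter than ||, so the new models default means: use inputs.models when provided; otherwise run the four-model list on scheduled runs and only Qwen/Qwen3-0.6B everywhere else. A bash paraphrase of that selection (variable names here are illustrative, not part of the workflow):

    # MODELS_INPUT and EVENT_NAME stand in for inputs.models and github.event_name
    if [[ -n "${MODELS_INPUT:-}" ]]; then
      MODELS="${MODELS_INPUT}"
    elif [[ "${EVENT_NAME}" == "schedule" ]]; then
      MODELS="Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf"
    else
      MODELS="Qwen/Qwen3-0.6B"
    fi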
