62#define DEBUG_TYPE "x86tti"
78 std::optional<unsigned>
164 bool Vector = (ClassID == 1);
171 if (!
Vector && ST->hasEGPR())
187 auto *VTy = dyn_cast<FixedVectorType>(Ty);
188 if (!Ty->
isIntegerTy() && (!VTy || VTy->getNumElements() != 1))
191 switch (cast<IntegerType>(ScalarTy)->
getBitWidth()) {
208 if (ST->
hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
210 if (ST->
hasAVX() && PreferVectorWidth >= 256)
212 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
253 if (Opcode == Instruction::Mul && Ty->
isVectorTy() &&
270 assert(ISD &&
"Invalid opcode");
272 if (ISD ==
ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
273 (LT.second.getScalarType() == MVT::i32 ||
274 LT.second.getScalarType() == MVT::i64)) {
276 bool Op1Signed =
false, Op2Signed =
false;
279 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
280 bool SignedMode = Op1Signed || Op2Signed;
285 if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
286 LT.second.getScalarType() == MVT::i32) {
288 isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
290 isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
291 bool Op1Sext = isa<SExtInst>(Args[0]) &&
292 (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->
hasSSE41()));
293 bool Op2Sext = isa<SExtInst>(Args[1]) &&
294 (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->
hasSSE41()));
296 bool IsZeroExtended = !Op1Signed || !Op2Signed;
297 bool IsConstant = Op1Constant || Op2Constant;
298 bool IsSext = Op1Sext || Op2Sext;
299 if (IsConstant || IsZeroExtended || IsSext)
307 if (ST->useSLMArithCosts() && LT.second == MVT::v4i32) {
310 if (!SignedMode && OpMinSize <= 8)
314 if (!SignedMode && OpMinSize <= 16)
321 if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
374 {
ISD::SHL, MVT::v16i8, { 1, 6, 1, 2 } },
375 {
ISD::SRL, MVT::v16i8, { 1, 6, 1, 2 } },
376 {
ISD::SRA, MVT::v16i8, { 1, 6, 1, 2 } },
377 {
ISD::SHL, MVT::v32i8, { 1, 6, 1, 2 } },
378 {
ISD::SRL, MVT::v32i8, { 1, 6, 1, 2 } },
379 {
ISD::SRA, MVT::v32i8, { 1, 6, 1, 2 } },
380 {
ISD::SHL, MVT::v64i8, { 1, 6, 1, 2 } },
381 {
ISD::SRL, MVT::v64i8, { 1, 6, 1, 2 } },
382 {
ISD::SRA, MVT::v64i8, { 1, 6, 1, 2 } },
386 if (
const auto *Entry =
388 if (
auto KindCost = Entry->Cost[
CostKind])
389 return LT.first * *KindCost;
392 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
393 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
394 {
ISD::SRA, MVT::v16i8, { 1, 8, 4, 5 } },
395 {
ISD::SHL, MVT::v32i8, { 1, 8, 2, 3 } },
396 {
ISD::SRL, MVT::v32i8, { 1, 8, 2, 3 } },
397 {
ISD::SRA, MVT::v32i8, { 1, 9, 4, 5 } },
398 {
ISD::SHL, MVT::v64i8, { 1, 8, 2, 3 } },
399 {
ISD::SRL, MVT::v64i8, { 1, 8, 2, 3 } },
400 {
ISD::SRA, MVT::v64i8, { 1, 9, 4, 6 } },
402 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
403 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
404 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
405 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
406 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
407 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
411 if (
const auto *Entry =
413 if (
auto KindCost = Entry->Cost[
CostKind])
414 return LT.first * *KindCost;
417 {
ISD::SHL, MVT::v64i8, { 2, 12, 5, 6 } },
418 {
ISD::SRL, MVT::v64i8, { 2, 12, 5, 6 } },
419 {
ISD::SRA, MVT::v64i8, { 3, 10, 12, 12 } },
421 {
ISD::SHL, MVT::v16i16, { 2, 7, 4, 4 } },
422 {
ISD::SRL, MVT::v16i16, { 2, 7, 4, 4 } },
423 {
ISD::SRA, MVT::v16i16, { 2, 7, 4, 4 } },
425 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
426 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
427 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
428 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
429 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
430 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
432 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
433 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
434 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
435 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
436 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
437 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
438 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
447 if (
const auto *Entry =
449 if (
auto KindCost = Entry->Cost[
CostKind])
450 return LT.first * *KindCost;
453 {
ISD::SHL, MVT::v16i8, { 1, 8, 2, 3 } },
454 {
ISD::SRL, MVT::v16i8, { 1, 8, 2, 3 } },
455 {
ISD::SRA, MVT::v16i8, { 2, 10, 5, 6 } },
456 {
ISD::SHL, MVT::v32i8, { 2, 8, 2, 4 } },
457 {
ISD::SRL, MVT::v32i8, { 2, 8, 2, 4 } },
458 {
ISD::SRA, MVT::v32i8, { 3, 10, 5, 9 } },
460 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
461 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
462 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
463 {
ISD::SHL, MVT::v16i16,{ 2, 2, 1, 2 } },
464 {
ISD::SRL, MVT::v16i16,{ 2, 2, 1, 2 } },
465 {
ISD::SRA, MVT::v16i16,{ 2, 2, 1, 2 } },
467 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
468 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
469 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
470 {
ISD::SHL, MVT::v8i32, { 2, 2, 1, 2 } },
471 {
ISD::SRL, MVT::v8i32, { 2, 2, 1, 2 } },
472 {
ISD::SRA, MVT::v8i32, { 2, 2, 1, 2 } },
474 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
475 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
476 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
477 {
ISD::SHL, MVT::v4i64, { 2, 2, 1, 2 } },
478 {
ISD::SRL, MVT::v4i64, { 2, 2, 1, 2 } },
479 {
ISD::SRA, MVT::v4i64, { 4, 4, 3, 6 } },
488 if (
const auto *Entry =
490 if (
auto KindCost = Entry->Cost[
CostKind])
491 return LT.first * *KindCost;
494 {
ISD::SHL, MVT::v16i8, { 2, 7, 2, 3 } },
495 {
ISD::SRL, MVT::v16i8, { 2, 7, 2, 3 } },
496 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
497 {
ISD::SHL, MVT::v32i8, { 4, 7, 7, 8 } },
498 {
ISD::SRL, MVT::v32i8, { 4, 7, 7, 8 } },
499 {
ISD::SRA, MVT::v32i8, { 7, 7, 12, 13 } },
501 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 1 } },
502 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 1 } },
503 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 1 } },
504 {
ISD::SHL, MVT::v16i16,{ 3, 6, 4, 5 } },
505 {
ISD::SRL, MVT::v16i16,{ 3, 6, 4, 5 } },
506 {
ISD::SRA, MVT::v16i16,{ 3, 6, 4, 5 } },
508 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 1 } },
509 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 1 } },
510 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 1 } },
511 {
ISD::SHL, MVT::v8i32, { 3, 6, 4, 5 } },
512 {
ISD::SRL, MVT::v8i32, { 3, 6, 4, 5 } },
513 {
ISD::SRA, MVT::v8i32, { 3, 6, 4, 5 } },
515 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 1 } },
516 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 1 } },
517 {
ISD::SRA, MVT::v2i64, { 2, 3, 3, 3 } },
518 {
ISD::SHL, MVT::v4i64, { 3, 6, 4, 5 } },
519 {
ISD::SRL, MVT::v4i64, { 3, 6, 4, 5 } },
520 {
ISD::SRA, MVT::v4i64, { 5, 7, 8, 9 } },
530 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
531 if (
const auto *Entry =
533 if (
auto KindCost = Entry->Cost[
CostKind])
534 return LT.first * *KindCost;
537 {
ISD::SHL, MVT::v16i8, { 1, 7, 2, 3 } },
538 {
ISD::SRL, MVT::v16i8, { 1, 7, 2, 3 } },
539 {
ISD::SRA, MVT::v16i8, { 3, 9, 5, 6 } },
541 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
542 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
543 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
545 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
546 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
547 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
549 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
550 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
551 {
ISD::SRA, MVT::v2i64, { 3, 5, 6, 6 } },
561 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
562 if (
const auto *Entry =
564 if (
auto KindCost = Entry->Cost[
CostKind])
565 return LT.first * *KindCost;
580 if (
const auto *Entry =
582 if (
auto KindCost = Entry->Cost[
CostKind])
583 return LT.first * *KindCost;
603 if (
const auto *Entry =
605 if (
auto KindCost = Entry->Cost[
CostKind])
606 return LT.first * *KindCost;
626 if (
const auto *Entry =
CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
627 if (
auto KindCost = Entry->Cost[
CostKind])
628 return LT.first * *KindCost;
648 if (
const auto *Entry =
CostTableLookup(AVXConstCostTable, ISD, LT.second))
649 if (
auto KindCost = Entry->Cost[
CostKind])
650 return LT.first * *KindCost;
658 if (
const auto *Entry =
660 if (
auto KindCost = Entry->Cost[
CostKind])
661 return LT.first * *KindCost;
681 if (
const auto *Entry =
CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
682 if (
auto KindCost = Entry->Cost[
CostKind])
683 return LT.first * *KindCost;
686 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
687 {
ISD::SRL, MVT::v16i8, { 3,10, 5, 8 } },
688 {
ISD::SRA, MVT::v16i8, { 4,12, 8,12 } },
689 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
690 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
691 {
ISD::SRA, MVT::v32i8, { 5,10,10,13 } },
692 {
ISD::SHL, MVT::v64i8, { 4, 7, 6, 8 } },
693 {
ISD::SRL, MVT::v64i8, { 4, 8, 7,10 } },
694 {
ISD::SRA, MVT::v64i8, { 5,10,10,15 } },
696 {
ISD::SHL, MVT::v32i16, { 2, 4, 2, 3 } },
697 {
ISD::SRL, MVT::v32i16, { 2, 4, 2, 3 } },
698 {
ISD::SRA, MVT::v32i16, { 2, 4, 2, 3 } },
702 if (
const auto *Entry =
704 if (
auto KindCost = Entry->Cost[
CostKind])
705 return LT.first * *KindCost;
708 {
ISD::SHL, MVT::v32i16, { 5,10, 5, 7 } },
709 {
ISD::SRL, MVT::v32i16, { 5,10, 5, 7 } },
710 {
ISD::SRA, MVT::v32i16, { 5,10, 5, 7 } },
712 {
ISD::SHL, MVT::v16i32, { 2, 4, 2, 3 } },
713 {
ISD::SRL, MVT::v16i32, { 2, 4, 2, 3 } },
714 {
ISD::SRA, MVT::v16i32, { 2, 4, 2, 3 } },
716 {
ISD::SRA, MVT::v2i64, { 1, 2, 1, 2 } },
717 {
ISD::SHL, MVT::v4i64, { 1, 4, 1, 2 } },
718 {
ISD::SRL, MVT::v4i64, { 1, 4, 1, 2 } },
719 {
ISD::SRA, MVT::v4i64, { 1, 4, 1, 2 } },
720 {
ISD::SHL, MVT::v8i64, { 1, 4, 1, 2 } },
721 {
ISD::SRL, MVT::v8i64, { 1, 4, 1, 2 } },
722 {
ISD::SRA, MVT::v8i64, { 1, 4, 1, 2 } },
726 if (
const auto *Entry =
728 if (
auto KindCost = Entry->Cost[
CostKind])
729 return LT.first * *KindCost;
733 {
ISD::SHL, MVT::v16i8, { 3, 5, 5, 7 } },
734 {
ISD::SRL, MVT::v16i8, { 3, 9, 5, 8 } },
735 {
ISD::SRA, MVT::v16i8, { 4, 5, 9,13 } },
736 {
ISD::SHL, MVT::v32i8, { 4, 7, 6, 8 } },
737 {
ISD::SRL, MVT::v32i8, { 4, 8, 7, 9 } },
738 {
ISD::SRA, MVT::v32i8, { 6, 9,11,16 } },
740 {
ISD::SHL, MVT::v8i16, { 1, 2, 1, 2 } },
741 {
ISD::SRL, MVT::v8i16, { 1, 2, 1, 2 } },
742 {
ISD::SRA, MVT::v8i16, { 1, 2, 1, 2 } },
743 {
ISD::SHL, MVT::v16i16, { 2, 4, 2, 3 } },
744 {
ISD::SRL, MVT::v16i16, { 2, 4, 2, 3 } },
745 {
ISD::SRA, MVT::v16i16, { 2, 4, 2, 3 } },
747 {
ISD::SHL, MVT::v4i32, { 1, 2, 1, 2 } },
748 {
ISD::SRL, MVT::v4i32, { 1, 2, 1, 2 } },
749 {
ISD::SRA, MVT::v4i32, { 1, 2, 1, 2 } },
750 {
ISD::SHL, MVT::v8i32, { 2, 4, 2, 3 } },
751 {
ISD::SRL, MVT::v8i32, { 2, 4, 2, 3 } },
752 {
ISD::SRA, MVT::v8i32, { 2, 4, 2, 3 } },
754 {
ISD::SHL, MVT::v2i64, { 1, 2, 1, 2 } },
755 {
ISD::SRL, MVT::v2i64, { 1, 2, 1, 2 } },
756 {
ISD::SRA, MVT::v2i64, { 2, 4, 5, 7 } },
757 {
ISD::SHL, MVT::v4i64, { 2, 4, 1, 2 } },
758 {
ISD::SRL, MVT::v4i64, { 2, 4, 1, 2 } },
759 {
ISD::SRA, MVT::v4i64, { 4, 6, 5, 9 } },
763 if (
const auto *Entry =
765 if (
auto KindCost = Entry->Cost[
CostKind])
766 return LT.first * *KindCost;
769 {
ISD::SHL, MVT::v16i8, { 4, 4, 6, 8 } },
770 {
ISD::SRL, MVT::v16i8, { 4, 8, 5, 8 } },
771 {
ISD::SRA, MVT::v16i8, { 6, 6, 9,13 } },
772 {
ISD::SHL, MVT::v32i8, { 7, 8,11,14 } },
773 {
ISD::SRL, MVT::v32i8, { 7, 9,10,14 } },
774 {
ISD::SRA, MVT::v32i8, { 10,11,16,21 } },
776 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 2 } },
777 {
ISD::SRL, MVT::v8i16, { 1, 3, 1, 2 } },
778 {
ISD::SRA, MVT::v8i16, { 1, 3, 1, 2 } },
779 {
ISD::SHL, MVT::v16i16, { 3, 7, 5, 7 } },
780 {
ISD::SRL, MVT::v16i16, { 3, 7, 5, 7 } },
781 {
ISD::SRA, MVT::v16i16, { 3, 7, 5, 7 } },
783 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 2 } },
784 {
ISD::SRL, MVT::v4i32, { 1, 3, 1, 2 } },
785 {
ISD::SRA, MVT::v4i32, { 1, 3, 1, 2 } },
786 {
ISD::SHL, MVT::v8i32, { 3, 7, 5, 7 } },
787 {
ISD::SRL, MVT::v8i32, { 3, 7, 5, 7 } },
788 {
ISD::SRA, MVT::v8i32, { 3, 7, 5, 7 } },
790 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 2 } },
791 {
ISD::SRL, MVT::v2i64, { 1, 3, 1, 2 } },
792 {
ISD::SRA, MVT::v2i64, { 3, 4, 5, 7 } },
793 {
ISD::SHL, MVT::v4i64, { 3, 7, 4, 6 } },
794 {
ISD::SRL, MVT::v4i64, { 3, 7, 4, 6 } },
795 {
ISD::SRA, MVT::v4i64, { 6, 7,10,13 } },
800 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
801 if (
const auto *Entry =
803 if (
auto KindCost = Entry->Cost[
CostKind])
804 return LT.first * *KindCost;
808 {
ISD::SHL, MVT::v16i8, { 9, 10, 6, 9 } },
809 {
ISD::SRL, MVT::v16i8, { 9, 13, 5, 9 } },
810 {
ISD::SRA, MVT::v16i8, { 11, 15, 9,13 } },
812 {
ISD::SHL, MVT::v8i16, { 2, 2, 1, 2 } },
813 {
ISD::SRL, MVT::v8i16, { 2, 2, 1, 2 } },
814 {
ISD::SRA, MVT::v8i16, { 2, 2, 1, 2 } },
816 {
ISD::SHL, MVT::v4i32, { 2, 2, 1, 2 } },
817 {
ISD::SRL, MVT::v4i32, { 2, 2, 1, 2 } },
818 {
ISD::SRA, MVT::v4i32, { 2, 2, 1, 2 } },
820 {
ISD::SHL, MVT::v2i64, { 2, 2, 1, 2 } },
821 {
ISD::SRL, MVT::v2i64, { 2, 2, 1, 2 } },
822 {
ISD::SRA, MVT::v2i64, { 5, 9, 5, 7 } },
826 (!ST->hasXOP() || LT.second.getScalarSizeInBits() != 8))
827 if (
const auto *Entry =
829 if (
auto KindCost = Entry->Cost[
CostKind])
830 return LT.first * *KindCost;
833 {
ISD::MUL, MVT::v2i64, { 2, 15, 1, 3 } },
834 {
ISD::MUL, MVT::v4i64, { 2, 15, 1, 3 } },
835 {
ISD::MUL, MVT::v8i64, { 3, 15, 1, 3 } }
840 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
841 if (
auto KindCost = Entry->Cost[
CostKind])
842 return LT.first * *KindCost;
845 {
ISD::SHL, MVT::v16i8, { 4, 8, 4, 5 } },
846 {
ISD::SRL, MVT::v16i8, { 4, 8, 4, 5 } },
847 {
ISD::SRA, MVT::v16i8, { 4, 8, 4, 5 } },
848 {
ISD::SHL, MVT::v32i8, { 4, 23,11,16 } },
849 {
ISD::SRL, MVT::v32i8, { 4, 30,12,18 } },
850 {
ISD::SRA, MVT::v32i8, { 6, 13,24,30 } },
851 {
ISD::SHL, MVT::v64i8, { 6, 19,13,15 } },
852 {
ISD::SRL, MVT::v64i8, { 7, 27,15,18 } },
853 {
ISD::SRA, MVT::v64i8, { 15, 15,30,30 } },
855 {
ISD::SHL, MVT::v8i16, { 1, 1, 1, 1 } },
856 {
ISD::SRL, MVT::v8i16, { 1, 1, 1, 1 } },
857 {
ISD::SRA, MVT::v8i16, { 1, 1, 1, 1 } },
858 {
ISD::SHL, MVT::v16i16, { 1, 1, 1, 1 } },
859 {
ISD::SRL, MVT::v16i16, { 1, 1, 1, 1 } },
860 {
ISD::SRA, MVT::v16i16, { 1, 1, 1, 1 } },
861 {
ISD::SHL, MVT::v32i16, { 1, 1, 1, 1 } },
862 {
ISD::SRL, MVT::v32i16, { 1, 1, 1, 1 } },
863 {
ISD::SRA, MVT::v32i16, { 1, 1, 1, 1 } },
865 {
ISD::ADD, MVT::v64i8, { 1, 1, 1, 1 } },
866 {
ISD::ADD, MVT::v32i16, { 1, 1, 1, 1 } },
868 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 1 } },
869 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 1 } },
870 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 1 } },
871 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 1 } },
873 {
ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } },
874 {
ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } },
876 {
ISD::MUL, MVT::v16i8, { 4, 12, 4, 5 } },
877 {
ISD::MUL, MVT::v32i8, { 3, 10, 7,10 } },
878 {
ISD::MUL, MVT::v64i8, { 3, 11, 7,10 } },
879 {
ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } },
881 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } },
882 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 1 } },
883 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 1 } },
884 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 1 } },
889 if (
const auto *Entry =
CostTableLookup(AVX512BWCostTable, ISD, LT.second))
890 if (
auto KindCost = Entry->Cost[
CostKind])
891 return LT.first * *KindCost;
894 {
ISD::SHL, MVT::v64i8, { 15, 19,27,33 } },
895 {
ISD::SRL, MVT::v64i8, { 15, 19,30,36 } },
896 {
ISD::SRA, MVT::v64i8, { 37, 37,51,63 } },
898 {
ISD::SHL, MVT::v32i16, { 11, 16,11,15 } },
899 {
ISD::SRL, MVT::v32i16, { 11, 16,11,15 } },
900 {
ISD::SRA, MVT::v32i16, { 11, 16,11,15 } },
902 {
ISD::SHL, MVT::v4i32, { 1, 1, 1, 1 } },
903 {
ISD::SRL, MVT::v4i32, { 1, 1, 1, 1 } },
904 {
ISD::SRA, MVT::v4i32, { 1, 1, 1, 1 } },
905 {
ISD::SHL, MVT::v8i32, { 1, 1, 1, 1 } },
906 {
ISD::SRL, MVT::v8i32, { 1, 1, 1, 1 } },
907 {
ISD::SRA, MVT::v8i32, { 1, 1, 1, 1 } },
908 {
ISD::SHL, MVT::v16i32, { 1, 1, 1, 1 } },
909 {
ISD::SRL, MVT::v16i32, { 1, 1, 1, 1 } },
910 {
ISD::SRA, MVT::v16i32, { 1, 1, 1, 1 } },
912 {
ISD::SHL, MVT::v2i64, { 1, 1, 1, 1 } },
913 {
ISD::SRL, MVT::v2i64, { 1, 1, 1, 1 } },
914 {
ISD::SRA, MVT::v2i64, { 1, 1, 1, 1 } },
915 {
ISD::SHL, MVT::v4i64, { 1, 1, 1, 1 } },
916 {
ISD::SRL, MVT::v4i64, { 1, 1, 1, 1 } },
917 {
ISD::SRA, MVT::v4i64, { 1, 1, 1, 1 } },
918 {
ISD::SHL, MVT::v8i64, { 1, 1, 1, 1 } },
919 {
ISD::SRL, MVT::v8i64, { 1, 1, 1, 1 } },
920 {
ISD::SRA, MVT::v8i64, { 1, 1, 1, 1 } },
922 {
ISD::ADD, MVT::v64i8, { 3, 7, 5, 5 } },
923 {
ISD::ADD, MVT::v32i16, { 3, 7, 5, 5 } },
925 {
ISD::SUB, MVT::v64i8, { 3, 7, 5, 5 } },
926 {
ISD::SUB, MVT::v32i16, { 3, 7, 5, 5 } },
928 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 1 } },
929 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 1 } },
930 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 1 } },
931 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 1 } },
933 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 1 } },
934 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 1 } },
935 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 1 } },
936 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 1 } },
938 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 1 } },
939 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 1 } },
940 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 1 } },
941 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 1 } },
943 {
ISD::MUL, MVT::v16i32, { 1, 10, 1, 2 } },
944 {
ISD::MUL, MVT::v8i32, { 1, 10, 1, 2 } },
945 {
ISD::MUL, MVT::v4i32, { 1, 10, 1, 2 } },
946 {
ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } },
951 {
ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } },
952 {
ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } },
953 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } },
954 {
ISD::FSUB, MVT::v8f64, { 1, 4, 1, 1 } },
955 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 1 } },
956 {
ISD::FMUL, MVT::v8f64, { 1, 4, 1, 1 } },
957 {
ISD::FMUL, MVT::v4f64, { 1, 4, 1, 1 } },
958 {
ISD::FMUL, MVT::v2f64, { 1, 4, 1, 1 } },
961 {
ISD::FDIV, MVT::f64, { 4, 14, 1, 1 } },
962 {
ISD::FDIV, MVT::v2f64, { 4, 14, 1, 1 } },
963 {
ISD::FDIV, MVT::v4f64, { 8, 14, 1, 1 } },
964 {
ISD::FDIV, MVT::v8f64, { 16, 23, 1, 3 } },
966 {
ISD::FNEG, MVT::v16f32, { 1, 1, 1, 2 } },
967 {
ISD::FADD, MVT::v16f32, { 1, 4, 1, 1 } },
968 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 1 } },
969 {
ISD::FSUB, MVT::v16f32, { 1, 4, 1, 1 } },
970 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 1 } },
971 {
ISD::FMUL, MVT::v16f32, { 1, 4, 1, 1 } },
972 {
ISD::FMUL, MVT::v8f32, { 1, 4, 1, 1 } },
973 {
ISD::FMUL, MVT::v4f32, { 1, 4, 1, 1 } },
976 {
ISD::FDIV, MVT::f32, { 3, 11, 1, 1 } },
977 {
ISD::FDIV, MVT::v4f32, { 3, 11, 1, 1 } },
978 {
ISD::FDIV, MVT::v8f32, { 5, 11, 1, 1 } },
979 {
ISD::FDIV, MVT::v16f32, { 10, 18, 1, 3 } },
983 if (
const auto *Entry =
CostTableLookup(AVX512CostTable, ISD, LT.second))
984 if (
auto KindCost = Entry->Cost[
CostKind])
985 return LT.first * *KindCost;
990 {
ISD::SHL, MVT::v4i32, { 2, 3, 1, 3 } },
991 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 3 } },
992 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 3 } },
993 {
ISD::SHL, MVT::v8i32, { 4, 4, 1, 3 } },
994 {
ISD::SRL, MVT::v8i32, { 4, 4, 1, 3 } },
995 {
ISD::SRA, MVT::v8i32, { 4, 4, 1, 3 } },
996 {
ISD::SHL, MVT::v2i64, { 2, 3, 1, 1 } },
997 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
998 {
ISD::SHL, MVT::v4i64, { 4, 4, 1, 2 } },
999 {
ISD::SRL, MVT::v4i64, { 4, 4, 1, 2 } },
1011 if (ST->
hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) {
1012 if (ISD ==
ISD::SHL && LT.second == MVT::v16i16 &&
1019 if (
const auto *Entry =
CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
1020 if (
auto KindCost = Entry->Cost[
CostKind])
1021 return LT.first * *KindCost;
1026 {
ISD::SHL, MVT::v16i8, { 1, 3, 1, 1 } },
1027 {
ISD::SRL, MVT::v16i8, { 2, 3, 1, 1 } },
1028 {
ISD::SRA, MVT::v16i8, { 2, 3, 1, 1 } },
1029 {
ISD::SHL, MVT::v8i16, { 1, 3, 1, 1 } },
1030 {
ISD::SRL, MVT::v8i16, { 2, 3, 1, 1 } },
1031 {
ISD::SRA, MVT::v8i16, { 2, 3, 1, 1 } },
1032 {
ISD::SHL, MVT::v4i32, { 1, 3, 1, 1 } },
1033 {
ISD::SRL, MVT::v4i32, { 2, 3, 1, 1 } },
1034 {
ISD::SRA, MVT::v4i32, { 2, 3, 1, 1 } },
1035 {
ISD::SHL, MVT::v2i64, { 1, 3, 1, 1 } },
1036 {
ISD::SRL, MVT::v2i64, { 2, 3, 1, 1 } },
1037 {
ISD::SRA, MVT::v2i64, { 2, 3, 1, 1 } },
1039 {
ISD::SHL, MVT::v32i8, { 4, 7, 5, 6 } },
1040 {
ISD::SRL, MVT::v32i8, { 6, 7, 5, 6 } },
1041 {
ISD::SRA, MVT::v32i8, { 6, 7, 5, 6 } },
1042 {
ISD::SHL, MVT::v16i16, { 4, 7, 5, 6 } },
1043 {
ISD::SRL, MVT::v16i16, { 6, 7, 5, 6 } },
1044 {
ISD::SRA, MVT::v16i16, { 6, 7, 5, 6 } },
1045 {
ISD::SHL, MVT::v8i32, { 4, 7, 5, 6 } },
1046 {
ISD::SRL, MVT::v8i32, { 6, 7, 5, 6 } },
1047 {
ISD::SRA, MVT::v8i32, { 6, 7, 5, 6 } },
1048 {
ISD::SHL, MVT::v4i64, { 4, 7, 5, 6 } },
1049 {
ISD::SRL, MVT::v4i64, { 6, 7, 5, 6 } },
1050 {
ISD::SRA, MVT::v4i64, { 6, 7, 5, 6 } },
1060 if (
const auto *Entry =
1062 if (
auto KindCost = Entry->Cost[
CostKind])
1063 return LT.first * *KindCost;
1070 if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->
hasSSE2()) ||
1071 ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->
hasAVX()))
1076 {
ISD::FDIV, MVT::f32, { 18, 19, 1, 1 } },
1077 {
ISD::FDIV, MVT::v4f32, { 35, 36, 1, 1 } },
1078 {
ISD::FDIV, MVT::f64, { 33, 34, 1, 1 } },
1079 {
ISD::FDIV, MVT::v2f64, { 65, 66, 1, 1 } },
1082 if (ST->useGLMDivSqrtCosts())
1083 if (
const auto *Entry =
CostTableLookup(GLMCostTable, ISD, LT.second))
1084 if (
auto KindCost = Entry->Cost[
CostKind])
1085 return LT.first * *KindCost;
1088 {
ISD::MUL, MVT::v4i32, { 11, 11, 1, 7 } },
1089 {
ISD::MUL, MVT::v8i16, { 2, 5, 1, 1 } },
1090 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1091 {
ISD::FMUL, MVT::f32, { 1, 4, 1, 1 } },
1092 {
ISD::FMUL, MVT::v2f64, { 4, 7, 1, 1 } },
1093 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1094 {
ISD::FDIV, MVT::f32, { 17, 19, 1, 1 } },
1095 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 6 } },
1096 {
ISD::FDIV, MVT::f64, { 32, 34, 1, 1 } },
1097 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 6 } },
1098 {
ISD::FADD, MVT::v2f64, { 2, 4, 1, 1 } },
1099 {
ISD::FSUB, MVT::v2f64, { 2, 4, 1, 1 } },
1105 {
ISD::MUL, MVT::v2i64, { 17, 22, 9, 9 } },
1107 {
ISD::ADD, MVT::v2i64, { 4, 2, 1, 2 } },
1108 {
ISD::SUB, MVT::v2i64, { 4, 2, 1, 2 } },
1111 if (ST->useSLMArithCosts())
1112 if (
const auto *Entry =
CostTableLookup(SLMCostTable, ISD, LT.second))
1113 if (
auto KindCost = Entry->Cost[
CostKind])
1114 return LT.first * *KindCost;
1117 {
ISD::SHL, MVT::v16i8, { 6, 21,11,16 } },
1118 {
ISD::SHL, MVT::v32i8, { 6, 23,11,22 } },
1119 {
ISD::SHL, MVT::v8i16, { 5, 18, 5,10 } },
1120 {
ISD::SHL, MVT::v16i16, { 8, 10,10,14 } },
1122 {
ISD::SRL, MVT::v16i8, { 6, 27,12,18 } },
1123 {
ISD::SRL, MVT::v32i8, { 8, 30,12,24 } },
1124 {
ISD::SRL, MVT::v8i16, { 5, 11, 5,10 } },
1125 {
ISD::SRL, MVT::v16i16, { 8, 10,10,14 } },
1127 {
ISD::SRA, MVT::v16i8, { 17, 17,24,30 } },
1128 {
ISD::SRA, MVT::v32i8, { 18, 20,24,43 } },
1129 {
ISD::SRA, MVT::v8i16, { 5, 11, 5,10 } },
1130 {
ISD::SRA, MVT::v16i16, { 8, 10,10,14 } },
1131 {
ISD::SRA, MVT::v2i64, { 4, 5, 5, 5 } },
1132 {
ISD::SRA, MVT::v4i64, { 8, 8, 5, 9 } },
1134 {
ISD::SUB, MVT::v32i8, { 1, 1, 1, 2 } },
1135 {
ISD::ADD, MVT::v32i8, { 1, 1, 1, 2 } },
1136 {
ISD::SUB, MVT::v16i16, { 1, 1, 1, 2 } },
1137 {
ISD::ADD, MVT::v16i16, { 1, 1, 1, 2 } },
1138 {
ISD::SUB, MVT::v8i32, { 1, 1, 1, 2 } },
1139 {
ISD::ADD, MVT::v8i32, { 1, 1, 1, 2 } },
1140 {
ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } },
1141 {
ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } },
1143 {
ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } },
1144 {
ISD::MUL, MVT::v32i8, { 4, 8, 8,16 } },
1145 {
ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } },
1146 {
ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } },
1147 {
ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } },
1148 {
ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } },
1149 {
ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } },
1153 {
ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } },
1154 {
ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } },
1156 {
ISD::FADD, MVT::f64, { 1, 4, 1, 1 } },
1157 {
ISD::FADD, MVT::f32, { 1, 4, 1, 1 } },
1158 {
ISD::FADD, MVT::v2f64, { 1, 4, 1, 1 } },
1159 {
ISD::FADD, MVT::v4f32, { 1, 4, 1, 1 } },
1160 {
ISD::FADD, MVT::v4f64, { 1, 4, 1, 2 } },
1161 {
ISD::FADD, MVT::v8f32, { 1, 4, 1, 2 } },
1163 {
ISD::FSUB, MVT::f64, { 1, 4, 1, 1 } },
1164 {
ISD::FSUB, MVT::f32, { 1, 4, 1, 1 } },
1165 {
ISD::FSUB, MVT::v2f64, { 1, 4, 1, 1 } },
1166 {
ISD::FSUB, MVT::v4f32, { 1, 4, 1, 1 } },
1167 {
ISD::FSUB, MVT::v4f64, { 1, 4, 1, 2 } },
1168 {
ISD::FSUB, MVT::v8f32, { 1, 4, 1, 2 } },
1170 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1171 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1172 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1173 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1174 {
ISD::FMUL, MVT::v4f64, { 1, 5, 1, 2 } },
1175 {
ISD::FMUL, MVT::v8f32, { 1, 5, 1, 2 } },
1177 {
ISD::FDIV, MVT::f32, { 7, 13, 1, 1 } },
1178 {
ISD::FDIV, MVT::v4f32, { 7, 13, 1, 1 } },
1179 {
ISD::FDIV, MVT::v8f32, { 14, 21, 1, 3 } },
1180 {
ISD::FDIV, MVT::f64, { 14, 20, 1, 1 } },
1181 {
ISD::FDIV, MVT::v2f64, { 14, 20, 1, 1 } },
1182 {
ISD::FDIV, MVT::v4f64, { 28, 35, 1, 3 } },
1187 if (
const auto *Entry =
CostTableLookup(AVX2CostTable, ISD, LT.second))
1188 if (
auto KindCost = Entry->Cost[
CostKind])
1189 return LT.first * *KindCost;
1195 {
ISD::MUL, MVT::v32i8, { 10, 11, 18, 19 } },
1196 {
ISD::MUL, MVT::v16i8, { 5, 6, 8, 12 } },
1197 {
ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } },
1198 {
ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } },
1199 {
ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } },
1200 {
ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
1202 {
ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } },
1203 {
ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } },
1204 {
ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } },
1205 {
ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } },
1207 {
ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } },
1208 {
ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } },
1209 {
ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } },
1210 {
ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } },
1212 {
ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } },
1213 {
ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } },
1214 {
ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } },
1215 {
ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } },
1217 {
ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } },
1218 {
ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } },
1219 {
ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } },
1220 {
ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } },
1221 {
ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } },
1222 {
ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } },
1223 {
ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } },
1224 {
ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } },
1225 {
ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } },
1226 {
ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } },
1228 {
ISD::SHL, MVT::v16i8, { 10, 21,11,17 } },
1229 {
ISD::SHL, MVT::v32i8, { 22, 22,27,40 } },
1230 {
ISD::SHL, MVT::v8i16, { 6, 9,11,11 } },
1231 {
ISD::SHL, MVT::v16i16, { 13, 16,24,25 } },
1232 {
ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } },
1233 {
ISD::SHL, MVT::v8i32, { 9, 11,12,17 } },
1234 {
ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } },
1235 {
ISD::SHL, MVT::v4i64, { 6, 7,11,15 } },
1237 {
ISD::SRL, MVT::v16i8, { 11, 27,12,18 } },
1238 {
ISD::SRL, MVT::v32i8, { 23, 23,30,43 } },
1239 {
ISD::SRL, MVT::v8i16, { 13, 16,14,22 } },
1240 {
ISD::SRL, MVT::v16i16, { 28, 30,31,48 } },
1241 {
ISD::SRL, MVT::v4i32, { 6, 7,12,16 } },
1242 {
ISD::SRL, MVT::v8i32, { 14, 14,26,34 } },
1243 {
ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } },
1244 {
ISD::SRL, MVT::v4i64, { 6, 7,11,15 } },
1246 {
ISD::SRA, MVT::v16i8, { 21, 22,24,36 } },
1247 {
ISD::SRA, MVT::v32i8, { 44, 45,51,76 } },
1248 {
ISD::SRA, MVT::v8i16, { 13, 16,14,22 } },
1249 {
ISD::SRA, MVT::v16i16, { 28, 30,31,48 } },
1250 {
ISD::SRA, MVT::v4i32, { 6, 7,12,16 } },
1251 {
ISD::SRA, MVT::v8i32, { 14, 14,26,34 } },
1252 {
ISD::SRA, MVT::v2i64, { 5, 6,10,14 } },
1253 {
ISD::SRA, MVT::v4i64, { 12, 12,22,30 } },
1255 {
ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } },
1256 {
ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } },
1258 {
ISD::FADD, MVT::f64, { 1, 5, 1, 1 } },
1259 {
ISD::FADD, MVT::f32, { 1, 5, 1, 1 } },
1260 {
ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } },
1261 {
ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } },
1262 {
ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } },
1263 {
ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } },
1265 {
ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } },
1266 {
ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } },
1267 {
ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } },
1268 {
ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } },
1269 {
ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } },
1270 {
ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } },
1272 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1273 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1274 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1275 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1276 {
ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } },
1277 {
ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } },
1279 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1280 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1281 {
ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } },
1282 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1283 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1284 {
ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } },
1288 if (
const auto *Entry =
CostTableLookup(AVX1CostTable, ISD, LT.second))
1289 if (
auto KindCost = Entry->Cost[
CostKind])
1290 return LT.first * *KindCost;
1293 {
ISD::FADD, MVT::f64, { 1, 3, 1, 1 } },
1294 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1295 {
ISD::FADD, MVT::v2f64, { 1, 3, 1, 1 } },
1296 {
ISD::FADD, MVT::v4f32, { 1, 3, 1, 1 } },
1298 {
ISD::FSUB, MVT::f64, { 1, 3, 1, 1 } },
1299 {
ISD::FSUB, MVT::f32 , { 1, 3, 1, 1 } },
1300 {
ISD::FSUB, MVT::v2f64, { 1, 3, 1, 1 } },
1301 {
ISD::FSUB, MVT::v4f32, { 1, 3, 1, 1 } },
1303 {
ISD::FMUL, MVT::f64, { 1, 5, 1, 1 } },
1304 {
ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } },
1305 {
ISD::FMUL, MVT::v2f64, { 1, 5, 1, 1 } },
1306 {
ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } },
1308 {
ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } },
1309 {
ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } },
1310 {
ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } },
1311 {
ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } },
1313 {
ISD::MUL, MVT::v2i64, { 6, 10,10,10 } }
1317 if (
const auto *Entry =
CostTableLookup(SSE42CostTable, ISD, LT.second))
1318 if (
auto KindCost = Entry->Cost[
CostKind])
1319 return LT.first * *KindCost;
1322 {
ISD::SHL, MVT::v16i8, { 15, 24,17,22 } },
1323 {
ISD::SHL, MVT::v8i16, { 11, 14,11,11 } },
1324 {
ISD::SHL, MVT::v4i32, { 14, 20, 4,10 } },
1326 {
ISD::SRL, MVT::v16i8, { 16, 27,18,24 } },
1327 {
ISD::SRL, MVT::v8i16, { 22, 26,23,27 } },
1328 {
ISD::SRL, MVT::v4i32, { 16, 17,15,19 } },
1329 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1331 {
ISD::SRA, MVT::v16i8, { 38, 41,30,36 } },
1332 {
ISD::SRA, MVT::v8i16, { 22, 26,23,27 } },
1333 {
ISD::SRA, MVT::v4i32, { 16, 17,15,19 } },
1334 {
ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } },
1336 {
ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } }
1340 if (
const auto *Entry =
CostTableLookup(SSE41CostTable, ISD, LT.second))
1341 if (
auto KindCost = Entry->Cost[
CostKind])
1342 return LT.first * *KindCost;
1345 {
ISD::MUL, MVT::v16i8, { 5, 18,10,12 } },
1349 if (
const auto *Entry =
CostTableLookup(SSSE3CostTable, ISD, LT.second))
1350 if (
auto KindCost = Entry->Cost[
CostKind])
1351 return LT.first * *KindCost;
1356 {
ISD::SHL, MVT::v16i8, { 13, 21,26,28 } },
1357 {
ISD::SHL, MVT::v8i16, { 24, 27,16,20 } },
1358 {
ISD::SHL, MVT::v4i32, { 17, 19,10,12 } },
1359 {
ISD::SHL, MVT::v2i64, { 4, 6, 5, 7 } },
1361 {
ISD::SRL, MVT::v16i8, { 14, 28,27,30 } },
1362 {
ISD::SRL, MVT::v8i16, { 16, 19,31,31 } },
1363 {
ISD::SRL, MVT::v4i32, { 12, 12,15,19 } },
1364 {
ISD::SRL, MVT::v2i64, { 4, 6, 5, 7 } },
1366 {
ISD::SRA, MVT::v16i8, { 27, 30,54,54 } },
1367 {
ISD::SRA, MVT::v8i16, { 16, 19,31,31 } },
1368 {
ISD::SRA, MVT::v4i32, { 12, 12,15,19 } },
1369 {
ISD::SRA, MVT::v2i64, { 8, 11,12,16 } },
1371 {
ISD::AND, MVT::v16i8, { 1, 1, 1, 1 } },
1372 {
ISD::AND, MVT::v8i16, { 1, 1, 1, 1 } },
1373 {
ISD::AND, MVT::v4i32, { 1, 1, 1, 1 } },
1374 {
ISD::AND, MVT::v2i64, { 1, 1, 1, 1 } },
1376 {
ISD::OR, MVT::v16i8, { 1, 1, 1, 1 } },
1377 {
ISD::OR, MVT::v8i16, { 1, 1, 1, 1 } },
1378 {
ISD::OR, MVT::v4i32, { 1, 1, 1, 1 } },
1379 {
ISD::OR, MVT::v2i64, { 1, 1, 1, 1 } },
1381 {
ISD::XOR, MVT::v16i8, { 1, 1, 1, 1 } },
1382 {
ISD::XOR, MVT::v8i16, { 1, 1, 1, 1 } },
1383 {
ISD::XOR, MVT::v4i32, { 1, 1, 1, 1 } },
1384 {
ISD::XOR, MVT::v2i64, { 1, 1, 1, 1 } },
1386 {
ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } },
1387 {
ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } },
1389 {
ISD::MUL, MVT::v16i8, { 6, 18,12,12 } },
1390 {
ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } },
1391 {
ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } },
1392 {
ISD::MUL, MVT::v2i64, { 7, 10,10,10 } },
1396 {
ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } },
1397 {
ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } },
1398 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1399 {
ISD::FDIV, MVT::v2f64, { 69, 69, 1, 1 } },
1401 {
ISD::FNEG, MVT::f32, { 1, 1, 1, 1 } },
1402 {
ISD::FNEG, MVT::f64, { 1, 1, 1, 1 } },
1403 {
ISD::FNEG, MVT::v4f32, { 1, 1, 1, 1 } },
1404 {
ISD::FNEG, MVT::v2f64, { 1, 1, 1, 1 } },
1406 {
ISD::FADD, MVT::f32, { 2, 3, 1, 1 } },
1407 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1408 {
ISD::FADD, MVT::v2f64, { 2, 3, 1, 1 } },
1410 {
ISD::FSUB, MVT::f32, { 2, 3, 1, 1 } },
1411 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1412 {
ISD::FSUB, MVT::v2f64, { 2, 3, 1, 1 } },
1414 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1415 {
ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } },
1419 if (
const auto *Entry =
CostTableLookup(SSE2CostTable, ISD, LT.second))
1420 if (
auto KindCost = Entry->Cost[
CostKind])
1421 return LT.first * *KindCost;
1424 {
ISD::FDIV, MVT::f32, { 17, 18, 1, 1 } },
1425 {
ISD::FDIV, MVT::v4f32, { 34, 48, 1, 1 } },
1427 {
ISD::FNEG, MVT::f32, { 2, 2, 1, 2 } },
1428 {
ISD::FNEG, MVT::v4f32, { 2, 2, 1, 2 } },
1430 {
ISD::FADD, MVT::f32, { 1, 3, 1, 1 } },
1431 {
ISD::FADD, MVT::v4f32, { 2, 3, 1, 1 } },
1433 {
ISD::FSUB, MVT::f32, { 1, 3, 1, 1 } },
1434 {
ISD::FSUB, MVT::v4f32, { 2, 3, 1, 1 } },
1436 {
ISD::FMUL, MVT::f32, { 2, 5, 1, 1 } },
1437 {
ISD::FMUL, MVT::v4f32, { 2, 5, 1, 1 } },
1441 if (
const auto *Entry =
CostTableLookup(SSE1CostTable, ISD, LT.second))
1442 if (
auto KindCost = Entry->Cost[
CostKind])
1443 return LT.first * *KindCost;
1448 {
ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
1453 if (
auto KindCost = Entry->Cost[
CostKind])
1454 return LT.first * *KindCost;
1465 {
ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
1466 {
ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
1467 {
ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
1469 {
ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } },
1470 {
ISD::FADD, MVT::f64, { 2, 3, 1, 1 } },
1471 {
ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } },
1472 {
ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } },
1473 {
ISD::FDIV, MVT::f64, { 38, 38, 1, 1 } },
1477 if (
auto KindCost = Entry->Cost[
CostKind])
1478 return LT.first * *KindCost;
1492 return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost;
1536 if (!Args.empty() &&
1537 all_of(Args, [](
const Value *Arg) {
return isa<Constant>(Arg); }))
1546 CostKind, Mask.size() / 2, BaseTp);
1559 using namespace PatternMatch;
1562 (ST->
hasAVX() && LT.second.getScalarSizeInBits() >= 32)))
1568 bool IsInLaneShuffle =
false;
1574 unsigned NumEltsPerLane = Mask.size() / NumLanes;
1575 if ((Mask.size() % NumLanes) == 0)
1578 ((
P.value() % Mask.size()) / NumEltsPerLane) ==
1579 (
P.index() / NumEltsPerLane);
1584 if (LT.second.isVector() && LT.second.getScalarType() == MVT::bf16)
1585 LT.second = LT.second.changeVectorElementType(MVT::f16);
1590 int NumElts = LT.second.getVectorNumElements();
1591 if ((Index % NumElts) == 0)
1594 if (SubLT.second.isVector()) {
1595 int NumSubElts = SubLT.second.getVectorNumElements();
1596 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1604 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
1605 if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 &&
1606 (NumSubElts % OrigSubElts) == 0 &&
1607 LT.second.getVectorElementType() ==
1608 SubLT.second.getVectorElementType() &&
1609 LT.second.getVectorElementType().getSizeInBits() ==
1611 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1612 "Unexpected number of elements!");
1614 LT.second.getVectorNumElements());
1616 SubLT.second.getVectorNumElements());
1617 int ExtractIndex =
alignDown((Index % NumElts), NumSubElts);
1624 return ExtractCost + 1;
1627 "Unexpected vector size");
1629 return ExtractCost + 2;
1642 int NumElts = LT.second.getVectorNumElements();
1644 if (SubLT.second.isVector()) {
1645 int NumSubElts = SubLT.second.getVectorNumElements();
1646 bool MatchingTypes =
1647 NumElts == NumSubElts &&
1649 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
1656 if (LT.first == 1 && LT.second == MVT::v4f32 && SubLT.first == 1 &&
1657 SubLT.second == MVT::f32 && (Index == 0 || ST->
hasSSE41()))
1669 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1700 if (
const auto *Entry =
1708 if (LT.first != 1) {
1709 MVT LegalVT = LT.second;
1714 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1718 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1725 if (!Mask.empty() && NumOfDests.
isValid()) {
1743 unsigned E = *NumOfDests.
getValue();
1744 unsigned NormalizedVF =
1750 unsigned PrevSrcReg = 0;
1754 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
1755 [
this, SingleOpTy,
CostKind, &PrevSrcReg, &PrevRegMask,
1760 if (PrevRegMask.
empty() || PrevSrcReg != SrcReg ||
1761 PrevRegMask != RegMask)
1769 if (SrcReg != DestReg &&
1774 PrevSrcReg = SrcReg;
1775 PrevRegMask = RegMask;
1807 if (
const auto *Entry =
1809 return LT.first * Entry->Cost;
1842 if (
const auto *Entry =
1844 return LT.first * Entry->Cost;
1921 if (
const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
1922 if (
auto KindCost = Entry->Cost[
CostKind])
1923 return LT.first * *KindCost;
1939 if (IsInLaneShuffle && ST->
hasAVX2())
1940 if (
const auto *Entry =
1942 return LT.first * Entry->Cost;
1995 if (
const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
1996 return LT.first * Entry->Cost;
2017 if (
const auto *Entry =
CostTableLookup(XOPShuffleTbl, Kind, LT.second))
2018 return LT.first * Entry->Cost;
2045 if (IsInLaneShuffle && ST->
hasAVX())
2046 if (
const auto *Entry =
2048 return LT.first * Entry->Cost;
2110 if (
const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
2111 return LT.first * Entry->Cost;
2124 if (
const auto *Entry =
CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
2125 return LT.first * Entry->Cost;
2156 if (
const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
2157 return LT.first * Entry->Cost;
2213 llvm::any_of(Args, [](
const auto &V) {
return isa<LoadInst>(V); });
2215 if (
const auto *Entry =
2218 LT.second.getVectorElementCount()) &&
2219 "Table entry missing from isLegalBroadcastLoad()");
2220 return LT.first * Entry->Cost;
2223 if (
const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
2224 return LT.first * Entry->Cost;
2237 if (LT.first == 1 && LT.second == MVT::v4f32 && Mask.size() == 4) {
2239 auto MatchSHUFPS = [](
int X,
int Y) {
2240 return X < 0 ||
Y < 0 || ((
X & 4) == (
Y & 4));
2242 if (MatchSHUFPS(Mask[0], Mask[1]) && MatchSHUFPS(Mask[2], Mask[3]))
2245 if (
const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
2246 return LT.first * Entry->Cost;
2258 assert(ISD &&
"Invalid opcode");
2385 {
ISD::FP_ROUND, MVT::v16f16, MVT::v16f32, { 1, 1, 1, 1 } },
2407 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 2, 1, 1, 1 } },
2408 {
ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, { 2, 1, 1, 1 } },
2755 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 4, 1, 1, 1 } },
2832 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, { 6, 1, 1, 1 } },
3056 {
ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, {10, 1, 1, 1 } },
3085 AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3086 if (
auto KindCost = Entry->Cost[
CostKind])
3091 AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3092 if (
auto KindCost = Entry->Cost[
CostKind])
3097 AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3098 if (
auto KindCost = Entry->Cost[
CostKind])
3104 AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3105 if (
auto KindCost = Entry->Cost[
CostKind])
3110 AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy))
3111 if (
auto KindCost = Entry->Cost[
CostKind])
3116 SimpleDstTy, SimpleSrcTy))
3117 if (
auto KindCost = Entry->Cost[
CostKind])
3122 SimpleDstTy, SimpleSrcTy))
3123 if (
auto KindCost = Entry->Cost[
CostKind])
3129 SimpleDstTy, SimpleSrcTy))
3130 if (
auto KindCost = Entry->Cost[
CostKind])
3134 if (ST->hasF16C()) {
3136 SimpleDstTy, SimpleSrcTy))
3137 if (
auto KindCost = Entry->Cost[
CostKind])
3143 SimpleDstTy, SimpleSrcTy))
3144 if (
auto KindCost = Entry->Cost[
CostKind])
3150 SimpleDstTy, SimpleSrcTy))
3151 if (
auto KindCost = Entry->Cost[
CostKind])
3174 AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second))
3175 if (
auto KindCost = Entry->Cost[
CostKind])
3176 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3180 AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second))
3181 if (
auto KindCost = Entry->Cost[
CostKind])
3182 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3186 AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second))
3187 if (
auto KindCost = Entry->Cost[
CostKind])
3188 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3193 LTDest.second, LTSrc.second))
3194 if (
auto KindCost = Entry->Cost[
CostKind])
3195 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3199 LTDest.second, LTSrc.second))
3200 if (
auto KindCost = Entry->Cost[
CostKind])
3201 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3205 LTDest.second, LTSrc.second))
3206 if (
auto KindCost = Entry->Cost[
CostKind])
3207 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3211 LTDest.second, LTSrc.second))
3212 if (
auto KindCost = Entry->Cost[
CostKind])
3213 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3217 LTDest.second, LTSrc.second))
3218 if (
auto KindCost = Entry->Cost[
CostKind])
3219 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3221 if (ST->hasF16C()) {
3223 LTDest.second, LTSrc.second))
3224 if (
auto KindCost = Entry->Cost[
CostKind])
3225 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3230 LTDest.second, LTSrc.second))
3231 if (
auto KindCost = Entry->Cost[
CostKind])
3232 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3236 LTDest.second, LTSrc.second))
3237 if (
auto KindCost = Entry->Cost[
CostKind])
3238 return std::max(LTSrc.first, LTDest.first) * *KindCost;
3243 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) {
3244 Type *ExtSrc = Src->getWithNewBitWidth(32);
3250 if (!(Src->isIntegerTy() &&
I && isa<LoadInst>(
I->getOperand(0))))
3260 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) {
3261 Type *TruncDst = Dst->getWithNewBitWidth(32);
3271 return Cost == 0 ? 0 :
N;
3285 Op1Info, Op2Info,
I);
3290 MVT MTy = LT.second;
3293 assert(ISD &&
"Invalid opcode");
3296 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
3309 Pred = cast<CmpInst>(
I)->getPredicate();
3311 bool CmpWithConstant =
false;
3312 if (
auto *CmpInstr = dyn_cast_or_null<CmpInst>(
I))
3313 CmpWithConstant = isa<Constant>(CmpInstr->getOperand(1));
3318 ExtraCost = CmpWithConstant ? 0 : 1;
3323 ExtraCost = CmpWithConstant ? 0 : 1;
3329 ExtraCost = CmpWithConstant ? 1 : 2;
3340 ExtraCost = CmpWithConstant ? 2 : 3;
3347 if (CondTy && !ST->
hasAVX())
3518 if (ST->useSLMArithCosts())
3520 if (
auto KindCost = Entry->Cost[
CostKind])
3521 return LT.first * (ExtraCost + *KindCost);
3525 if (
auto KindCost = Entry->Cost[
CostKind])
3526 return LT.first * (ExtraCost + *KindCost);
3530 if (
auto KindCost = Entry->Cost[
CostKind])
3531 return LT.first * (ExtraCost + *KindCost);
3535 if (
auto KindCost = Entry->Cost[
CostKind])
3536 return LT.first * (ExtraCost + *KindCost);
3540 if (
auto KindCost = Entry->Cost[
CostKind])
3541 return LT.first * (ExtraCost + *KindCost);
3545 if (
auto KindCost = Entry->Cost[
CostKind])
3546 return LT.first * (ExtraCost + *KindCost);
3550 if (
auto KindCost = Entry->Cost[
CostKind])
3551 return LT.first * (ExtraCost + *KindCost);
3555 if (
auto KindCost = Entry->Cost[
CostKind])
3556 return LT.first * (ExtraCost + *KindCost);
3560 if (
auto KindCost = Entry->Cost[
CostKind])
3561 return LT.first * (ExtraCost + *KindCost);
3565 if (
auto KindCost = Entry->Cost[
CostKind])
3566 return LT.first * (ExtraCost + *KindCost);
3574 Op1Info, Op2Info,
I);
3592 {
ISD::FSHL, MVT::v8i64, { 1, 1, 1, 1 } },
3593 {
ISD::FSHL, MVT::v4i64, { 1, 1, 1, 1 } },
3594 {
ISD::FSHL, MVT::v2i64, { 1, 1, 1, 1 } },
3595 {
ISD::FSHL, MVT::v16i32, { 1, 1, 1, 1 } },
3596 {
ISD::FSHL, MVT::v8i32, { 1, 1, 1, 1 } },
3597 {
ISD::FSHL, MVT::v4i32, { 1, 1, 1, 1 } },
3598 {
ISD::FSHL, MVT::v32i16, { 1, 1, 1, 1 } },
3599 {
ISD::FSHL, MVT::v16i16, { 1, 1, 1, 1 } },
3600 {
ISD::FSHL, MVT::v8i16, { 1, 1, 1, 1 } },
3601 {
ISD::ROTL, MVT::v32i16, { 1, 1, 1, 1 } },
3602 {
ISD::ROTL, MVT::v16i16, { 1, 1, 1, 1 } },
3603 {
ISD::ROTL, MVT::v8i16, { 1, 1, 1, 1 } },
3604 {
ISD::ROTR, MVT::v32i16, { 1, 1, 1, 1 } },
3605 {
ISD::ROTR, MVT::v16i16, { 1, 1, 1, 1 } },
3606 {
ISD::ROTR, MVT::v8i16, { 1, 1, 1, 1 } },
3628 {
ISD::CTLZ, MVT::v8i64, { 1, 5, 1, 1 } },
3629 {
ISD::CTLZ, MVT::v16i32, { 1, 5, 1, 1 } },
3630 {
ISD::CTLZ, MVT::v32i16, { 18, 27, 23, 27 } },
3631 {
ISD::CTLZ, MVT::v64i8, { 3, 16, 9, 11 } },
3632 {
ISD::CTLZ, MVT::v4i64, { 1, 5, 1, 1 } },
3633 {
ISD::CTLZ, MVT::v8i32, { 1, 5, 1, 1 } },
3634 {
ISD::CTLZ, MVT::v16i16, { 8, 19, 11, 13 } },
3635 {
ISD::CTLZ, MVT::v32i8, { 2, 11, 9, 10 } },
3636 {
ISD::CTLZ, MVT::v2i64, { 1, 5, 1, 1 } },
3637 {
ISD::CTLZ, MVT::v4i32, { 1, 5, 1, 1 } },
3638 {
ISD::CTLZ, MVT::v8i16, { 3, 15, 4, 6 } },
3639 {
ISD::CTLZ, MVT::v16i8, { 2, 10, 9, 10 } },
3641 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3642 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3643 {
ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } },
3644 {
ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } },
3645 {
ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } },
3646 {
ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } },
3649 {
ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
3650 {
ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
3672 {
ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
3673 {
ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
3674 {
ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
3675 {
ISD::CTLZ, MVT::v64i8, { 3, 12, 10, 9 } },
3676 {
ISD::CTPOP, MVT::v2i64, { 3, 7, 10, 10 } },
3677 {
ISD::CTPOP, MVT::v4i64, { 3, 7, 10, 10 } },
3678 {
ISD::CTPOP, MVT::v8i64, { 3, 8, 10, 12 } },
3679 {
ISD::CTPOP, MVT::v4i32, { 7, 11, 14, 14 } },
3680 {
ISD::CTPOP, MVT::v8i32, { 7, 11, 14, 14 } },
3681 {
ISD::CTPOP, MVT::v16i32, { 7, 12, 14, 16 } },
3682 {
ISD::CTPOP, MVT::v8i16, { 2, 7, 11, 11 } },
3683 {
ISD::CTPOP, MVT::v16i16, { 2, 7, 11, 11 } },
3684 {
ISD::CTPOP, MVT::v32i16, { 3, 7, 11, 13 } },
3688 {
ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } },
3689 {
ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } },
3690 {
ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } },
3691 {
ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } },
3692 {
ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } },
3693 {
ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } },
3694 {
ISD::ROTL, MVT::v32i16, { 2, 8, 6, 8 } },
3695 {
ISD::ROTL, MVT::v16i16, { 2, 8, 6, 7 } },
3696 {
ISD::ROTL, MVT::v8i16, { 2, 7, 6, 7 } },
3697 {
ISD::ROTL, MVT::v64i8, { 5, 6, 11, 12 } },
3698 {
ISD::ROTL, MVT::v32i8, { 5, 15, 7, 10 } },
3699 {
ISD::ROTL, MVT::v16i8, { 5, 15, 7, 10 } },
3700 {
ISD::ROTR, MVT::v32i16, { 2, 8, 6, 8 } },
3701 {
ISD::ROTR, MVT::v16i16, { 2, 8, 6, 7 } },
3702 {
ISD::ROTR, MVT::v8i16, { 2, 7, 6, 7 } },
3703 {
ISD::ROTR, MVT::v64i8, { 5, 6, 12, 14 } },
3704 {
ISD::ROTR, MVT::v32i8, { 5, 14, 6, 9 } },
3705 {
ISD::ROTR, MVT::v16i8, { 5, 14, 6, 9 } },
3714 {
ISD::SMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3715 {
ISD::SMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3716 {
ISD::SMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3717 {
ISD::SMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3719 {
ISD::SMULO, MVT::v64i8, { 8, 21, 17, 18 } },
3721 {
ISD::UMULO, MVT::v64i8, { 8, 15, 15, 16 } },
3726 {
ISD::UMAX, MVT::v32i16, { 1, 1, 1, 1 } },
3727 {
ISD::UMAX, MVT::v64i8, { 1, 1, 1, 1 } },
3728 {
ISD::UMIN, MVT::v32i16, { 1, 1, 1, 1 } },
3729 {
ISD::UMIN, MVT::v64i8, { 1, 1, 1, 1 } },
3734 {
ISD::ABS, MVT::v8i64, { 1, 1, 1, 1 } },
3735 {
ISD::ABS, MVT::v4i64, { 1, 1, 1, 1 } },
3736 {
ISD::ABS, MVT::v2i64, { 1, 1, 1, 1 } },
3737 {
ISD::ABS, MVT::v16i32, { 1, 1, 1, 1 } },
3738 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 1 } },
3739 {
ISD::ABS, MVT::v32i16, { 2, 7, 4, 4 } },
3740 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
3741 {
ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
3742 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
3750 {
ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
3751 {
ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
3752 {
ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
3753 {
ISD::CTLZ, MVT::v64i8, { 6, 11, 19, 19 } },
3754 {
ISD::CTPOP, MVT::v8i64, { 16, 16, 19, 19 } },
3755 {
ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } },
3756 {
ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } },
3757 {
ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } },
3758 {
ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } },
3759 {
ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } },
3760 {
ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } },
3761 {
ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } },
3762 {
ISD::ROTL, MVT::v8i64, { 1, 1, 1, 1 } },
3763 {
ISD::ROTL, MVT::v4i64, { 1, 1, 1, 1 } },
3764 {
ISD::ROTL, MVT::v2i64, { 1, 1, 1, 1 } },
3765 {
ISD::ROTL, MVT::v16i32, { 1, 1, 1, 1 } },
3766 {
ISD::ROTL, MVT::v8i32, { 1, 1, 1, 1 } },
3767 {
ISD::ROTL, MVT::v4i32, { 1, 1, 1, 1 } },
3768 {
ISD::ROTR, MVT::v8i64, { 1, 1, 1, 1 } },
3769 {
ISD::ROTR, MVT::v4i64, { 1, 1, 1, 1 } },
3770 {
ISD::ROTR, MVT::v2i64, { 1, 1, 1, 1 } },
3771 {
ISD::ROTR, MVT::v16i32, { 1, 1, 1, 1 } },
3772 {
ISD::ROTR, MVT::v8i32, { 1, 1, 1, 1 } },
3773 {
ISD::ROTR, MVT::v4i32, { 1, 1, 1, 1 } },
3788 {
ISD::SMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3789 {
ISD::SMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3790 {
ISD::SMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3791 {
ISD::SMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3792 {
ISD::SMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3793 {
ISD::SMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3794 {
ISD::SMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3795 {
ISD::SMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3796 {
ISD::SMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3797 {
ISD::SMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3798 {
ISD::SMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3799 {
ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3800 {
ISD::SMULO, MVT::v8i64, { 44, 44, 81, 93 } },
3801 {
ISD::SMULO, MVT::v16i32, { 5, 12, 9, 11 } },
3802 {
ISD::SMULO, MVT::v32i16, { 6, 12, 17, 17 } },
3803 {
ISD::SMULO, MVT::v64i8, { 22, 28, 42, 42 } },
3812 {
ISD::UMAX, MVT::v8i64, { 1, 3, 1, 1 } },
3813 {
ISD::UMAX, MVT::v16i32, { 1, 1, 1, 1 } },
3814 {
ISD::UMAX, MVT::v32i16, { 3, 7, 5, 5 } },
3815 {
ISD::UMAX, MVT::v64i8, { 3, 7, 5, 5 } },
3816 {
ISD::UMAX, MVT::v4i64, { 1, 3, 1, 1 } },
3817 {
ISD::UMAX, MVT::v2i64, { 1, 3, 1, 1 } },
3818 {
ISD::UMIN, MVT::v8i64, { 1, 3, 1, 1 } },
3819 {
ISD::UMIN, MVT::v16i32, { 1, 1, 1, 1 } },
3820 {
ISD::UMIN, MVT::v32i16, { 3, 7, 5, 5 } },
3821 {
ISD::UMIN, MVT::v64i8, { 3, 7, 5, 5 } },
3822 {
ISD::UMIN, MVT::v4i64, { 1, 3, 1, 1 } },
3823 {
ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
3824 {
ISD::UMULO, MVT::v8i64, { 52, 52, 95, 104} },
3825 {
ISD::UMULO, MVT::v16i32, { 5, 12, 8, 10 } },
3826 {
ISD::UMULO, MVT::v32i16, { 5, 13, 16, 16 } },
3827 {
ISD::UMULO, MVT::v64i8, { 18, 24, 30, 30 } },
3854 {
ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } },
3857 {
ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } },
3858 {
ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } },
3874 {
ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
3875 {
ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
3876 {
ISD::ROTL, MVT::v16i16, { 4, 7, 5, 6 } },
3877 {
ISD::ROTL, MVT::v32i8, { 4, 7, 5, 6 } },
3878 {
ISD::ROTL, MVT::v2i64, { 1, 3, 1, 1 } },
3879 {
ISD::ROTL, MVT::v4i32, { 1, 3, 1, 1 } },
3880 {
ISD::ROTL, MVT::v8i16, { 1, 3, 1, 1 } },
3881 {
ISD::ROTL, MVT::v16i8, { 1, 3, 1, 1 } },
3882 {
ISD::ROTR, MVT::v4i64, { 4, 7, 8, 9 } },
3883 {
ISD::ROTR, MVT::v8i32, { 4, 7, 8, 9 } },
3884 {
ISD::ROTR, MVT::v16i16, { 4, 7, 8, 9 } },
3885 {
ISD::ROTR, MVT::v32i8, { 4, 7, 8, 9 } },
3886 {
ISD::ROTR, MVT::v2i64, { 1, 3, 3, 3 } },
3887 {
ISD::ROTR, MVT::v4i32, { 1, 3, 3, 3 } },
3888 {
ISD::ROTR, MVT::v8i16, { 1, 3, 3, 3 } },
3889 {
ISD::ROTR, MVT::v16i8, { 1, 3, 3, 3 } },
3900 {
ISD::ABS, MVT::v2i64, { 2, 4, 3, 5 } },
3901 {
ISD::ABS, MVT::v4i64, { 2, 4, 3, 5 } },
3902 {
ISD::ABS, MVT::v4i32, { 1, 1, 1, 1 } },
3903 {
ISD::ABS, MVT::v8i32, { 1, 1, 1, 2 } },
3904 {
ISD::ABS, MVT::v8i16, { 1, 1, 1, 1 } },
3905 {
ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
3906 {
ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
3907 {
ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
3922 {
ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
3923 {
ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
3924 {
ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
3925 {
ISD::CTLZ, MVT::v8i32, { 10, 16, 19, 34 } },
3926 {
ISD::CTLZ, MVT::v8i16, { 4, 13, 14, 15 } },
3927 {
ISD::CTLZ, MVT::v16i16, { 6, 14, 14, 24 } },
3928 {
ISD::CTLZ, MVT::v16i8, { 3, 12, 9, 10 } },
3929 {
ISD::CTLZ, MVT::v32i8, { 4, 12, 9, 14 } },
3930 {
ISD::CTPOP, MVT::v2i64, { 3, 9, 10, 10 } },
3931 {
ISD::CTPOP, MVT::v4i64, { 4, 9, 10, 14 } },
3932 {
ISD::CTPOP, MVT::v4i32, { 7, 12, 14, 14 } },
3933 {
ISD::CTPOP, MVT::v8i32, { 7, 12, 14, 18 } },
3934 {
ISD::CTPOP, MVT::v8i16, { 3, 7, 11, 11 } },
3935 {
ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } },
3938 {
ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } },
3939 {
ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } },
3940 {
ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } },
3941 {
ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } },
3942 {
ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } },
3943 {
ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } },
3944 {
ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } },
3945 {
ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } },
3952 {
ISD::SMAX, MVT::v2i64, { 2, 7, 2, 3 } },
3953 {
ISD::SMAX, MVT::v4i64, { 2, 7, 2, 3 } },
3954 {
ISD::SMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3955 {
ISD::SMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3956 {
ISD::SMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3957 {
ISD::SMIN, MVT::v2i64, { 2, 7, 2, 3 } },
3958 {
ISD::SMIN, MVT::v4i64, { 2, 7, 2, 3 } },
3959 {
ISD::SMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3960 {
ISD::SMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3961 {
ISD::SMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3962 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
3963 {
ISD::SMULO, MVT::v2i64, { 8, 8, 13, 15 } },
3964 {
ISD::SMULO, MVT::v8i32, { 8, 20, 13, 24 } },
3965 {
ISD::SMULO, MVT::v4i32, { 5, 15, 11, 12 } },
3966 {
ISD::SMULO, MVT::v16i16, { 4, 14, 8, 14 } },
3968 {
ISD::SMULO, MVT::v32i8, { 9, 15, 18, 35 } },
3969 {
ISD::SMULO, MVT::v16i8, { 6, 22, 14, 21 } },
3981 {
ISD::UMAX, MVT::v2i64, { 2, 8, 5, 6 } },
3982 {
ISD::UMAX, MVT::v4i64, { 2, 8, 5, 8 } },
3983 {
ISD::UMAX, MVT::v8i32, { 1, 1, 1, 2 } },
3984 {
ISD::UMAX, MVT::v16i16, { 1, 1, 1, 2 } },
3985 {
ISD::UMAX, MVT::v32i8, { 1, 1, 1, 2 } },
3986 {
ISD::UMIN, MVT::v2i64, { 2, 8, 5, 6 } },
3987 {
ISD::UMIN, MVT::v4i64, { 2, 8, 5, 8 } },
3988 {
ISD::UMIN, MVT::v8i32, { 1, 1, 1, 2 } },
3989 {
ISD::UMIN, MVT::v16i16, { 1, 1, 1, 2 } },
3990 {
ISD::UMIN, MVT::v32i8, { 1, 1, 1, 2 } },
3991 {
ISD::UMULO, MVT::v4i64, { 24, 24, 39, 43 } },
3992 {
ISD::UMULO, MVT::v2i64, { 10, 10, 15, 19 } },
3993 {
ISD::UMULO, MVT::v8i32, { 8, 11, 13, 23 } },
3994 {
ISD::UMULO, MVT::v4i32, { 5, 12, 11, 12 } },
3995 {
ISD::UMULO, MVT::v16i16, { 4, 6, 8, 13 } },
3997 {
ISD::UMULO, MVT::v32i8, { 9, 13, 17, 33 } },
3998 {
ISD::UMULO, MVT::v16i8, { 6, 19, 13, 20 } },
4012 {
ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } },
4014 {
ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } },
4015 {
ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } },
4018 {
ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } },
4019 {
ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
4020 {
ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
4021 {
ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
4034 {
ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
4036 {
ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } },
4037 {
ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
4038 {
ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } },
4039 {
ISD::CTLZ, MVT::v4i32, { 12, 20, 19, 23 } },
4040 {
ISD::CTLZ, MVT::v16i16, { 19, 22, 29, 38 } },
4041 {
ISD::CTLZ, MVT::v8i16, { 9, 16, 14, 18 } },
4042 {
ISD::CTLZ, MVT::v32i8, { 14, 15, 19, 28 } },
4043 {
ISD::CTLZ, MVT::v16i8, { 7, 12, 9, 13 } },
4044 {
ISD::CTPOP, MVT::v4i64, { 14, 18, 19, 28 } },
4045 {
ISD::CTPOP, MVT::v2i64, { 7, 14, 10, 14 } },
4046 {
ISD::CTPOP, MVT::v8i32, { 18, 24, 27, 36 } },
4047 {
ISD::CTPOP, MVT::v4i32, { 9, 20, 14, 18 } },
4048 {
ISD::CTPOP, MVT::v16i16, { 16, 21, 22, 31 } },
4049 {
ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } },
4050 {
ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } },
4051 {
ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } },
4052 {
ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } },
4053 {
ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } },
4054 {
ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } },
4055 {
ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } },
4056 {
ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } },
4057 {
ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } },
4058 {
ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } },
4059 {
ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } },
4065 {
ISD::SMAX, MVT::v4i64, { 6, 9, 6, 12 } },
4066 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 4 } },
4067 {
ISD::SMAX, MVT::v8i32, { 4, 6, 5, 6 } },
4068 {
ISD::SMAX, MVT::v16i16, { 4, 6, 5, 6 } },
4069 {
ISD::SMAX, MVT::v32i8, { 4, 6, 5, 6 } },
4070 {
ISD::SMIN, MVT::v4i64, { 6, 9, 6, 12 } },
4071 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4072 {
ISD::SMIN, MVT::v8i32, { 4, 6, 5, 6 } },
4073 {
ISD::SMIN, MVT::v16i16, { 4, 6, 5, 6 } },
4074 {
ISD::SMIN, MVT::v32i8, { 4, 6, 5, 6 } },
4075 {
ISD::SMULO, MVT::v4i64, { 20, 20, 33, 37 } },
4076 {
ISD::SMULO, MVT::v2i64, { 9, 9, 13, 17 } },
4077 {
ISD::SMULO, MVT::v8i32, { 15, 20, 24, 29 } },
4078 {
ISD::SMULO, MVT::v4i32, { 7, 15, 11, 13 } },
4079 {
ISD::SMULO, MVT::v16i16, { 8, 14, 14, 15 } },
4081 {
ISD::SMULO, MVT::v32i8, { 20, 20, 37, 39 } },
4082 {
ISD::SMULO, MVT::v16i8, { 9, 22, 18, 21 } },
4093 {
ISD::UMAX, MVT::v4i64, { 9, 10, 11, 17 } },
4094 {
ISD::UMAX, MVT::v2i64, { 4, 8, 5, 7 } },
4095 {
ISD::UMAX, MVT::v8i32, { 4, 6, 5, 6 } },
4096 {
ISD::UMAX, MVT::v16i16, { 4, 6, 5, 6 } },
4097 {
ISD::UMAX, MVT::v32i8, { 4, 6, 5, 6 } },
4098 {
ISD::UMIN, MVT::v4i64, { 9, 10, 11, 17 } },
4099 {
ISD::UMIN, MVT::v2i64, { 4, 8, 5, 7 } },
4100 {
ISD::UMIN, MVT::v8i32, { 4, 6, 5, 6 } },
4101 {
ISD::UMIN, MVT::v16i16, { 4, 6, 5, 6 } },
4102 {
ISD::UMIN, MVT::v32i8, { 4, 6, 5, 6 } },
4103 {
ISD::UMULO, MVT::v4i64, { 24, 26, 39, 45 } },
4104 {
ISD::UMULO, MVT::v2i64, { 10, 12, 15, 20 } },
4105 {
ISD::UMULO, MVT::v8i32, { 14, 15, 23, 28 } },
4106 {
ISD::UMULO, MVT::v4i32, { 7, 12, 11, 13 } },
4107 {
ISD::UMULO, MVT::v16i16, { 7, 11, 13, 14 } },
4109 {
ISD::UMULO, MVT::v32i8, { 19, 19, 35, 37 } },
4110 {
ISD::UMULO, MVT::v16i8, { 9, 19, 17, 20 } },
4124 {
ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } },
4125 {
ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } },
4127 {
ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } },
4128 {
ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } },
4153 {
ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } },
4155 {
ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } },
4162 {
ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } },
4164 {
ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } },
4172 {
ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } },
4175 {
ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } },
4180 {
ISD::SMAX, MVT::v2i64, { 3, 7, 2, 3 } },
4181 {
ISD::SMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4182 {
ISD::SMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4183 {
ISD::SMIN, MVT::v2i64, { 3, 7, 2, 3 } },
4184 {
ISD::SMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4185 {
ISD::SMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4186 {
ISD::SMULO, MVT::v2i64, { 9, 11, 13, 17 } },
4187 {
ISD::SMULO, MVT::v4i32, { 20, 24, 13, 19 } },
4189 {
ISD::SMULO, MVT::v16i8, { 13, 22, 24, 25 } },
4194 {
ISD::UMAX, MVT::v2i64, { 2, 11, 6, 7 } },
4195 {
ISD::UMAX, MVT::v4i32, { 1, 1, 1, 1 } },
4196 {
ISD::UMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4197 {
ISD::UMIN, MVT::v2i64, { 2, 11, 6, 7 } },
4198 {
ISD::UMIN, MVT::v4i32, { 1, 1, 1, 1 } },
4199 {
ISD::UMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4200 {
ISD::UMULO, MVT::v2i64, { 14, 20, 15, 20 } },
4201 {
ISD::UMULO, MVT::v4i32, { 19, 22, 12, 18 } },
4203 {
ISD::UMULO, MVT::v16i8, { 13, 19, 18, 20 } },
4206 {
ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
4207 {
ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
4208 {
ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
4216 {
ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
4217 {
ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
4218 {
ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
4219 {
ISD::CTLZ, MVT::v16i8, { 11, 15, 10, 16 } },
4220 {
ISD::CTPOP, MVT::v2i64, { 13, 19, 12, 18 } },
4221 {
ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } },
4222 {
ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } },
4223 {
ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } },
4224 {
ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } },
4225 {
ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } },
4226 {
ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } },
4227 {
ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } }
4230 {
ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } },
4231 {
ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
4232 {
ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
4233 {
ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
4238 {
ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
4241 {
ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
4242 {
ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
4243 {
ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
4244 {
ISD::CTLZ, MVT::v16i8, { 8, 39, 29, 32 } },
4245 {
ISD::CTPOP, MVT::v2i64, { 12, 26, 16, 18 } },
4246 {
ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } },
4247 {
ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } },
4248 {
ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } },
4249 {
ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } },
4250 {
ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } },
4251 {
ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } },
4252 {
ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } },
4257 {
ISD::SMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4258 {
ISD::SMAX, MVT::v4i32, { 2, 4, 5, 5 } },
4259 {
ISD::SMAX, MVT::v8i16, { 1, 1, 1, 1 } },
4260 {
ISD::SMAX, MVT::v16i8, { 2, 4, 5, 5 } },
4261 {
ISD::SMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4262 {
ISD::SMIN, MVT::v4i32, { 2, 4, 5, 5 } },
4263 {
ISD::SMIN, MVT::v8i16, { 1, 1, 1, 1 } },
4264 {
ISD::SMIN, MVT::v16i8, { 2, 4, 5, 5 } },
4265 {
ISD::SMULO, MVT::v2i64, { 30, 33, 13, 23 } },
4266 {
ISD::SMULO, MVT::v4i32, { 20, 24, 23, 23 } },
4268 {
ISD::SMULO, MVT::v16i8, { 13, 23, 24, 25 } },
4277 {
ISD::UMAX, MVT::v2i64, { 4, 8, 15, 15 } },
4278 {
ISD::UMAX, MVT::v4i32, { 2, 5, 8, 8 } },
4279 {
ISD::UMAX, MVT::v8i16, { 1, 3, 3, 3 } },
4280 {
ISD::UMAX, MVT::v16i8, { 1, 1, 1, 1 } },
4281 {
ISD::UMIN, MVT::v2i64, { 4, 8, 15, 15 } },
4282 {
ISD::UMIN, MVT::v4i32, { 2, 5, 8, 8 } },
4283 {
ISD::UMIN, MVT::v8i16, { 1, 3, 3, 3 } },
4284 {
ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
4285 {
ISD::UMULO, MVT::v2i64, { 30, 33, 15, 29 } },
4286 {
ISD::UMULO, MVT::v4i32, { 19, 22, 14, 18 } },
4288 {
ISD::UMULO, MVT::v16i8, { 13, 19, 20, 20 } },
4296 {
ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } },
4302 {
ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } },
4305 {
ISD::CTTZ, MVT::i64, { 1, 1, 1, 1 } },
4308 {
ISD::CTTZ, MVT::i32, { 1, 1, 1, 1 } },
4309 {
ISD::CTTZ, MVT::i16, { 2, 1, 1, 1 } },
4313 {
ISD::CTLZ, MVT::i64, { 1, 1, 1, 1 } },
4316 {
ISD::CTLZ, MVT::i32, { 1, 1, 1, 1 } },
4317 {
ISD::CTLZ, MVT::i16, { 2, 1, 1, 1 } },
4329 {
ISD::ABS, MVT::i64, { 1, 2, 3, 3 } },
4332 {
ISD::CTLZ, MVT::i64, { 2, 2, 4, 5 } },
4334 {
ISD::CTTZ, MVT::i64, { 2, 2, 3, 4 } },
4337 {
ISD::ROTL, MVT::i64, { 2, 3, 1, 3 } },
4338 {
ISD::ROTR, MVT::i64, { 2, 3, 1, 3 } },
4340 {
ISD::FSHL, MVT::i64, { 4, 4, 1, 4 } },
4345 {
ISD::SMAX, MVT::i64, { 1, 3, 2, 3 } },
4346 {
ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
4347 {
ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
4348 {
ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4355 {
ISD::ABS, MVT::i32, { 1, 2, 3, 3 } },
4356 {
ISD::ABS, MVT::i16, { 2, 2, 3, 3 } },
4357 {
ISD::ABS, MVT::i8, { 2, 4, 4, 3 } },
4363 {
ISD::CTLZ, MVT::i32, { 2, 2, 4, 5 } },
4364 {
ISD::CTLZ, MVT::i16, { 2, 2, 4, 5 } },
4369 {
ISD::CTTZ, MVT::i32, { 2, 2, 3, 3 } },
4370 {
ISD::CTTZ, MVT::i16, { 2, 2, 2, 3 } },
4378 {
ISD::ROTL, MVT::i32, { 2, 3, 1, 3 } },
4379 {
ISD::ROTL, MVT::i16, { 2, 3, 1, 3 } },
4381 {
ISD::ROTR, MVT::i32, { 2, 3, 1, 3 } },
4382 {
ISD::ROTR, MVT::i16, { 2, 3, 1, 3 } },
4387 {
ISD::FSHL, MVT::i32, { 4, 4, 1, 4 } },
4388 {
ISD::FSHL, MVT::i16, { 4, 4, 2, 5 } },
4402 {
ISD::SMAX, MVT::i32, { 1, 2, 2, 3 } },
4403 {
ISD::SMAX, MVT::i16, { 1, 4, 2, 4 } },
4405 {
ISD::SMIN, MVT::i32, { 1, 2, 2, 3 } },
4406 {
ISD::SMIN, MVT::i16, { 1, 4, 2, 4 } },
4408 {
ISD::UMAX, MVT::i32, { 1, 2, 2, 3 } },
4409 {
ISD::UMAX, MVT::i16, { 1, 4, 2, 4 } },
4411 {
ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
4412 {
ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
4435 case Intrinsic::abs:
4438 case Intrinsic::bitreverse:
4441 case Intrinsic::bswap:
4444 case Intrinsic::ctlz:
4447 case Intrinsic::ctpop:
4450 case Intrinsic::cttz:
4453 case Intrinsic::fshl:
4457 if (Args[0] == Args[1]) {
4468 case Intrinsic::fshr:
4473 if (Args[0] == Args[1]) {
4484 case Intrinsic::lrint:
4485 case Intrinsic::llrint: {
4492 case Intrinsic::maxnum:
4493 case Intrinsic::minnum:
4497 case Intrinsic::sadd_sat:
4500 case Intrinsic::smax:
4503 case Intrinsic::smin:
4506 case Intrinsic::ssub_sat:
4509 case Intrinsic::uadd_sat:
4512 case Intrinsic::umax:
4515 case Intrinsic::umin:
4518 case Intrinsic::usub_sat:
4521 case Intrinsic::sqrt:
4524 case Intrinsic::sadd_with_overflow:
4525 case Intrinsic::ssub_with_overflow:
4528 OpTy =
RetTy->getContainedType(0);
4530 case Intrinsic::uadd_with_overflow:
4531 case Intrinsic::usub_with_overflow:
4534 OpTy =
RetTy->getContainedType(0);
4536 case Intrinsic::smul_with_overflow:
4538 OpTy =
RetTy->getContainedType(0);
4540 case Intrinsic::umul_with_overflow:
4542 OpTy =
RetTy->getContainedType(0);
4547 auto adjustTableCost = [&](
int ISD,
unsigned Cost,
4548 std::pair<InstructionCost, MVT> LT,
4551 MVT MTy = LT.second;
4558 return LegalizationCost * 1;
4563 if (ISD ==
ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
4565 if (
II->hasOneUse() && isa<StoreInst>(
II->user_back()))
4567 if (
auto *LI = dyn_cast<LoadInst>(
II->getOperand(0))) {
4568 if (LI->hasOneUse())
4575 return LegalizationCost * (int)
Cost;
4580 MVT MTy = LT.second;
4583 if (((ISD ==
ISD::CTTZ && !ST->hasBMI()) ||
4584 (ISD ==
ISD::CTLZ && !ST->hasLZCNT())) &&
4587 if (
auto *Cst = dyn_cast<ConstantInt>(Args[1]))
4588 if (Cst->isAllOnesValue())
4596 if (ST->useGLMDivSqrtCosts())
4598 if (
auto KindCost = Entry->Cost[
CostKind])
4599 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4601 if (ST->useSLMArithCosts())
4603 if (
auto KindCost = Entry->Cost[
CostKind])
4604 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4607 if (
const auto *Entry =
CostTableLookup(AVX512VBMI2CostTbl, ISD, MTy))
4608 if (
auto KindCost = Entry->Cost[
CostKind])
4609 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4611 if (ST->hasBITALG())
4612 if (
const auto *Entry =
CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
4613 if (
auto KindCost = Entry->Cost[
CostKind])
4614 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4616 if (ST->hasVPOPCNTDQ())
4617 if (
const auto *Entry =
CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
4618 if (
auto KindCost = Entry->Cost[
CostKind])
4619 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4623 if (
auto KindCost = Entry->Cost[
CostKind])
4624 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4628 if (
auto KindCost = Entry->Cost[
CostKind])
4629 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4633 if (
auto KindCost = Entry->Cost[
CostKind])
4634 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4638 if (
auto KindCost = Entry->Cost[
CostKind])
4639 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4643 if (
auto KindCost = Entry->Cost[
CostKind])
4644 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4648 if (
auto KindCost = Entry->Cost[
CostKind])
4649 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4653 if (
auto KindCost = Entry->Cost[
CostKind])
4654 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4658 if (
auto KindCost = Entry->Cost[
CostKind])
4659 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4663 if (
auto KindCost = Entry->Cost[
CostKind])
4664 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4668 if (
auto KindCost = Entry->Cost[
CostKind])
4669 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4673 if (
auto KindCost = Entry->Cost[
CostKind])
4674 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4678 if (
auto KindCost = Entry->Cost[
CostKind])
4679 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4684 if (
auto KindCost = Entry->Cost[
CostKind])
4685 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4688 if (
auto KindCost = Entry->Cost[
CostKind])
4689 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4692 if (ST->hasLZCNT()) {
4695 if (
auto KindCost = Entry->Cost[
CostKind])
4696 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4699 if (
auto KindCost = Entry->Cost[
CostKind])
4700 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4703 if (ST->hasPOPCNT()) {
4706 if (
auto KindCost = Entry->Cost[
CostKind])
4707 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4710 if (
auto KindCost = Entry->Cost[
CostKind])
4711 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4716 if (
auto KindCost = Entry->Cost[
CostKind])
4717 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4720 if (
auto KindCost = Entry->Cost[
CostKind])
4721 return adjustTableCost(Entry->ISD, *KindCost, LT, ICA.
getFlags());
4726 (IID == Intrinsic::fshl || IID == Intrinsic::fshr)) {
4727 Type *CondTy =
RetTy->getWithNewBitWidth(1);
4747 unsigned Index,
Value *Op0,
4762 if (Index == -1U && (Opcode == Instruction::ExtractElement ||
4763 Opcode == Instruction::InsertElement)) {
4768 assert(isa<FixedVectorType>(Val) &&
"Fixed vector type expected");
4773 if (Opcode == Instruction::ExtractElement) {
4779 if (Opcode == Instruction::InsertElement) {
4787 if (Index != -1U && (Opcode == Instruction::ExtractElement ||
4788 Opcode == Instruction::InsertElement)) {
4790 if (Opcode == Instruction::ExtractElement &&
4792 cast<FixedVectorType>(Val)->getNumElements() > 1)
4799 if (!LT.second.isVector())
4803 unsigned SizeInBits = LT.second.getSizeInBits();
4804 unsigned NumElts = LT.second.getVectorNumElements();
4805 unsigned SubNumElts = NumElts;
4806 Index = Index % NumElts;
4810 if (SizeInBits > 128) {
4811 assert((SizeInBits % 128) == 0 &&
"Illegal vector");
4812 unsigned NumSubVecs = SizeInBits / 128;
4813 SubNumElts = NumElts / NumSubVecs;
4814 if (SubNumElts <= Index) {
4815 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
4816 Index %= SubNumElts;
4820 MVT MScalarTy = LT.second.getScalarType();
4821 auto IsCheapPInsrPExtrInsertPS = [&]() {
4825 return (MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4827 (MScalarTy == MVT::f32 && ST->
hasSSE1() && Index == 0 &&
4828 Opcode == Instruction::InsertElement) ||
4829 (MScalarTy == MVT::f32 && ST->
hasSSE41() &&
4830 Opcode == Instruction::InsertElement);
4838 (Opcode != Instruction::InsertElement || !Op0 ||
4839 isa<UndefValue>(Op0)))
4840 return RegisterFileMoveCost;
4842 if (Opcode == Instruction::InsertElement &&
4843 isa_and_nonnull<UndefValue>(Op0)) {
4845 if (isa_and_nonnull<LoadInst>(Op1))
4846 return RegisterFileMoveCost;
4847 if (!IsCheapPInsrPExtrInsertPS()) {
4850 return 2 + RegisterFileMoveCost;
4852 return 1 + RegisterFileMoveCost;
4857 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
4858 return 1 + RegisterFileMoveCost;
4862 assert(ISD &&
"Unexpected vector opcode");
4863 if (ST->useSLMArithCosts())
4865 return Entry->Cost + RegisterFileMoveCost;
4868 if (IsCheapPInsrPExtrInsertPS())
4869 return 1 + RegisterFileMoveCost;
4878 if (Opcode == Instruction::InsertElement) {
4879 auto *SubTy = cast<VectorType>(Val);
4887 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
4891 RegisterFileMoveCost;
4898 cast<FixedVectorType>(Ty)->getNumElements() &&
4899 "Vector size mismatch");
4902 MVT MScalarTy = LT.second.getScalarType();
4903 unsigned LegalVectorBitWidth = LT.second.getSizeInBits();
4906 constexpr unsigned LaneBitWidth = 128;
4907 assert((LegalVectorBitWidth < LaneBitWidth ||
4908 (LegalVectorBitWidth % LaneBitWidth) == 0) &&
4911 const int NumLegalVectors = *LT.first.getValue();
4912 assert(NumLegalVectors >= 0 &&
"Negative cost!");
4917 if ((MScalarTy == MVT::i16 && ST->
hasSSE2()) ||
4919 (MScalarTy == MVT::f32 && ST->
hasSSE41())) {
4922 if (LegalVectorBitWidth <= LaneBitWidth) {
4938 assert((LegalVectorBitWidth % LaneBitWidth) == 0 &&
"Illegal vector");
4939 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
4940 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
4941 unsigned NumLegalElts =
4942 LT.second.getVectorNumElements() * NumLegalVectors;
4944 "Vector has been legalized to smaller element count");
4945 assert((NumLegalElts % NumLanesTotal) == 0 &&
4946 "Unexpected elts per lane");
4947 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
4949 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
4953 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
4955 NumEltsPerLane, NumEltsPerLane *
I);
4956 if (LaneEltMask.
isZero())
4962 I * NumEltsPerLane, LaneTy);
4967 APInt AffectedLanes =
4970 AffectedLanes, NumLegalVectors,
true);
4971 for (
int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) {
4972 for (
unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) {
4973 unsigned I = NumLegalLanes * LegalVec + Lane;
4976 if (!AffectedLanes[
I] ||
4977 (Lane == 0 && FullyAffectedLegalVectors[LegalVec]))
4980 I * NumEltsPerLane, LaneTy);
4984 }
else if (LT.second.isVector()) {
4995 unsigned NumElts = LT.second.getVectorNumElements();
4998 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first;
5007 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
5008 unsigned MaxElts = ST->
hasAVX2() ? 32 : 16;
5009 unsigned MOVMSKCost = (NumElts + MaxElts - 1) / MaxElts;
5013 if (LT.second.isVector()) {
5014 unsigned NumLegalElts =
5015 LT.second.getVectorNumElements() * NumLegalVectors;
5017 "Vector has been legalized to smaller element count");
5021 if (LegalVectorBitWidth > LaneBitWidth) {
5022 unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth;
5023 unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors;
5024 assert((NumLegalElts % NumLanesTotal) == 0 &&
5025 "Unexpected elts per lane");
5026 unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal;
5030 APInt WidenedDemandedElts = DemandedElts.
zext(NumLegalElts);
5034 for (
unsigned I = 0;
I != NumLanesTotal; ++
I) {
5036 NumEltsPerLane,
I * NumEltsPerLane);
5037 if (LaneEltMask.
isZero())
5040 I * NumEltsPerLane, LaneTy);
5042 LaneTy, LaneEltMask,
false, Extract,
CostKind);
5059 int VF,
const APInt &DemandedDstElts,
5065 auto bailout = [&]() {
5075 unsigned PromEltTyBits = EltTyBits;
5076 switch (EltTyBits) {
5107 int NumDstElements = VF * ReplicationFactor;
5121 if (PromEltTyBits != EltTyBits) {
5127 Instruction::SExt, PromSrcVecTy, SrcVecTy,
5134 ReplicationFactor, VF,
5140 "We expect that the legalization doesn't affect the element width, "
5141 "doesn't coalesce/split elements.");
5144 unsigned NumDstVectors =
5145 divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
5154 DemandedDstElts.
zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
5155 unsigned NumDstVectorsDemanded = DemandedDstVectors.
popcount();
5160 return NumDstVectorsDemanded * SingleShuffleCost;
5171 if (
auto *SI = dyn_cast_or_null<StoreInst>(
I)) {
5174 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
5175 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
5182 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
5192 auto *VTy = dyn_cast<FixedVectorType>(Src);
5197 if (Opcode == Instruction::Store && OpInfo.
isConstant())
5203 if (!VTy || !LT.second.isVector()) {
5205 return (LT.second.isFloatingPoint() ?
Cost : 0) + LT.first * 1;
5208 bool IsLoad = Opcode == Instruction::Load;
5210 Type *EltTy = VTy->getElementType();
5215 const unsigned SrcNumElt = VTy->getNumElements();
5218 int NumEltRemaining = SrcNumElt;
5220 auto NumEltDone = [&]() {
return SrcNumElt - NumEltRemaining; };
5222 const int MaxLegalOpSizeBytes =
divideCeil(LT.second.getSizeInBits(), 8);
5225 const unsigned XMMBits = 128;
5226 if (XMMBits % EltTyBits != 0)
5230 const int NumEltPerXMM = XMMBits / EltTyBits;
5234 for (
int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0;
5235 NumEltRemaining > 0; CurrOpSizeBytes /= 2) {
5237 if ((8 * CurrOpSizeBytes) % EltTyBits != 0)
5241 int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits;
5243 assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 &&
"How'd we get here?");
5244 assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) ||
5245 (CurrOpSizeBytes == MaxLegalOpSizeBytes)) &&
5246 "Unless we haven't halved the op size yet, "
5247 "we have less than two op's sized units of work left.");
5249 auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM
5253 assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 &&
5254 "After halving sizes, the vector elt count is no longer a multiple "
5255 "of number of elements per operation?");
5256 auto *CoalescedVecTy =
5257 CurrNumEltPerOp == 1
5261 EltTyBits * CurrNumEltPerOp),
5262 CurrVecTy->getNumElements() / CurrNumEltPerOp);
5265 "coalesciing elements doesn't change vector width.");
5267 while (NumEltRemaining > 0) {
5268 assert(SubVecEltsLeft >= 0 &&
"Subreg element count overconsumtion?");
5272 if (NumEltRemaining < CurrNumEltPerOp &&
5273 (!IsLoad || Alignment.
valueOrOne() < CurrOpSizeBytes) &&
5274 CurrOpSizeBytes != 1)
5282 if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
5284 else if (CurrOpSizeBytes < 4)
5294 bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0;
5297 if (SubVecEltsLeft == 0) {
5298 SubVecEltsLeft += CurrVecTy->getNumElements();
5303 VTy, {},
CostKind, NumEltDone(), CurrVecTy);
5310 if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) {
5311 int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM;
5312 assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 &&
"");
5313 int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp;
5314 APInt DemandedElts =
5316 CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
5317 assert(DemandedElts.
popcount() == 1 &&
"Inserting single value");
5322 SubVecEltsLeft -= CurrNumEltPerOp;
5323 NumEltRemaining -= CurrNumEltPerOp;
5328 assert(NumEltRemaining <= 0 &&
"Should have processed all the elements.");
5337 bool IsLoad = (Instruction::Load == Opcode);
5338 bool IsStore = (Instruction::Store == Opcode);
5340 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
5345 unsigned NumElem = SrcVTy->getNumElements();
5353 MaskTy, DemandedElts,
false,
true,
CostKind);
5358 InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
5360 SrcVTy, DemandedElts, IsLoad, IsStore,
CostKind);
5364 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
5372 if (Ty == MVT::i16 || Ty == MVT::i32 || Ty == MVT::i64)
5374 return Cost + LT.first;
5376 if (VT.isSimple() && Ty != VT.getSimpleVT() &&
5377 LT.second.getVectorNumElements() == NumElem)
5394 return Cost + LT.first * (IsLoad ? 2 : 8);
5397 return Cost + LT.first;
5405 if (
Info.isSameBase() &&
Info.isKnownStride()) {
5409 if (
const auto *BaseGEP = dyn_cast<GetElementPtrInst>(
Base)) {
5411 return getGEPCost(BaseGEP->getSourceElementType(),
5412 BaseGEP->getPointerOperand(), Indices,
nullptr,
5427 const unsigned NumVectorInstToHideOverhead = 10;
5440 return NumVectorInstToHideOverhead;
5450 std::optional<FastMathFlags> FMF,
5491 assert(ISD &&
"Invalid opcode");
5499 if (ST->useSLMArithCosts())
5514 MVT MTy = LT.second;
5516 auto *ValVTy = cast<FixedVectorType>(ValTy);
5529 if (LT.first != 1 && MTy.
isVector() &&
5535 ArithmeticCost *= LT.first - 1;
5538 if (ST->useSLMArithCosts())
5540 return ArithmeticCost + Entry->Cost;
5544 return ArithmeticCost + Entry->Cost;
5548 return ArithmeticCost + Entry->Cost;
5597 if (ValVTy->getElementType()->isIntegerTy(1)) {
5599 if (LT.first != 1 && MTy.
isVector() &&
5605 ArithmeticCost *= LT.first - 1;
5609 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
5610 return ArithmeticCost + Entry->Cost;
5613 return ArithmeticCost + Entry->Cost;
5616 return ArithmeticCost + Entry->Cost;
5619 return ArithmeticCost + Entry->Cost;
5624 unsigned NumVecElts = ValVTy->getNumElements();
5625 unsigned ScalarSize = ValVTy->getScalarSizeInBits();
5635 if (LT.first != 1 && MTy.
isVector() &&
5641 ReductionCost *= LT.first - 1;
5647 while (NumVecElts > 1) {
5649 unsigned Size = NumVecElts * ScalarSize;
5657 }
else if (
Size == 128) {
5660 if (ValVTy->isFloatingPointTy())
5668 }
else if (
Size == 64) {
5671 if (ValVTy->isFloatingPointTy())
5684 Instruction::LShr, ShiftTy,
CostKind,
5711 MVT MTy = LT.second;
5715 ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ?
ISD::UMIN
5719 "Expected float point or integer vector type.");
5720 ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
5788 auto *ValVTy = cast<FixedVectorType>(ValTy);
5789 unsigned NumVecElts = ValVTy->getNumElements();
5793 if (LT.first != 1 && MTy.
isVector() &&
5799 MinMaxCost *= LT.first - 1;
5805 return MinMaxCost + Entry->Cost;
5809 return MinMaxCost + Entry->Cost;
5813 return MinMaxCost + Entry->Cost;
5817 return MinMaxCost + Entry->Cost;
5829 while (NumVecElts > 1) {
5831 unsigned Size = NumVecElts * ScalarSize;
5839 }
else if (
Size == 128) {
5849 }
else if (
Size == 64) {
5910 if (BitSize % 64 != 0)
5911 ImmVal = Imm.sext(
alignTo(BitSize, 64));
5916 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
5922 return std::max<InstructionCost>(1,
Cost);
5932 unsigned ImmBitWidth = Imm.getBitWidth();
5939 unsigned ImmIdx = ~0U;
5943 case Instruction::GetElementPtr:
5950 case Instruction::Store:
5953 case Instruction::ICmp:
5959 if (
Idx == 1 && ImmBitWidth == 64) {
5960 uint64_t ImmVal = Imm.getZExtValue();
5961 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
5966 case Instruction::And:
5970 if (
Idx == 1 && ImmBitWidth == 64 && Imm.isIntN(32))
5973 if (
Idx == 1 && ImmBitWidth == 64 && ST->is64Bit() && ST->hasBMI() &&
5978 case Instruction::Add:
5979 case Instruction::Sub:
5981 if (
Idx == 1 && ImmBitWidth == 64 && Imm.getZExtValue() == 0x80000000)
5985 case Instruction::UDiv:
5986 case Instruction::SDiv:
5987 case Instruction::URem:
5988 case Instruction::SRem:
5993 case Instruction::Mul:
5994 case Instruction::Or:
5995 case Instruction::Xor:
5999 case Instruction::Shl:
6000 case Instruction::LShr:
6001 case Instruction::AShr:
6005 case Instruction::Trunc:
6006 case Instruction::ZExt:
6007 case Instruction::SExt:
6008 case Instruction::IntToPtr:
6009 case Instruction::PtrToInt:
6010 case Instruction::BitCast:
6011 case Instruction::PHI:
6012 case Instruction::Call:
6013 case Instruction::Select:
6014 case Instruction::Ret:
6015 case Instruction::Load:
6019 if (
Idx == ImmIdx) {
6044 case Intrinsic::sadd_with_overflow:
6045 case Intrinsic::uadd_with_overflow:
6046 case Intrinsic::ssub_with_overflow:
6047 case Intrinsic::usub_with_overflow:
6048 case Intrinsic::smul_with_overflow:
6049 case Intrinsic::umul_with_overflow:
6050 if ((
Idx == 1) && Imm.getBitWidth() <= 64 && Imm.isSignedIntN(32))
6053 case Intrinsic::experimental_stackmap:
6054 if ((
Idx < 2) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
6057 case Intrinsic::experimental_patchpoint_void:
6058 case Intrinsic::experimental_patchpoint:
6059 if ((
Idx < 4) || (Imm.getBitWidth() <= 64 && Imm.isSignedIntN(64)))
6075int X86TTIImpl::getGatherOverhead()
const {
6088int X86TTIImpl::getScatterOverhead()
const {
6102 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
6103 unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
6113 if (IndexSize < 64 || !
GEP)
6116 unsigned NumOfVarIndices = 0;
6117 const Value *Ptrs =
GEP->getPointerOperand();
6120 for (
unsigned I = 1, E =
GEP->getNumOperands();
I != E; ++
I) {
6121 if (isa<Constant>(
GEP->getOperand(
I)))
6123 Type *IndxTy =
GEP->getOperand(
I)->getType();
6124 if (
auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
6125 IndxTy = IndexVTy->getElementType();
6127 !isa<SExtInst>(
GEP->getOperand(
I))) ||
6128 ++NumOfVarIndices > 1)
6131 return (
unsigned)32;
6136 unsigned IndexSize = (ST->
hasAVX512() && VF >= 16)
6137 ? getIndexSizeInBits(
Ptr,
DL)
6145 *std::max(IdxsLT.first, SrcLT.first).getValue();
6146 if (SplitFactor > 1) {
6150 return SplitFactor * getGSVectorCost(Opcode,
CostKind, SplitSrcTy,
Ptr,
6160 const int GSOverhead = (Opcode == Instruction::Load) ? getGatherOverhead()
6161 : getScatterOverhead();
6169 unsigned Opcode,
Type *SrcVTy,
const Value *
Ptr,
bool VariableMask,
6172 if ((Opcode == Instruction::Load &&
6175 Align(Alignment)))) ||
6176 (Opcode == Instruction::Store &&
6179 Align(Alignment)))))
6185 if (!PtrTy &&
Ptr->getType()->isVectorTy())
6186 PtrTy = dyn_cast<PointerType>(
6187 cast<VectorType>(
Ptr->getType())->getElementType());
6188 assert(PtrTy &&
"Unexpected type for Ptr argument");
6190 return getGSVectorCost(Opcode,
CostKind, SrcVTy,
Ptr, Alignment,
6206 return ST->hasMacroFusion() || ST->hasBranchFusion();
6213 if (isa<VectorType>(DataTy) && cast<FixedVectorType>(DataTy)->
getNumElements() == 1)
6225 if (ScalarTy->
isHalfTy() && ST->hasBWI())
6235 return IntWidth == 32 || IntWidth == 64 ||
6236 ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
6248 if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
6265 if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
6287 if (!isa<VectorType>(DataTy))
6297 Type *ScalarTy = cast<VectorType>(DataTy)->getElementType();
6306 return IntWidth == 32 || IntWidth == 64 ||
6307 ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
6314bool X86TTIImpl::supportsGather()
const {
6328 unsigned NumElts = cast<FixedVectorType>(VTy)->getNumElements();
6329 return NumElts == 1 ||
6330 (ST->
hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX())));
6345 return IntWidth == 32 || IntWidth == 64;
6349 if (!supportsGather() || !ST->preferGather())
6364 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
6365 assert(OpcodeMask.
size() == NumElements &&
"Mask and VecTy are incompatible");
6370 for (
int Lane : seq<int>(0, NumElements)) {
6371 unsigned Opc = OpcodeMask.
test(Lane) ? Opcode1 : Opcode0;
6373 if (Lane % 2 == 0 && Opc != Instruction::FSub)
6375 if (Lane % 2 == 1 && Opc != Instruction::FAdd)
6379 Type *ElemTy = cast<VectorType>(VecTy)->getElementType();
6381 return ST->
hasSSE3() && NumElements % 4 == 0;
6383 return ST->
hasSSE3() && NumElements % 2 == 0;
6389 if (!ST->
hasAVX512() || !ST->preferScatter())
6402 if (
I->getOpcode() == Instruction::FDiv)
6418 TM.getSubtargetImpl(*Caller)->getFeatureBits();
6420 TM.getSubtargetImpl(*Callee)->getFeatureBits();
6423 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
6424 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
6425 if (RealCallerBits == RealCalleeBits)
6430 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
6434 if (
const auto *CB = dyn_cast<CallBase>(&
I)) {
6436 if (CB->isInlineAsm())
6440 for (
Value *Arg : CB->args())
6441 Types.push_back(Arg->getType());
6442 if (!CB->getType()->isVoidTy())
6443 Types.push_back(CB->getType());
6446 auto IsSimpleTy = [](
Type *Ty) {
6447 return !Ty->isVectorTy() && !Ty->isAggregateType();
6449 if (
all_of(Types, IsSimpleTy))
6452 if (
Function *NestedCallee = CB->getCalledFunction()) {
6454 if (NestedCallee->isIntrinsic())
6489 [](
Type *
T) {
return T->isVectorTy() ||
T->isAggregateType(); });
6498 Options.AllowOverlappingLoads =
true;
6503 if (PreferredWidth >= 512 && ST->
hasAVX512() && ST->hasEVEX512())
6504 Options.LoadSizes.push_back(64);
6505 if (PreferredWidth >= 256 && ST->
hasAVX())
Options.LoadSizes.push_back(32);
6506 if (PreferredWidth >= 128 && ST->
hasSSE2())
Options.LoadSizes.push_back(16);
6508 if (ST->is64Bit()) {
6509 Options.LoadSizes.push_back(8);
6511 Options.LoadSizes.push_back(4);
6512 Options.LoadSizes.push_back(2);
6513 Options.LoadSizes.push_back(1);
6518 return supportsGather();
6529 return !(ST->isAtom());
6549 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
6555 bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
6568 if (UseMaskedMemOp) {
6570 for (
unsigned Index : Indices) {
6571 assert(Index < Factor &&
"Invalid index for interleaved memory op");
6572 for (
unsigned Elm = 0; Elm < VF; Elm++)
6573 DemandedLoadStoreElts.
setBit(Index + Elm * Factor);
6580 UseMaskForGaps ? DemandedLoadStoreElts
6589 if (UseMaskForGaps) {
6595 if (Opcode == Instruction::Load) {
6602 static const CostTblEntry AVX512InterleavedLoadTbl[] = {
6603 {3, MVT::v16i8, 12},
6604 {3, MVT::v32i8, 14},
6605 {3, MVT::v64i8, 22},
6608 if (
const auto *Entry =
6610 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6622 unsigned NumOfLoadsInInterleaveGrp =
6623 Indices.
size() ? Indices.
size() : Factor;
6632 unsigned NumOfUnfoldedLoads =
6633 UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
6636 unsigned NumOfShufflesPerResult =
6637 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
6644 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
6647 MaskCost + NumOfUnfoldedLoads * MemOpCost +
6654 assert(Opcode == Instruction::Store &&
6655 "Expected Store Instruction at this point");
6657 static const CostTblEntry AVX512InterleavedStoreTbl[] = {
6658 {3, MVT::v16i8, 12},
6659 {3, MVT::v32i8, 14},
6660 {3, MVT::v64i8, 26},
6663 {4, MVT::v16i8, 11},
6664 {4, MVT::v32i8, 14},
6668 if (
const auto *Entry =
6670 return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
6675 unsigned NumOfSources = Factor;
6678 unsigned NumOfShufflesPerStore = NumOfSources - 1;
6682 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
6685 NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
6693 bool UseMaskForCond,
bool UseMaskForGaps) {
6694 auto *VecTy = cast<FixedVectorType>(
BaseTy);
6696 auto isSupportedOnAVX512 = [&](
Type *VecTy) {
6697 Type *EltTy = cast<VectorType>(VecTy)->getElementType();
6702 return ST->hasBWI();
6704 return ST->hasBF16();
6707 if (ST->
hasAVX512() && isSupportedOnAVX512(VecTy))
6709 Opcode, VecTy, Factor, Indices, Alignment,
6712 if (UseMaskForCond || UseMaskForGaps)
6715 UseMaskForCond, UseMaskForGaps);
6735 unsigned VF = VecTy->getNumElements() / Factor;
6736 Type *ScalarTy = VecTy->getElementType();
6768 {2, MVT::v16i16, 9},
6769 {2, MVT::v32i16, 18},
6772 {2, MVT::v16i32, 8},
6773 {2, MVT::v32i32, 16},
6777 {2, MVT::v16i64, 16},
6778 {2, MVT::v32i64, 32},
6783 {3, MVT::v16i8, 11},
6784 {3, MVT::v32i8, 14},
6789 {3, MVT::v16i16, 28},
6790 {3, MVT::v32i16, 56},
6795 {3, MVT::v16i32, 14},
6796 {3, MVT::v32i32, 32},
6800 {3, MVT::v8i64, 10},
6801 {3, MVT::v16i64, 20},
6806 {4, MVT::v16i8, 24},
6807 {4, MVT::v32i8, 56},
6810 {4, MVT::v4i16, 17},
6811 {4, MVT::v8i16, 33},
6812 {4, MVT::v16i16, 75},
6813 {4, MVT::v32i16, 150},
6817 {4, MVT::v8i32, 16},
6818 {4, MVT::v16i32, 32},
6819 {4, MVT::v32i32, 68},
6823 {4, MVT::v8i64, 20},
6824 {4, MVT::v16i64, 40},
6829 {6, MVT::v16i8, 43},
6830 {6, MVT::v32i8, 82},
6832 {6, MVT::v2i16, 13},
6834 {6, MVT::v8i16, 39},
6835 {6, MVT::v16i16, 106},
6836 {6, MVT::v32i16, 212},
6839 {6, MVT::v4i32, 15},
6840 {6, MVT::v8i32, 31},
6841 {6, MVT::v16i32, 64},
6844 {6, MVT::v4i64, 18},
6845 {6, MVT::v8i64, 36},
6850 static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
6864 static const CostTblEntry AVX2InterleavedStoreTbl[] = {
6869 {2, MVT::v16i16, 4},
6870 {2, MVT::v32i16, 8},
6874 {2, MVT::v16i32, 8},
6875 {2, MVT::v32i32, 16},
6880 {2, MVT::v16i64, 16},
6881 {2, MVT::v32i64, 32},
6886 {3, MVT::v16i8, 11},
6887 {3, MVT::v32i8, 13},
6891 {3, MVT::v8i16, 12},
6892 {3, MVT::v16i16, 27},
6893 {3, MVT::v32i16, 54},
6897 {3, MVT::v8i32, 11},
6898 {3, MVT::v16i32, 22},
6899 {3, MVT::v32i32, 48},
6903 {3, MVT::v8i64, 12},
6904 {3, MVT::v16i64, 24},
6910 {4, MVT::v32i8, 12},
6914 {4, MVT::v8i16, 10},
6915 {4, MVT::v16i16, 32},
6916 {4, MVT::v32i16, 64},
6920 {4, MVT::v8i32, 16},
6921 {4, MVT::v16i32, 32},
6922 {4, MVT::v32i32, 64},
6926 {4, MVT::v8i64, 20},
6927 {4, MVT::v16i64, 40},
6932 {6, MVT::v16i8, 27},
6933 {6, MVT::v32i8, 90},
6935 {6, MVT::v2i16, 10},
6936 {6, MVT::v4i16, 15},
6937 {6, MVT::v8i16, 21},
6938 {6, MVT::v16i16, 58},
6939 {6, MVT::v32i16, 90},
6942 {6, MVT::v4i32, 12},
6943 {6, MVT::v8i32, 33},
6944 {6, MVT::v16i32, 66},
6947 {6, MVT::v4i64, 15},
6948 {6, MVT::v8i64, 30},
6951 static const CostTblEntry SSE2InterleavedStoreTbl[] = {
6962 if (Opcode == Instruction::Load) {
6963 auto GetDiscountedCost = [Factor, NumMembers = Indices.
size(),
6967 return MemOpCosts +
divideCeil(NumMembers * Entry->Cost, Factor);
6971 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedLoadTbl, Factor,
6973 return GetDiscountedCost(Entry);
6976 if (
const auto *Entry =
CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
6978 return GetDiscountedCost(Entry);
6981 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedLoadTbl, Factor,
6983 return GetDiscountedCost(Entry);
6985 assert(Opcode == Instruction::Store &&
6986 "Expected Store Instruction at this point");
6988 "Interleaved store only supports fully-interleaved groups.");
6990 if (
const auto *Entry =
CostTableLookup(AVX2InterleavedStoreTbl, Factor,
6992 return MemOpCosts + Entry->Cost;
6995 if (
const auto *Entry =
CostTableLookup(SSE2InterleavedStoreTbl, Factor,
6997 return MemOpCosts + Entry->Cost;
7002 UseMaskForCond, UseMaskForGaps);
7007 bool HasBaseReg, int64_t Scale,
7008 unsigned AddrSpace)
const {
7036 return AM.
Scale != 0;
7050 if (ST->hasXOP() && (Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64))
7055 if (ST->
hasAVX2() && (Bits == 32 || Bits == 64))
7059 if (ST->hasBWI() && Bits == 16)
7068 Type *ScalarValTy)
const {
7069 if (ST->hasF16C() && ScalarMemTy->
isHalfTy()) {
7083 if (
I->getOpcode() == Instruction::Mul &&
7085 for (
auto &
Op :
I->operands()) {
7087 if (
any_of(Ops, [&](
Use *U) {
return U->get() ==
Op; }))
7095 Ops.
push_back(&cast<Instruction>(
Op)->getOperandUse(0));
7104 return !Ops.
empty();
7110 int ShiftAmountOpNum = -1;
7112 ShiftAmountOpNum = 1;
7113 else if (
auto *
II = dyn_cast<IntrinsicInst>(
I)) {
7114 if (
II->getIntrinsicID() == Intrinsic::fshl ||
7115 II->getIntrinsicID() == Intrinsic::fshr)
7116 ShiftAmountOpNum = 2;
7119 if (ShiftAmountOpNum == -1)
7122 auto *Shuf = dyn_cast<ShuffleVectorInst>(
I->getOperand(ShiftAmountOpNum));
7125 Ops.
push_back(&
I->getOperandUse(ShiftAmountOpNum));
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
unsigned popcount() const
Count the number of bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
bool is128BitVector() const
Return true if this is a 128-bit vector type.
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
size_type size() const
Returns the number of bits in this bitvector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
static Type * getDoubleTy(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getDoubleElementsVectorType(VectorType *VTy)
This static method returns a VectorType with twice as many elements as the input type and the same el...
Type * getElementType() const
bool useAVX512Regs() const
unsigned getPreferVectorWidth() const
InstructionCost getInterleavedMemoryOpCostAVX512(unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool isLegalNTStore(Type *DataType, Align Alignment)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool enableInterleavedAccessVectorization()
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLegalNTLoad(Type *DataType, Align Alignment)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment)
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
bool hasConditionalLoadStoreForType(Type *Ty=nullptr) const
bool supportsEfficientVectorElementLoadStore() const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool prefersVectorizedAddressing() const
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment)
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const override
bool isLegalMaskedStore(Type *DataType, Align Alignment)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
Calculate the cost of Gather / Scatter operation.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
unsigned getMaxInterleaveFactor(ElementCount VF)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isVectorShiftByScalarCheap(Type *Ty) const
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
unsigned getAtomicMemIntrinsicMaxElementSize() const
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
InstructionCost getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getBranchMispredictPenalty() const
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, FastMathFlags FMF)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Type) const
bool hasDivRemOp(Type *DataType, bool IsSigned)
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ BSWAP
Byte Swap and Counting operators.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMULO
Same for multiplication.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OneUse_match< T > m_OneUse(const T &SubPattern)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
OutputIt copy(R &&Range, OutputIt Out)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
unsigned RecipThroughputCost
std::optional< unsigned > operator[](TargetTransformInfo::TargetCostKind Kind) const
unsigned SizeAndLatencyCost
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Type Conversion Cost Table.