x86.ad revision 3891
3041N/A//
3239N/A// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
3041N/A// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3041N/A//
3041N/A// This code is free software; you can redistribute it and/or modify it
3041N/A// under the terms of the GNU General Public License version 2 only, as
3041N/A// published by the Free Software Foundation.
3041N/A//
3041N/A// This code is distributed in the hope that it will be useful, but WITHOUT
3041N/A// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
3041N/A// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
3041N/A// version 2 for more details (a copy is included in the LICENSE file that
3041N/A// accompanied this code).
3041N/A//
3041N/A// You should have received a copy of the GNU General Public License version
3041N/A// 2 along with this work; if not, write to the Free Software Foundation,
3041N/A// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
3041N/A//
3041N/A// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
3041N/A// or visit www.oracle.com if you need additional information or have any
3041N/A// questions.
3041N/A//
3041N/A//
3041N/A
3041N/A// X86 Common Architecture Description File
3041N/A
3845N/A//----------REGISTER DEFINITION BLOCK------------------------------------------
3845N/A// This information is used by the matcher and the register allocator to
3845N/A// describe individual registers and classes of registers within the target
3845N/A// architecture.
3845N/A
3845N/Aregister %{
3845N/A//----------Architecture Description Register Definitions----------------------
3845N/A// General Registers
3845N/A// "reg_def" name ( register save type, C convention save type,
3845N/A// ideal register type, encoding );
3845N/A// Register Save Types:
3845N/A//
3845N/A// NS = No-Save: The register allocator assumes that these registers
3845N/A// can be used without saving upon entry to the method, &
3845N/A// that they do not need to be saved at call sites.
3845N/A//
3845N/A// SOC = Save-On-Call: The register allocator assumes that these registers
3845N/A// can be used without saving upon entry to the method,
3845N/A// but that they must be saved at call sites.
3845N/A//
3845N/A// SOE = Save-On-Entry: The register allocator assumes that these registers
3845N/A// must be saved before using them upon entry to the
3845N/A// method, but they do not need to be saved at call
3845N/A// sites.
3845N/A//
3845N/A// AS = Always-Save: The register allocator assumes that these registers
3845N/A// must be saved before using them upon entry to the
3845N/A// method, & that they must be saved at call sites.
3845N/A//
3845N/A// Ideal Register Type is used to determine how to save & restore a
3845N/A// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
3845N/A// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
3845N/A//
3845N/A// The encoding number is the actual bit-pattern placed into the opcodes.
3845N/A
3845N/A// XMM registers. 256-bit registers or 8 words each, labeled (a)-h.
3845N/A// Word a in each register holds a Float, words ab hold a Double.
3845N/A// The whole registers are used in SSE4.2 version intrinsics,
3845N/A// array copy stubs and superword operations (see UseSSE42Intrinsics,
3845N/A// UseXMMForArrayCopy and UseSuperword flags).
3845N/A// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
3845N/A// Linux ABI: No register preserved across function calls
3845N/A// XMM0-XMM7 might hold parameters
3845N/A// Windows ABI: XMM6-XMM15 preserved across function calls
3845N/A// XMM0-XMM3 might hold parameters
3845N/A
// XMM0-XMM5 are defined outside any #ifdef, so these definitions apply to
// every configuration this file is built for. Each register is described by
// eight reg_def entries: the unsuffixed name (word a) plus suffixes b-h.
// All eight entries of one register share the same encoding value and are
// bound to successive VMReg slots through next(1) .. next(7).
// Both save-type columns are SOC for every entry in this group.
3845N/Areg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
3891N/Areg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
3891N/Areg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
3891N/Areg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
3891N/Areg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
3891N/Areg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
3891N/Areg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
3891N/Areg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
3891N/Areg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
3891N/Areg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
3891N/Areg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
3891N/Areg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
3891N/Areg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
3891N/Areg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
3891N/Areg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
3891N/Areg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
3891N/Areg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
3891N/Areg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
3891N/Areg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
3891N/Areg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
3891N/Areg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
3891N/Areg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
3891N/Areg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
3891N/Areg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
3891N/Areg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
3891N/Areg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
3891N/Areg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
3891N/Areg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
3891N/Areg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
3891N/Areg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
3891N/Areg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
3891N/Areg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
3891N/Areg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
3891N/Areg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
3891N/Areg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
3891N/Areg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
3891N/Areg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
3891N/Areg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
3891N/Areg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
3891N/Areg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
3891N/Areg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
3891N/Areg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
3891N/Areg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
3845N/A
3845N/A#ifdef _WIN64
3845N/A
// Windows 64-bit branch: XMM6-XMM15 are all defined here (no _LP64 test in
// this branch). The first save-type column stays SOC, while the C-convention
// column is SOE for every word of every register in this group.
3845N/Areg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
3891N/Areg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
3891N/Areg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
3891N/Areg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
3891N/Areg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
3891N/Areg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
3891N/Areg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
3891N/Areg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
3891N/Areg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
3891N/Areg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
3891N/Areg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
3891N/Areg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
3891N/Areg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
3891N/Areg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
3891N/Areg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
3891N/Areg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
3891N/Areg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
3891N/Areg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
3891N/Areg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
3891N/Areg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
3891N/Areg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
3891N/Areg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
3891N/Areg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
3891N/Areg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
3891N/Areg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
3891N/Areg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
3891N/Areg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
3891N/Areg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
3891N/Areg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
3891N/Areg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
3891N/Areg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
3891N/Areg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
3891N/Areg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
3891N/Areg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
3891N/Areg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
3891N/Areg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
3891N/Areg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
3891N/Areg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
3891N/Areg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
3891N/Areg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
3891N/Areg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
3891N/Areg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
3891N/Areg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
3891N/Areg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
3891N/Areg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
3891N/Areg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
3891N/Areg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
3891N/Areg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
3891N/Areg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
3891N/Areg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
3891N/Areg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
3891N/Areg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
3891N/Areg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
3891N/Areg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
3891N/Areg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
3891N/Areg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
3891N/Areg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
3891N/Areg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
3891N/Areg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
3891N/Areg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
3891N/Areg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
3891N/Areg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
3891N/Areg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
3891N/Areg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
3891N/Areg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
3891N/Areg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
3891N/Areg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
3891N/Areg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
3891N/Areg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
3891N/Areg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
3891N/Areg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));
3845N/A
3845N/A#else // _WIN64
3845N/A
// Non-Windows-64 branch: XMM6 and XMM7 are always defined, with SOC in both
// save-type columns.
3845N/Areg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
3891N/Areg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
3891N/Areg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
3891N/Areg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
3891N/Areg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
3891N/Areg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
3891N/Areg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
3891N/Areg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
3891N/Areg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
3891N/Areg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
3891N/Areg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
3891N/Areg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
3891N/Areg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
3891N/Areg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
3891N/Areg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
3845N/A
3845N/A#ifdef _LP64
3845N/A
// XMM8-XMM15 exist in this branch only when _LP64 is defined; they use SOC
// in both save-type columns.
3845N/Areg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
3891N/Areg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
3891N/Areg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
3891N/Areg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
3891N/Areg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
3891N/Areg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
3891N/Areg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
3891N/Areg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
3891N/Areg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
3891N/Areg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
3891N/Areg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
3891N/Areg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
3891N/Areg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
3891N/Areg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
3891N/Areg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
3891N/Areg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
3891N/Areg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
3891N/Areg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
3891N/Areg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
3891N/Areg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
3891N/Areg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
3891N/Areg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
3891N/Areg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
3891N/Areg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
3891N/Areg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
3891N/Areg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
3891N/Areg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
3891N/Areg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
3891N/Areg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
3891N/Areg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
3891N/Areg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
3891N/Areg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
3891N/Areg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
3891N/Areg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
3891N/Areg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
3891N/Areg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
3891N/Areg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
3891N/Areg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
3891N/Areg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
3891N/Areg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
3891N/Areg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
3891N/Areg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
3891N/Areg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
3891N/Areg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
3891N/Areg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
3891N/Areg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
3891N/Areg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
3891N/Areg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
3891N/Areg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
3891N/Areg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
3845N/A
3845N/Areg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
3891N/Areg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
3891N/Areg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
3891N/Areg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
3891N/Areg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
3891N/Areg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
3891N/Areg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
3891N/Areg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
3845N/A
3845N/A#endif // _LP64
3845N/A
3845N/A#endif // _WIN64
3845N/A
// Flags register definition. It has ideal register type 0 and no backing
// VMReg (VMRegImpl::Bad()). The encoding is 16 when _LP64 is defined and 8
// otherwise.
// NOTE(review): the encoding looks like "one past the highest XMM encoding"
// for each configuration - confirm before relying on it.
3845N/A#ifdef _LP64
3845N/Areg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
3845N/A#else
3845N/Areg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
3845N/A#endif // _LP64
3845N/A
// Allocation class chunk1 lists every word (a-h) of XMM0-XMM7 in register
// order; the words of XMM8-XMM15 are appended only when _LP64 is defined.
3845N/Aalloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
3845N/A XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
3845N/A XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
3845N/A XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
3845N/A XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
3845N/A XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
3845N/A XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
3845N/A XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
3845N/A#ifdef _LP64
3845N/A ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
3845N/A XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
3845N/A XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
3845N/A XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
3845N/A XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
3845N/A XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
3845N/A XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
3845N/A XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
3845N/A#endif
3845N/A );
3845N/A
3845N/A// flags allocation class should be last.
// chunk2 contains only RFLAGS.
3845N/Aalloc_class chunk2(RFLAGS);
3845N/A
// Register classes. Each class lists, per XMM register, as many leading words
// as the value kind occupies: 1 word (a) for float / 32-bit vectors, 2 words
// (a,b) for double / 64-bit vectors, 4 words (a-d) for 128-bit vectors and
// all 8 words (a-h) for 256-bit vectors. In every class the XMM8-XMM15
// entries are present only when _LP64 is defined.

3845N/A// Singleton class for condition codes
3845N/Areg_class int_flags(RFLAGS);
3845N/A
3845N/A// Class for all float registers
// Only word a of each register.
3845N/Areg_class float_reg(XMM0,
3845N/A XMM1,
3845N/A XMM2,
3845N/A XMM3,
3845N/A XMM4,
3845N/A XMM5,
3845N/A XMM6,
3845N/A XMM7
3845N/A#ifdef _LP64
3845N/A ,XMM8,
3845N/A XMM9,
3845N/A XMM10,
3845N/A XMM11,
3845N/A XMM12,
3845N/A XMM13,
3845N/A XMM14,
3845N/A XMM15
3845N/A#endif
3845N/A );
3845N/A
3845N/A// Class for all double registers
// Words a and b of each register.
3845N/Areg_class double_reg(XMM0, XMM0b,
3845N/A XMM1, XMM1b,
3845N/A XMM2, XMM2b,
3845N/A XMM3, XMM3b,
3845N/A XMM4, XMM4b,
3845N/A XMM5, XMM5b,
3845N/A XMM6, XMM6b,
3845N/A XMM7, XMM7b
3845N/A#ifdef _LP64
3845N/A ,XMM8, XMM8b,
3845N/A XMM9, XMM9b,
3845N/A XMM10, XMM10b,
3845N/A XMM11, XMM11b,
3845N/A XMM12, XMM12b,
3845N/A XMM13, XMM13b,
3845N/A XMM14, XMM14b,
3845N/A XMM15, XMM15b
3845N/A#endif
3845N/A );
3845N/A
3845N/A// Class for all 32bit vector registers
// Lists the same registers as float_reg.
3845N/Areg_class vectors_reg(XMM0,
3845N/A XMM1,
3845N/A XMM2,
3845N/A XMM3,
3845N/A XMM4,
3845N/A XMM5,
3845N/A XMM6,
3845N/A XMM7
3845N/A#ifdef _LP64
3845N/A ,XMM8,
3845N/A XMM9,
3845N/A XMM10,
3845N/A XMM11,
3845N/A XMM12,
3845N/A XMM13,
3845N/A XMM14,
3845N/A XMM15
3845N/A#endif
3845N/A );
3845N/A
3845N/A// Class for all 64bit vector registers
// Lists the same registers as double_reg.
3845N/Areg_class vectord_reg(XMM0, XMM0b,
3845N/A XMM1, XMM1b,
3845N/A XMM2, XMM2b,
3845N/A XMM3, XMM3b,
3845N/A XMM4, XMM4b,
3845N/A XMM5, XMM5b,
3845N/A XMM6, XMM6b,
3845N/A XMM7, XMM7b
3845N/A#ifdef _LP64
3845N/A ,XMM8, XMM8b,
3845N/A XMM9, XMM9b,
3845N/A XMM10, XMM10b,
3845N/A XMM11, XMM11b,
3845N/A XMM12, XMM12b,
3845N/A XMM13, XMM13b,
3845N/A XMM14, XMM14b,
3845N/A XMM15, XMM15b
3845N/A#endif
3845N/A );
3845N/A
3845N/A// Class for all 128bit vector registers
// Words a-d of each register.
3845N/Areg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
3845N/A XMM1, XMM1b, XMM1c, XMM1d,
3845N/A XMM2, XMM2b, XMM2c, XMM2d,
3845N/A XMM3, XMM3b, XMM3c, XMM3d,
3845N/A XMM4, XMM4b, XMM4c, XMM4d,
3845N/A XMM5, XMM5b, XMM5c, XMM5d,
3845N/A XMM6, XMM6b, XMM6c, XMM6d,
3845N/A XMM7, XMM7b, XMM7c, XMM7d
3845N/A#ifdef _LP64
3845N/A ,XMM8, XMM8b, XMM8c, XMM8d,
3845N/A XMM9, XMM9b, XMM9c, XMM9d,
3845N/A XMM10, XMM10b, XMM10c, XMM10d,
3845N/A XMM11, XMM11b, XMM11c, XMM11d,
3845N/A XMM12, XMM12b, XMM12c, XMM12d,
3845N/A XMM13, XMM13b, XMM13c, XMM13d,
3845N/A XMM14, XMM14b, XMM14c, XMM14d,
3845N/A XMM15, XMM15b, XMM15c, XMM15d
3845N/A#endif
3845N/A );
3845N/A
3845N/A// Class for all 256bit vector registers
// All eight words (a-h) of each register.
3845N/Areg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
3845N/A XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
3845N/A XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
3845N/A XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
3845N/A XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
3845N/A XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
3845N/A XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
3845N/A XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
3845N/A#ifdef _LP64
3845N/A ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
3845N/A XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
3845N/A XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
3845N/A XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
3845N/A XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
3845N/A XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
3845N/A XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
3845N/A XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
3845N/A#endif
3845N/A );
3845N/A
3845N/A%}
3845N/A
3041N/Asource %{
3041N/A // Float masks come from different places depending on platform.
3041N/A#ifdef _LP64
3041N/A static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
3041N/A static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
3041N/A static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
3041N/A static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
3041N/A#else
3041N/A static address float_signmask() { return (address)float_signmask_pool; }
3041N/A static address float_signflip() { return (address)float_signflip_pool; }
3041N/A static address double_signmask() { return (address)double_signmask_pool; }
3041N/A static address double_signflip() { return (address)double_signflip_pool; }
3041N/A#endif
3239N/A
3845N/A// Map Types to machine register types
// The table has Type::lastype entries. Each entry is either an ideal register
// opcode (Op_RegI, Op_RegP, Op_VecX, ...), 0, or Node::NotAMachineReg.
// NOTE(review): the initializer order presumably mirrors the order of the
// Type base-kind enumeration, which is not visible here - verify against
// that enum before inserting or reordering entries.
3845N/Aconst int Matcher::base2reg[Type::lastype] = {
3845N/A Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
3845N/A Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
3845N/A Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
3845N/A Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
3845N/A 0, 0/*abio*/,
3845N/A Op_RegP /* Return address */, 0, /* the memories */
3845N/A Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
3845N/A 0 /*bottom*/
3845N/A};
3845N/A
3845N/A// Max vector size in bytes. 0 if not supported.
3845N/Aconst int Matcher::vector_width_in_bytes(BasicType bt) {
3845N/A assert(is_java_primitive(bt), "only primitive type vectors");
3845N/A if (UseSSE < 2) return 0;
3845N/A // SSE2 supports 128bit vectors for all types.
3845N/A // AVX2 supports 256bit vectors for all types.
3845N/A int size = (UseAVX > 1) ? 32 : 16;
3845N/A // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3845N/A if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3845N/A size = 32;
3845N/A // Use flag to limit vector size.
3845N/A size = MIN2(size,(int)MaxVectorSize);
3845N/A // Minimum 2 values in vector (or 4 for bytes).
3845N/A switch (bt) {
3845N/A case T_DOUBLE:
3845N/A case T_LONG:
3845N/A if (size < 16) return 0;
3845N/A case T_FLOAT:
3845N/A case T_INT:
3845N/A if (size < 8) return 0;
3845N/A case T_BOOLEAN:
3845N/A case T_BYTE:
3845N/A case T_CHAR:
3845N/A case T_SHORT:
3845N/A if (size < 4) return 0;
3845N/A break;
3845N/A default:
3845N/A ShouldNotReachHere();
3845N/A }
3845N/A return size;
3845N/A}
3845N/A
3845N/A// Limits on vector size (number of elements) loaded into vector.
3845N/Aconst int Matcher::max_vector_size(const BasicType bt) {
3845N/A return vector_width_in_bytes(bt)/type2aelembytes(bt);
3845N/A}
3845N/Aconst int Matcher::min_vector_size(const BasicType bt) {
3845N/A int max_size = max_vector_size(bt);
3845N/A // Min size which can be loaded into vector is 4 bytes.
3845N/A int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3845N/A return MIN2(size,max_size);
3845N/A}
3845N/A
3845N/A// Vector ideal reg corresponding to specidied size in bytes
3845N/Aconst int Matcher::vector_ideal_reg(int size) {
3845N/A assert(MaxVectorSize >= size, "");
3845N/A switch(size) {
3845N/A case 4: return Op_VecS;
3845N/A case 8: return Op_VecD;
3845N/A case 16: return Op_VecX;
3845N/A case 32: return Op_VecY;
3845N/A }
3845N/A ShouldNotReachHere();
3845N/A return 0;
3845N/A}
3845N/A
3845N/A// x86 supports misaligned vectors store/load.
3845N/Aconst bool Matcher::misaligned_vectors_ok() {
3845N/A return !AlignVector; // can be changed by flag
3845N/A}
3845N/A
3845N/A// Helper methods for MachSpillCopyNode::implementation().
//
// vec_mov_helper: register-to-register copy of a vector value.
//   cbuf     - when non-NULL, the move instruction is emitted into it.
//   do_size  - only consulted when cbuf is NULL (apart from asserts): when
//              false in non-PRODUCT builds, the instruction text is printed
//              to st.
//   src_lo/dst_lo - register numbers used to index Matcher::_regEncode and
//              Matcher::regName.
//   src_hi/dst_hi - only checked by the adjacency assert below.
//   ireg     - ideal vector register type (Op_VecS/Op_VecD/Op_VecX/Op_VecY).
//   st       - output stream for the textual form.
// Returns the number of bytes actually emitted when cbuf is non-NULL,
// otherwise the fixed value 4.
3845N/Astatic int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
3845N/A int src_hi, int dst_hi, uint ireg, outputStream* st) {
3845N/A // In 64-bit VM size calculation is very complex. Emitting instructions
3845N/A // into scratch buffer is used to get size in 64-bit VM.
3845N/A LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
// Except for 32-bit vectors, both source and destination must be an
// even-numbered slot immediately followed by its "hi" slot.
3845N/A assert(ireg == Op_VecS || // 32bit vector
3845N/A (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
3845N/A (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
3845N/A "no non-adjacent vector moves" );
3845N/A if (cbuf) {
3845N/A MacroAssembler _masm(cbuf);
// Remember the starting offset so the emitted size can be measured.
3845N/A int offset = __ offset();
3845N/A switch (ireg) {
3845N/A case Op_VecS: // copy whole register
3845N/A case Op_VecD:
3845N/A case Op_VecX:
3845N/A __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
3845N/A break;
3845N/A case Op_VecY:
3845N/A __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
3845N/A break;
3845N/A default:
3845N/A ShouldNotReachHere();
3845N/A }
3845N/A int size = __ offset() - offset;
3845N/A#ifdef ASSERT
3845N/A // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
3845N/A assert(!do_size || size == 4, "incorrect size calculattion");
3845N/A#endif
3845N/A return size;
3845N/A#ifndef PRODUCT
// Non-PRODUCT builds only: with no code buffer and no size request, print
// the instruction that would be emitted.
3845N/A } else if (!do_size) {
3845N/A switch (ireg) {
3845N/A case Op_VecS:
3845N/A case Op_VecD:
3845N/A case Op_VecX:
3845N/A st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
3845N/A break;
3845N/A case Op_VecY:
3845N/A st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
3845N/A break;
3845N/A default:
3845N/A ShouldNotReachHere();
3845N/A }
3845N/A#endif
3845N/A }
3845N/A // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
3845N/A return 4;
3845N/A}
3845N/A
// vec_spill_helper: move a vector value between a register and the stack
// slot at [rsp + stack_offset].
//   cbuf         - when non-NULL, the load/store instruction is emitted into it.
//   do_size      - only consulted when cbuf is NULL (apart from asserts): when
//                  false in non-PRODUCT builds, the instruction text is
//                  printed to st.
//   is_load      - true: stack slot -> register; false: register -> stack slot.
//   stack_offset - displacement from rsp.
//   reg          - register number used to index Matcher::_regEncode and
//                  Matcher::regName.
//   ireg         - ideal vector register type; selects movdl (VecS),
//                  movq (VecD), movdqu (VecX) or vmovdqu (VecY).
//   st           - output stream for the textual form.
// Returns the number of bytes actually emitted when cbuf is non-NULL,
// otherwise 5 plus the displacement size (0, 1 or 4 bytes).
3845N/Astatic int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
3845N/A int stack_offset, int reg, uint ireg, outputStream* st) {
3845N/A // In 64-bit VM size calculation is very complex. Emitting instructions
3845N/A // into scratch buffer is used to get size in 64-bit VM.
3845N/A LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
3845N/A if (cbuf) {
3845N/A MacroAssembler _masm(cbuf);
// Remember the starting offset so the emitted size can be measured.
3845N/A int offset = __ offset();
3845N/A if (is_load) {
3845N/A switch (ireg) {
3845N/A case Op_VecS:
3845N/A __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
3845N/A break;
3845N/A case Op_VecD:
3845N/A __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
3845N/A break;
3845N/A case Op_VecX:
3845N/A __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
3845N/A break;
3845N/A case Op_VecY:
3845N/A __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
3845N/A break;
3845N/A default:
3845N/A ShouldNotReachHere();
3845N/A }
3845N/A } else { // store
3845N/A switch (ireg) {
3845N/A case Op_VecS:
3845N/A __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
3845N/A break;
3845N/A case Op_VecD:
3845N/A __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
3845N/A break;
3845N/A case Op_VecX:
3845N/A __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
3845N/A break;
3845N/A case Op_VecY:
3845N/A __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
3845N/A break;
3845N/A default:
3845N/A ShouldNotReachHere();
3845N/A }
3845N/A }
3845N/A int size = __ offset() - offset;
3845N/A#ifdef ASSERT
// Displacement bytes: none for offset 0, one byte below 0x80, else four.
3845N/A int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
3845N/A // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
3845N/A assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
3845N/A#endif
3845N/A return size;
3845N/A#ifndef PRODUCT
// Non-PRODUCT builds only: with no code buffer and no size request, print
// the instruction that would be emitted.
3845N/A } else if (!do_size) {
3845N/A if (is_load) {
3845N/A switch (ireg) {
3845N/A case Op_VecS:
3845N/A st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
3845N/A break;
3845N/A case Op_VecD:
3845N/A st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
3845N/A break;
3845N/A case Op_VecX:
3845N/A st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
3845N/A break;
3845N/A case Op_VecY:
3845N/A st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
3845N/A break;
3845N/A default:
3845N/A ShouldNotReachHere();
3845N/A }
3845N/A } else { // store
3845N/A switch (ireg) {
3845N/A case Op_VecS:
3845N/A st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
3845N/A break;
3845N/A case Op_VecD:
3845N/A st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
3845N/A break;
3845N/A case Op_VecX:
3845N/A st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
3845N/A break;
3845N/A case Op_VecY:
3845N/A st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
3845N/A break;
3845N/A default:
3845N/A ShouldNotReachHere();
3845N/A }
3845N/A }
3845N/A#endif
3845N/A }
// Size estimate used when nothing was emitted; same formula as the assert
// in the emitting path.
3845N/A int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
3845N/A // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
3845N/A return 5+offset_size;
3845N/A}
3845N/A
3845N/Astatic inline jfloat replicate4_imm(int con, int width) {
3845N/A // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
3845N/A assert(width == 1 || width == 2, "only byte or short types here");
3845N/A int bit_width = width * 8;
3845N/A jint val = con;
3845N/A val &= (1 << bit_width) - 1; // mask off sign bits
3845N/A while(bit_width < 32) {
3845N/A val |= (val << bit_width);
3845N/A bit_width <<= 1;
3845N/A }
3845N/A jfloat fval = *((jfloat*) &val); // coerce to float type
3845N/A return fval;
3845N/A}
3845N/A
3845N/Astatic inline jdouble replicate8_imm(int con, int width) {
3845N/A // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
3845N/A assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
3845N/A int bit_width = width * 8;
3845N/A jlong val = con;
3845N/A val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
3845N/A while(bit_width < 64) {
3845N/A val |= (val << bit_width);
3845N/A bit_width <<= 1;
3845N/A }
3845N/A jdouble dval = *((jdouble*) &val); // coerce to double type
3845N/A return dval;
3845N/A}
3845N/A
// Non-PRODUCT builds only: prints this padding node to st as "nop" followed
// by its pad size in bytes, taken from _count.
3239N/A#ifndef PRODUCT
3239N/A void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
3239N/A st->print("nop \t# %d bytes pad for loops and calls", _count);
3239N/A }
3239N/A#endif
3239N/A
// Emits the padding: wraps cbuf in a MacroAssembler and requests nop(_count).
// The register allocator argument is not used.
3239N/A void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
3239N/A MacroAssembler _masm(&cbuf);
3239N/A __ nop(_count);
3239N/A }
3239N/A
// Reports the size of the padding node, which is exactly _count.
3239N/A uint MachNopNode::size(PhaseRegAlloc*) const {
3239N/A return _count;
3239N/A }
3239N/A
// Non-PRODUCT builds only: prints the breakpoint node to st as "# breakpoint".
3239N/A#ifndef PRODUCT
3239N/A void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
3239N/A st->print("# breakpoint");
3239N/A }
3239N/A#endif
3239N/A
// Emits a single int3 instruction into cbuf; ra_ is not used here.
3239N/A void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
3239N/A MacroAssembler _masm(&cbuf);
3239N/A __ int3();
3239N/A }
3239N/A
// Delegates the size query to the generic MachNode::size(ra_).
3239N/A uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
3239N/A return MachNode::size(ra_);
3239N/A }
3239N/A
3239N/A%}
3239N/A
3239N/Aencode %{
3239N/A
// Encoding that saves RSP into rbp_mh_SP_save ahead of a call whose callee
// may move the stack pointer. off0/off1 exist only under debug_only and feed
// the assert that the emitted size equals preserve_SP_size().
3239N/A enc_class preserve_SP %{
3239N/A debug_only(int off0 = cbuf.insts_size());
3239N/A MacroAssembler _masm(&cbuf);
3239N/A // RBP is preserved across all calls, even compiled calls.
3239N/A // Use it to preserve RSP in places where the callee might change the SP.
3239N/A __ movptr(rbp_mh_SP_save, rsp);
3239N/A debug_only(int off1 = cbuf.insts_size());
3239N/A assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
3239N/A %}
3239N/A
// Encoding that copies rbp_mh_SP_save back into RSP (the inverse of the
// movptr emitted by preserve_SP).
3239N/A enc_class restore_SP %{
3239N/A MacroAssembler _masm(&cbuf);
3239N/A __ movptr(rsp, rbp_mh_SP_save);
3239N/A %}
3239N/A
// Post-call check, emitted only when VerifyStackAtCalls is set: compares the
// stack word at [rsp + framesize] with 0xbadb100d and executes int3 when they
// differ. Emits nothing when the flag is off.
3239N/A enc_class call_epilog %{
3239N/A if (VerifyStackAtCalls) {
3239N/A // Check that stack depth is unchanged: find magic cookie on stack
3239N/A int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
3239N/A MacroAssembler _masm(&cbuf);
3239N/A Label L;
3239N/A __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
3239N/A __ jccb(Assembler::equal, L);
3239N/A // Die if stack mismatch
3239N/A __ int3();
3239N/A __ bind(L);
3239N/A }
3239N/A %}
3239N/A
3041N/A%}
3041N/A
3845N/A
3845N/A//----------OPERANDS-----------------------------------------------------------
3845N/A// Operand definitions must precede instruction definitions for correct parsing
3845N/A// in the ADLC because operands constitute user defined types which are used in
3845N/A// instruction definitions.
3845N/A
3845N/A// Vectors
// Register operand for VecS values (used by the 4-byte vector rules in this
// file); allocated from register class vectors_reg.
3845N/Aoperand vecS() %{
3845N/A constraint(ALLOC_IN_RC(vectors_reg));
3845N/A match(VecS);
3845N/A
3845N/A format %{ %}
3845N/A interface(REG_INTER);
3845N/A%}
3845N/A
// Register operand for VecD values (used by the 8-byte vector rules in this
// file); allocated from register class vectord_reg.
3845N/Aoperand vecD() %{
3845N/A constraint(ALLOC_IN_RC(vectord_reg));
3845N/A match(VecD);
3845N/A
3845N/A format %{ %}
3845N/A interface(REG_INTER);
3845N/A%}
3845N/A
// Register operand for VecX values (used by the 16-byte vector rules in this
// file); allocated from register class vectorx_reg.
3845N/Aoperand vecX() %{
3845N/A constraint(ALLOC_IN_RC(vectorx_reg));
3845N/A match(VecX);
3845N/A
3845N/A format %{ %}
3845N/A interface(REG_INTER);
3845N/A%}
3845N/A
// Register operand for VecY values (used by the 32-byte vector rules in this
// file); allocated from register class vectory_reg.
3845N/Aoperand vecY() %{
3845N/A constraint(ALLOC_IN_RC(vectory_reg));
3845N/A match(VecY);
3845N/A
3845N/A format %{ %}
3845N/A interface(REG_INTER);
3845N/A%}
3845N/A
3845N/A
3041N/A// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
3041N/A
3239N/A// ============================================================================
3239N/A
// Matches the ideal Halt node and emits a single int3.
3239N/Ainstruct ShouldNotReachHere() %{
3239N/A match(Halt);
3239N/A format %{ "int3\t# ShouldNotReachHere" %}
3239N/A ins_encode %{
3239N/A __ int3();
3239N/A %}
3239N/A ins_pipe(pipe_slow);
3239N/A%}
3239N/A
3239N/A// ============================================================================
3239N/A
// Scalar float add, two-operand SSE form (UseSSE >= 1, UseAVX == 0):
// dst = dst + src, both in XMM registers (addss).
3041N/Ainstruct addF_reg(regF dst, regF src) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (AddF dst src));
3041N/A
3041N/A format %{ "addss $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ addss($dst$$XMMRegister, $src$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float add, two-operand SSE form (UseSSE >= 1, UseAVX == 0):
// dst = dst + [src], with the LoadF folded in as a memory operand (addss).
3041N/Ainstruct addF_mem(regF dst, memory src) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (AddF dst (LoadF src)));
3041N/A
3041N/A format %{ "addss $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ addss($dst$$XMMRegister, $src$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float add, two-operand SSE form (UseSSE >= 1, UseAVX == 0):
// dst = dst + con, with con read from the constant table (addss).
3041N/Ainstruct addF_imm(regF dst, immF con) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (AddF dst con));
3041N/A format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ addss($dst$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float add, three-operand AVX form (UseAVX > 0):
// dst = src1 + src2, all in XMM registers (vaddss).
3891N/Ainstruct addF_reg_reg(regF dst, regF src1, regF src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (AddF src1 src2));
3041N/A
3041N/A format %{ "vaddss $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float add, three-operand AVX form (UseAVX > 0):
// dst = src1 + [src2], with the LoadF folded in as a memory operand (vaddss).
3891N/Ainstruct addF_reg_mem(regF dst, regF src1, memory src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (AddF src1 (LoadF src2)));
3041N/A
3041N/A format %{ "vaddss $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float add, three-operand AVX form (UseAVX > 0):
// dst = src + con, with con read from the constant table (vaddss).
3891N/Ainstruct addF_reg_imm(regF dst, regF src, immF con) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (AddF src con));
3041N/A
3041N/A format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double add, two-operand SSE2 form (UseSSE >= 2, UseAVX == 0):
// dst = dst + src, both in XMM registers (addsd).
3041N/Ainstruct addD_reg(regD dst, regD src) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (AddD dst src));
3041N/A
3041N/A format %{ "addsd $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ addsd($dst$$XMMRegister, $src$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double add, two-operand SSE2 form (UseSSE >= 2, UseAVX == 0):
// dst = dst + [src], with the LoadD folded in as a memory operand (addsd).
3041N/Ainstruct addD_mem(regD dst, memory src) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (AddD dst (LoadD src)));
3041N/A
3041N/A format %{ "addsd $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ addsd($dst$$XMMRegister, $src$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double add, two-operand SSE2 form (UseSSE >= 2, UseAVX == 0):
// dst = dst + con, with con read from the constant table (addsd).
3041N/Ainstruct addD_imm(regD dst, immD con) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (AddD dst con));
3041N/A format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ addsd($dst$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double add, three-operand AVX form (UseAVX > 0):
// dst = src1 + src2, all in XMM registers (vaddsd).
3891N/Ainstruct addD_reg_reg(regD dst, regD src1, regD src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (AddD src1 src2));
3041N/A
3041N/A format %{ "vaddsd $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double add, three-operand AVX form (UseAVX > 0):
// dst = src1 + [src2], with the LoadD folded in as a memory operand (vaddsd).
3891N/Ainstruct addD_reg_mem(regD dst, regD src1, memory src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (AddD src1 (LoadD src2)));
3041N/A
3041N/A format %{ "vaddsd $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double add, three-operand AVX form (UseAVX > 0):
// dst = src + con, with con read from the constant table (vaddsd).
3891N/Ainstruct addD_reg_imm(regD dst, regD src, immD con) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (AddD src con));
3041N/A
3041N/A format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float subtract, two-operand SSE form (UseSSE >= 1, UseAVX == 0):
// dst = dst - src, both in XMM registers (subss).
3041N/Ainstruct subF_reg(regF dst, regF src) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (SubF dst src));
3041N/A
3041N/A format %{ "subss $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ subss($dst$$XMMRegister, $src$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float subtract, two-operand SSE form (UseSSE >= 1, UseAVX == 0):
// dst = dst - [src], with the LoadF folded in as a memory operand (subss).
3041N/Ainstruct subF_mem(regF dst, memory src) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (SubF dst (LoadF src)));
3041N/A
3041N/A format %{ "subss $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ subss($dst$$XMMRegister, $src$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float subtract, two-operand SSE form (UseSSE >= 1, UseAVX == 0):
// dst = dst - con, with con read from the constant table (subss).
3041N/Ainstruct subF_imm(regF dst, immF con) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (SubF dst con));
3041N/A format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ subss($dst$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float subtract, three-operand AVX form (UseAVX > 0):
// dst = src1 - src2, all in XMM registers (vsubss).
3891N/Ainstruct subF_reg_reg(regF dst, regF src1, regF src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (SubF src1 src2));
3041N/A
3041N/A format %{ "vsubss $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float subtract, three-operand AVX form (UseAVX > 0):
// dst = src1 - [src2], with the LoadF folded in as a memory operand (vsubss).
3891N/Ainstruct subF_reg_mem(regF dst, regF src1, memory src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (SubF src1 (LoadF src2)));
3041N/A
3041N/A format %{ "vsubss $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float subtract, three-operand AVX form (UseAVX > 0):
// dst = src - con, with con read from the constant table (vsubss).
3891N/Ainstruct subF_reg_imm(regF dst, regF src, immF con) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (SubF src con));
3041N/A
3041N/A format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double subtract, two-operand SSE2 form (UseSSE >= 2, UseAVX == 0):
// dst = dst - src, both in XMM registers (subsd).
3041N/Ainstruct subD_reg(regD dst, regD src) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (SubD dst src));
3041N/A
3041N/A format %{ "subsd $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ subsd($dst$$XMMRegister, $src$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double subtract, two-operand SSE2 form (UseSSE >= 2, UseAVX == 0):
// dst = dst - [src], with the LoadD folded in as a memory operand (subsd).
3041N/Ainstruct subD_mem(regD dst, memory src) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (SubD dst (LoadD src)));
3041N/A
3041N/A format %{ "subsd $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ subsd($dst$$XMMRegister, $src$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double subtract, two-operand SSE2 form (UseSSE >= 2, UseAVX == 0):
// dst = dst - con, with con read from the constant table (subsd).
3041N/Ainstruct subD_imm(regD dst, immD con) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (SubD dst con));
3041N/A format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ subsd($dst$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double subtract, three-operand AVX form (UseAVX > 0):
// dst = src1 - src2, all in XMM registers (vsubsd).
3891N/Ainstruct subD_reg_reg(regD dst, regD src1, regD src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (SubD src1 src2));
3041N/A
3041N/A format %{ "vsubsd $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double subtract, three-operand AVX form (UseAVX > 0):
// dst = src1 - [src2], with the LoadD folded in as a memory operand (vsubsd).
3891N/Ainstruct subD_reg_mem(regD dst, regD src1, memory src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (SubD src1 (LoadD src2)));
3041N/A
3041N/A format %{ "vsubsd $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double subtract, three-operand AVX form (UseAVX > 0):
// dst = src - con, with con read from the constant table (vsubsd).
3891N/Ainstruct subD_reg_imm(regD dst, regD src, immD con) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (SubD src con));
3041N/A
3041N/A format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float multiply, two-operand SSE form (UseSSE >= 1, UseAVX == 0):
// dst = dst * src, both in XMM registers (mulss).
3041N/Ainstruct mulF_reg(regF dst, regF src) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (MulF dst src));
3041N/A
3041N/A format %{ "mulss $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ mulss($dst$$XMMRegister, $src$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float multiply, two-operand SSE form (UseSSE >= 1, UseAVX == 0):
// dst = dst * [src], with the LoadF folded in as a memory operand (mulss).
3041N/Ainstruct mulF_mem(regF dst, memory src) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (MulF dst (LoadF src)));
3041N/A
3041N/A format %{ "mulss $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ mulss($dst$$XMMRegister, $src$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float multiply, two-operand SSE form (UseSSE >= 1, UseAVX == 0):
// dst = dst * con, with con read from the constant table (mulss).
3041N/Ainstruct mulF_imm(regF dst, immF con) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (MulF dst con));
3041N/A format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ mulss($dst$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float multiply, three-operand AVX form (UseAVX > 0):
// dst = src1 * src2, all in XMM registers (vmulss).
3891N/Ainstruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (MulF src1 src2));
3041N/A
3041N/A format %{ "vmulss $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float multiply, three-operand AVX form (UseAVX > 0):
// dst = src1 * [src2], with the LoadF folded in as a memory operand (vmulss).
3891N/Ainstruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (MulF src1 (LoadF src2)));
3041N/A
3041N/A format %{ "vmulss $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float multiply, three-operand AVX form (UseAVX > 0):
// dst = src * con, with con read from the constant table (vmulss).
3891N/Ainstruct mulF_reg_imm(regF dst, regF src, immF con) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (MulF src con));
3041N/A
3041N/A format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double multiply, two-operand SSE2 form (UseSSE >= 2, UseAVX == 0):
// dst = dst * src, both in XMM registers (mulsd).
3041N/Ainstruct mulD_reg(regD dst, regD src) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (MulD dst src));
3041N/A
3041N/A format %{ "mulsd $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double multiply, two-operand SSE2 form (UseSSE >= 2, UseAVX == 0):
// dst = dst * [src], with the LoadD folded in as a memory operand (mulsd).
3041N/Ainstruct mulD_mem(regD dst, memory src) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (MulD dst (LoadD src)));
3041N/A
3041N/A format %{ "mulsd $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ mulsd($dst$$XMMRegister, $src$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double multiply, two-operand SSE2 form (UseSSE >= 2, UseAVX == 0):
// dst = dst * con, with con read from the constant table (mulsd).
3041N/Ainstruct mulD_imm(regD dst, immD con) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (MulD dst con));
3041N/A format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ mulsd($dst$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double multiply, three-operand AVX form (UseAVX > 0):
// dst = src1 * src2, all in XMM registers (vmulsd).
3891N/Ainstruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (MulD src1 src2));
3041N/A
3041N/A format %{ "vmulsd $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double multiply, three-operand AVX form (UseAVX > 0):
// dst = src1 * [src2], with the LoadD folded in as a memory operand (vmulsd).
3891N/Ainstruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (MulD src1 (LoadD src2)));
3041N/A
3041N/A format %{ "vmulsd $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double multiply, three-operand AVX form (UseAVX > 0):
// dst = src * con, with con read from the constant table (vmulsd).
3891N/Ainstruct mulD_reg_imm(regD dst, regD src, immD con) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (MulD src con));
3041N/A
3041N/A format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float divide, two-operand SSE form (UseSSE >= 1, UseAVX == 0):
// dst = dst / src, both in XMM registers (divss).
3041N/Ainstruct divF_reg(regF dst, regF src) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (DivF dst src));
3041N/A
3041N/A format %{ "divss $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ divss($dst$$XMMRegister, $src$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float divide, two-operand SSE form (UseSSE >= 1, UseAVX == 0):
// dst = dst / [src], with the LoadF folded in as a memory operand (divss).
3041N/Ainstruct divF_mem(regF dst, memory src) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (DivF dst (LoadF src)));
3041N/A
3041N/A format %{ "divss $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ divss($dst$$XMMRegister, $src$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float divide, two-operand SSE form (UseSSE >= 1, UseAVX == 0):
// dst = dst / con, with con read from the constant table (divss).
3041N/Ainstruct divF_imm(regF dst, immF con) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (DivF dst con));
3041N/A format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ divss($dst$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float divide, three-operand AVX form (UseAVX > 0):
// dst = src1 / src2, all in XMM registers (vdivss).
3891N/Ainstruct divF_reg_reg(regF dst, regF src1, regF src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (DivF src1 src2));
3041N/A
3041N/A format %{ "vdivss $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float divide, three-operand AVX form (UseAVX > 0):
// dst = src1 / [src2], with the LoadF folded in as a memory operand (vdivss).
3891N/Ainstruct divF_reg_mem(regF dst, regF src1, memory src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (DivF src1 (LoadF src2)));
3041N/A
3041N/A format %{ "vdivss $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar float divide, three-operand AVX form (UseAVX > 0):
// dst = src / con, with con read from the constant table (vdivss).
3891N/Ainstruct divF_reg_imm(regF dst, regF src, immF con) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (DivF src con));
3041N/A
3041N/A format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double divide, two-operand SSE2 form (UseSSE >= 2, UseAVX == 0):
// dst = dst / src, both in XMM registers (divsd).
3041N/Ainstruct divD_reg(regD dst, regD src) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (DivD dst src));
3041N/A
3041N/A format %{ "divsd $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ divsd($dst$$XMMRegister, $src$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double divide, two-operand SSE2 form (UseSSE >= 2, UseAVX == 0):
// dst = dst / [src], with the LoadD folded in as a memory operand (divsd).
3041N/Ainstruct divD_mem(regD dst, memory src) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (DivD dst (LoadD src)));
3041N/A
3041N/A format %{ "divsd $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ divsd($dst$$XMMRegister, $src$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double divide, two-operand SSE2 form (UseSSE >= 2, UseAVX == 0):
// dst = dst / con, with con read from the constant table (divsd).
3041N/Ainstruct divD_imm(regD dst, immD con) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (DivD dst con));
3041N/A format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ divsd($dst$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double divide, three-operand AVX form (UseAVX > 0):
// dst = src1 / src2, all in XMM registers (vdivsd).
3891N/Ainstruct divD_reg_reg(regD dst, regD src1, regD src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (DivD src1 src2));
3041N/A
3041N/A format %{ "vdivsd $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double divide, three-operand AVX form (UseAVX > 0):
// dst = src1 / [src2], with the LoadD folded in as a memory operand (vdivsd).
3891N/Ainstruct divD_reg_mem(regD dst, regD src1, memory src2) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (DivD src1 (LoadD src2)));
3041N/A
3041N/A format %{ "vdivsd $dst, $src1, $src2" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Scalar double divide, three-operand AVX form (UseAVX > 0):
// dst = src / con, with con read from the constant table (vdivsd).
3891N/Ainstruct divD_reg_imm(regD dst, regD src, immD con) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (DivD src con));
3041N/A
3041N/A format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Float absolute value in place, SSE form (UseSSE >= 1, UseAVX == 0):
// ANDs dst with the mask at float_signmask() (andps).
3041N/Ainstruct absF_reg(regF dst) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (AbsF dst));
3041N/A ins_cost(150);
3041N/A format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
3041N/A ins_encode %{
3041N/A __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Float absolute value, three-operand AVX form (UseAVX > 0):
// dst = src AND the mask at float_signmask() (vandps).
3891N/Ainstruct absF_reg_reg(regF dst, regF src) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (AbsF src));
3041N/A ins_cost(150);
3041N/A format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
3041N/A ins_encode %{
3041N/A __ vandps($dst$$XMMRegister, $src$$XMMRegister,
3041N/A ExternalAddress(float_signmask()));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Double absolute value in place, SSE2 form (UseSSE >= 2, UseAVX == 0):
// ANDs dst with the mask at double_signmask() (andpd).
3041N/Ainstruct absD_reg(regD dst) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (AbsD dst));
3041N/A ins_cost(150);
3041N/A format %{ "andpd $dst, [0x7fffffffffffffff]\t"
3041N/A "# abs double by sign masking" %}
3041N/A ins_encode %{
3041N/A __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Double absolute value, three-operand AVX form (UseAVX > 0):
// dst = src AND the mask at double_signmask() (vandpd).
3891N/Ainstruct absD_reg_reg(regD dst, regD src) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (AbsD src));
3041N/A ins_cost(150);
3041N/A format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
3041N/A "# abs double by sign masking" %}
3041N/A ins_encode %{
3041N/A __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
3041N/A ExternalAddress(double_signmask()));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Float negate in place, SSE form (UseSSE >= 1, UseAVX == 0):
// XORs dst with the mask at float_signflip() (xorps).
3041N/Ainstruct negF_reg(regF dst) %{
3041N/A predicate((UseSSE>=1) && (UseAVX == 0));
3041N/A match(Set dst (NegF dst));
3041N/A ins_cost(150);
3041N/A format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
3041N/A ins_encode %{
3041N/A __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Float negate, three-operand AVX form (UseAVX > 0):
// dst = src XOR the mask at float_signflip() (vxorps).
3891N/Ainstruct negF_reg_reg(regF dst, regF src) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (NegF src));
3041N/A ins_cost(150);
3041N/A format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
3041N/A ins_encode %{
3041N/A __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
3041N/A ExternalAddress(float_signflip()));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Double negate in place, SSE2 form (UseSSE >= 2, UseAVX == 0):
// XORs dst with the mask at double_signflip() (xorpd).
3041N/Ainstruct negD_reg(regD dst) %{
3041N/A predicate((UseSSE>=2) && (UseAVX == 0));
3041N/A match(Set dst (NegD dst));
3041N/A ins_cost(150);
3041N/A format %{ "xorpd $dst, [0x8000000000000000]\t"
3041N/A "# neg double by sign flipping" %}
3041N/A ins_encode %{
3041N/A __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Double negate, three-operand AVX form (UseAVX > 0):
// dst = src XOR the mask at double_signflip() (vxorpd).
3891N/Ainstruct negD_reg_reg(regD dst, regD src) %{
3041N/A predicate(UseAVX > 0);
3041N/A match(Set dst (NegD src));
3041N/A ins_cost(150);
3041N/A format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
3041N/A "# neg double by sign flipping" %}
3041N/A ins_encode %{
3041N/A __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
3041N/A ExternalAddress(double_signflip()));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Float square root (UseSSE >= 1): matches the ideal pattern
// ConvD2F(SqrtD(ConvF2D src)) and emits one sqrtss on XMM registers.
3041N/Ainstruct sqrtF_reg(regF dst, regF src) %{
3041N/A predicate(UseSSE>=1);
3041N/A match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
3041N/A
3041N/A format %{ "sqrtss $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Float square root of a memory operand (UseSSE >= 1): matches
// ConvD2F(SqrtD(ConvF2D(LoadF src))) and emits one sqrtss with the load folded in.
3041N/Ainstruct sqrtF_mem(regF dst, memory src) %{
3041N/A predicate(UseSSE>=1);
3041N/A match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
3041N/A
3041N/A format %{ "sqrtss $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ sqrtss($dst$$XMMRegister, $src$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Float square root of a constant (UseSSE >= 1): matches
// ConvD2F(SqrtD(ConvF2D con)) and emits one sqrtss reading con from the constant table.
3041N/Ainstruct sqrtF_imm(regF dst, immF con) %{
3041N/A predicate(UseSSE>=1);
3041N/A match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
3041N/A format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ sqrtss($dst$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Double square root (UseSSE >= 2): dst = sqrt(src), both in XMM registers (sqrtsd).
3041N/Ainstruct sqrtD_reg(regD dst, regD src) %{
3041N/A predicate(UseSSE>=2);
3041N/A match(Set dst (SqrtD src));
3041N/A
3041N/A format %{ "sqrtsd $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Double square root of a memory operand (UseSSE >= 2): dst = sqrt([src]),
// with the LoadD folded into sqrtsd.
3041N/Ainstruct sqrtD_mem(regD dst, memory src) %{
3041N/A predicate(UseSSE>=2);
3041N/A match(Set dst (SqrtD (LoadD src)));
3041N/A
3041N/A format %{ "sqrtsd $dst, $src" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ sqrtsd($dst$$XMMRegister, $src$$Address);
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
// Double square root of a constant (UseSSE >= 2): dst = sqrt(con),
// with con read from the constant table (sqrtsd).
3041N/Ainstruct sqrtD_imm(regD dst, immD con) %{
3041N/A predicate(UseSSE>=2);
3041N/A match(Set dst (SqrtD con));
3041N/A format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3041N/A ins_cost(150);
3041N/A ins_encode %{
3041N/A __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
3041N/A %}
3041N/A ins_pipe(pipe_slow);
3041N/A%}
3041N/A
3845N/A
3845N/A// ====================VECTOR INSTRUCTIONS=====================================
3845N/A
3845N/A// Load vectors (4 bytes long)
// Selected for a LoadVector node whose memory_size() is 4; emits movdl from
// mem into the vecS destination register.
3845N/Ainstruct loadV4(vecS dst, memory mem) %{
3845N/A predicate(n->as_LoadVector()->memory_size() == 4);
3845N/A match(Set dst (LoadVector mem));
3845N/A ins_cost(125);
3845N/A format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $mem$$Address);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Load vectors (8 bytes long)
// Selected for a LoadVector node whose memory_size() is 8; emits movq from
// mem into the vecD destination register.
3845N/Ainstruct loadV8(vecD dst, memory mem) %{
3845N/A predicate(n->as_LoadVector()->memory_size() == 8);
3845N/A match(Set dst (LoadVector mem));
3845N/A ins_cost(125);
3845N/A format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
3845N/A ins_encode %{
3845N/A __ movq($dst$$XMMRegister, $mem$$Address);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Load vectors (16 bytes long)
// Selected for a LoadVector node whose memory_size() is 16; emits movdqu from
// mem into the vecX destination register.
3845N/Ainstruct loadV16(vecX dst, memory mem) %{
3845N/A predicate(n->as_LoadVector()->memory_size() == 16);
3845N/A match(Set dst (LoadVector mem));
3845N/A ins_cost(125);
3845N/A format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
3845N/A ins_encode %{
3845N/A __ movdqu($dst$$XMMRegister, $mem$$Address);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Load vectors (32 bytes long)
// Selected for a LoadVector node whose memory_size() is 32; emits vmovdqu from
// mem into the vecY destination register.
3845N/Ainstruct loadV32(vecY dst, memory mem) %{
3845N/A predicate(n->as_LoadVector()->memory_size() == 32);
3845N/A match(Set dst (LoadVector mem));
3845N/A ins_cost(125);
3845N/A format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
3845N/A ins_encode %{
3845N/A __ vmovdqu($dst$$XMMRegister, $mem$$Address);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Store vectors
// Selected for a StoreVector node whose memory_size() is 4; emits movdl from
// the vecS source register to mem.
3845N/Ainstruct storeV4(memory mem, vecS src) %{
3845N/A predicate(n->as_StoreVector()->memory_size() == 4);
3845N/A match(Set mem (StoreVector mem src));
3845N/A ins_cost(145);
3845N/A format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
3845N/A ins_encode %{
3845N/A __ movdl($mem$$Address, $src$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// Selected for a StoreVector node whose memory_size() is 8; emits movq from
// the vecD source register to mem.
3845N/Ainstruct storeV8(memory mem, vecD src) %{
3845N/A predicate(n->as_StoreVector()->memory_size() == 8);
3845N/A match(Set mem (StoreVector mem src));
3845N/A ins_cost(145);
3845N/A format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
3845N/A ins_encode %{
3845N/A __ movq($mem$$Address, $src$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// Selected for a StoreVector node whose memory_size() is 16; emits movdqu from
// the vecX source register to mem.
3845N/Ainstruct storeV16(memory mem, vecX src) %{
3845N/A predicate(n->as_StoreVector()->memory_size() == 16);
3845N/A match(Set mem (StoreVector mem src));
3845N/A ins_cost(145);
3845N/A format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
3845N/A ins_encode %{
3845N/A __ movdqu($mem$$Address, $src$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// Selected for a StoreVector node whose memory_size() is 32: writes the vecY
// source to $mem with vmovdqu.
3845N/Ainstruct storeV32(memory mem, vecY src) %{
3845N/A predicate(n->as_StoreVector()->memory_size() == 32);
3845N/A match(Set mem (StoreVector mem src));
3845N/A ins_cost(145);
3845N/A format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
3845N/A ins_encode %{
3845N/A __ vmovdqu($mem$$Address, $src$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Replicate byte scalar to be vector
// ReplicateB of an integer register into a 4-element vecS.
// Emits movdl ($src -> $dst), then punpcklbw and pshuflw(0x00), both with
// $dst as source and destination.
3845N/Ainstruct Repl4B(vecS dst, rRegI src) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateB src));
3845N/A format %{ "movd $dst,$src\n\t"
3845N/A "punpcklbw $dst,$dst\n\t"
3845N/A "pshuflw $dst,$dst,0x00\t! replicate4B" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateB of an integer register into an 8-element vecD.
// Emits the same movdl / punpcklbw / pshuflw(0x00) sequence as Repl4B; only
// the predicate length and the destination operand class differ.
3845N/Ainstruct Repl8B(vecD dst, rRegI src) %{
3845N/A predicate(n->as_Vector()->length() == 8);
3845N/A match(Set dst (ReplicateB src));
3845N/A format %{ "movd $dst,$src\n\t"
3845N/A "punpcklbw $dst,$dst\n\t"
3845N/A "pshuflw $dst,$dst,0x00\t! replicate8B" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateB of an integer register into a 16-element vecX.
// Emits movdl, punpcklbw and pshuflw(0x00) as in the 8-element rule, followed
// by punpcklqdq of $dst with itself.
3845N/Ainstruct Repl16B(vecX dst, rRegI src) %{
3845N/A predicate(n->as_Vector()->length() == 16);
3845N/A match(Set dst (ReplicateB src));
3845N/A format %{ "movd $dst,$src\n\t"
3845N/A "punpcklbw $dst,$dst\n\t"
3845N/A "pshuflw $dst,$dst,0x00\n\t"
3891N/A "punpcklqdq $dst,$dst\t! replicate16B" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateB of an integer register into a 32-element vecY.
// Emits the 16-element sequence (movdl, punpcklbw, pshuflw(0x00), punpcklqdq)
// and then vinserti128h with $dst as all three operands.
3845N/Ainstruct Repl32B(vecY dst, rRegI src) %{
3845N/A predicate(n->as_Vector()->length() == 32);
3845N/A match(Set dst (ReplicateB src));
3845N/A format %{ "movd $dst,$src\n\t"
3845N/A "punpcklbw $dst,$dst\n\t"
3845N/A "pshuflw $dst,$dst,0x00\n\t"
3891N/A "punpcklqdq $dst,$dst\n\t"
3891N/A "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3891N/A __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Replicate byte scalar immediate to be vector by loading from const table.
// ReplicateB of an immI constant into a 4-element vecS: one movdl from the
// constant-table entry built by replicate4_imm($con$$constant, 1).
// NOTE(review): the second argument (1 here, 2 for shorts) is presumably the
// element width in bytes -- confirm against replicate4_imm.
3845N/Ainstruct Repl4B_imm(vecS dst, immI con) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateB con));
3891N/A format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
3845N/A ins_encode %{
3891N/A __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateB of an immI constant into an 8-element vecD: one movq from the
// constant-table entry built by replicate8_imm($con$$constant, 1).
// NOTE(review): the second argument is presumably the element width in
// bytes -- confirm against replicate8_imm.
3845N/Ainstruct Repl8B_imm(vecD dst, immI con) %{
3845N/A predicate(n->as_Vector()->length() == 8);
3845N/A match(Set dst (ReplicateB con));
3891N/A format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
3845N/A ins_encode %{
3891N/A __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateB of an immI constant into a 16-element vecX: movq from the
// replicate8_imm($con$$constant, 1) constant-table entry, then punpcklqdq of
// $dst with itself.
3845N/Ainstruct Repl16B_imm(vecX dst, immI con) %{
3845N/A predicate(n->as_Vector()->length() == 16);
3845N/A match(Set dst (ReplicateB con));
3891N/A format %{ "movq $dst,[$constantaddress]\n\t"
3891N/A "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
3845N/A ins_encode %{
3891N/A __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateB of an immI constant into a 32-element vecY: movq from the
// replicate8_imm($con$$constant, 1) constant-table entry, punpcklqdq of $dst
// with itself, then vinserti128h with $dst as all three operands.
// The format annotation reads "replicate32B" like the sibling rules (it was
// misspelled "lreplicate32B").
3845N/Ainstruct Repl32B_imm(vecY dst, immI con) %{
3845N/A predicate(n->as_Vector()->length() == 32);
3845N/A match(Set dst (ReplicateB con));
3891N/A format %{ "movq $dst,[$constantaddress]\n\t"
3891N/A "punpcklqdq $dst,$dst\n\t"
3891N/A "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
3845N/A ins_encode %{
3891N/A __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3891N/A __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Replicate byte scalar zero to be vector
// ReplicateB of the immI0 operand into a 4-element vecS: pxor of $dst with
// itself; the $zero operand is not referenced by the encoding.
3845N/Ainstruct Repl4B_zero(vecS dst, immI0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateB zero));
3845N/A format %{ "pxor $dst,$dst\t! replicate4B zero" %}
3845N/A ins_encode %{
3845N/A __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateB of the immI0 operand into an 8-element vecD: pxor of $dst with
// itself; the $zero operand is not referenced by the encoding.
3845N/Ainstruct Repl8B_zero(vecD dst, immI0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 8);
3845N/A match(Set dst (ReplicateB zero));
3845N/A format %{ "pxor $dst,$dst\t! replicate8B zero" %}
3845N/A ins_encode %{
3845N/A __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateB of the immI0 operand into a 16-element vecX: pxor of $dst with
// itself; the $zero operand is not referenced by the encoding.
3845N/Ainstruct Repl16B_zero(vecX dst, immI0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 16);
3845N/A match(Set dst (ReplicateB zero));
3845N/A format %{ "pxor $dst,$dst\t! replicate16B zero" %}
3845N/A ins_encode %{
3845N/A __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateB of the immI0 operand into a 32-element vecY: a vpxor call with
// $dst as all three register operands and vector256 set to true.
3845N/Ainstruct Repl32B_zero(vecY dst, immI0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 32);
3845N/A match(Set dst (ReplicateB zero));
3891N/A format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
3845N/A ins_encode %{
3845N/A // NOTE(review): the call below is vpxor, although this comment used to say
 // vxorpd is used because AVX lacks a 256-bit vpxor (AVX2 has it). Presumably
 // the assembler's vpxor covers that case -- confirm.
3845N/A bool vector256 = true;
3891N/A __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
3845N/A// Replicate char/short (2 byte) scalar to be vector
// ReplicateS of an integer register into a 2-element vecS.
// Emits movdl ($src -> $dst) followed by pshuflw(0x00) on $dst itself.
3845N/Ainstruct Repl2S(vecS dst, rRegI src) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateS src));
3845N/A format %{ "movd $dst,$src\n\t"
3845N/A "pshuflw $dst,$dst,0x00\t! replicate2S" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateS of an integer register into a 4-element vecD.
// Same movdl / pshuflw(0x00) sequence as Repl2S; only the predicate length
// and the destination operand class differ.
3845N/Ainstruct Repl4S(vecD dst, rRegI src) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateS src));
3845N/A format %{ "movd $dst,$src\n\t"
3845N/A "pshuflw $dst,$dst,0x00\t! replicate4S" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateS of an integer register into an 8-element vecX.
// Emits movdl and pshuflw(0x00) as in the 4-element rule, then punpcklqdq of
// $dst with itself.
3845N/Ainstruct Repl8S(vecX dst, rRegI src) %{
3845N/A predicate(n->as_Vector()->length() == 8);
3845N/A match(Set dst (ReplicateS src));
3845N/A format %{ "movd $dst,$src\n\t"
3845N/A "pshuflw $dst,$dst,0x00\n\t"
3891N/A "punpcklqdq $dst,$dst\t! replicate8S" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateS of an integer register into a 16-element vecY.
// Emits the 8-element sequence (movdl, pshuflw(0x00), punpcklqdq) and then
// vinserti128h with $dst as all three operands.
3845N/Ainstruct Repl16S(vecY dst, rRegI src) %{
3845N/A predicate(n->as_Vector()->length() == 16);
3845N/A match(Set dst (ReplicateS src));
3845N/A format %{ "movd $dst,$src\n\t"
3845N/A "pshuflw $dst,$dst,0x00\n\t"
3891N/A "punpcklqdq $dst,$dst\n\t"
3891N/A "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3891N/A __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
// ReplicateS of an immI constant into a 2-element vecS: one movdl from the
// constant-table entry built by replicate4_imm($con$$constant, 2).
// NOTE(review): the second argument is presumably the element width in
// bytes -- confirm against replicate4_imm.
3845N/Ainstruct Repl2S_imm(vecS dst, immI con) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateS con));
3891N/A format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
3845N/A ins_encode %{
3891N/A __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateS of an immI constant into a 4-element vecD: one movq from the
// constant-table entry built by replicate8_imm($con$$constant, 2).
// NOTE(review): the second argument is presumably the element width in
// bytes -- confirm against replicate8_imm.
3845N/Ainstruct Repl4S_imm(vecD dst, immI con) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateS con));
3891N/A format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
3845N/A ins_encode %{
3891N/A __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateS of an immI constant into an 8-element vecX: movq from the
// replicate8_imm($con$$constant, 2) constant-table entry, then punpcklqdq of
// $dst with itself.
3845N/Ainstruct Repl8S_imm(vecX dst, immI con) %{
3845N/A predicate(n->as_Vector()->length() == 8);
3845N/A match(Set dst (ReplicateS con));
3891N/A format %{ "movq $dst,[$constantaddress]\n\t"
3891N/A "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
3845N/A ins_encode %{
3891N/A __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateS of an immI constant into a 16-element vecY: movq from the
// replicate8_imm($con$$constant, 2) constant-table entry, punpcklqdq of $dst
// with itself, then vinserti128h with $dst as all three operands.
3845N/Ainstruct Repl16S_imm(vecY dst, immI con) %{
3845N/A predicate(n->as_Vector()->length() == 16);
3845N/A match(Set dst (ReplicateS con));
3891N/A format %{ "movq $dst,[$constantaddress]\n\t"
3891N/A "punpcklqdq $dst,$dst\n\t"
3891N/A "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
3845N/A ins_encode %{
3891N/A __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3891N/A __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Replicate char/short (2 byte) scalar zero to be vector
// ReplicateS of the immI0 operand into a 2-element vecS: pxor of $dst with
// itself; the $zero operand is not referenced by the encoding.
3845N/Ainstruct Repl2S_zero(vecS dst, immI0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateS zero));
3845N/A format %{ "pxor $dst,$dst\t! replicate2S zero" %}
3845N/A ins_encode %{
3845N/A __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateS of the immI0 operand into a 4-element vecD: pxor of $dst with
// itself; the $zero operand is not referenced by the encoding.
3845N/Ainstruct Repl4S_zero(vecD dst, immI0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateS zero));
3845N/A format %{ "pxor $dst,$dst\t! replicate4S zero" %}
3845N/A ins_encode %{
3845N/A __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateS of the immI0 operand into an 8-element vecX: pxor of $dst with
// itself; the $zero operand is not referenced by the encoding.
3845N/Ainstruct Repl8S_zero(vecX dst, immI0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 8);
3845N/A match(Set dst (ReplicateS zero));
3845N/A format %{ "pxor $dst,$dst\t! replicate8S zero" %}
3845N/A ins_encode %{
3845N/A __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateS of the immI0 operand into a 16-element vecY: a vpxor call with
// $dst as all three register operands and vector256 set to true.
3845N/Ainstruct Repl16S_zero(vecY dst, immI0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 16);
3845N/A match(Set dst (ReplicateS zero));
3891N/A format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
3845N/A ins_encode %{
3845N/A // NOTE(review): the call below is vpxor, although this comment used to say
 // vxorpd is used because AVX lacks a 256-bit vpxor (AVX2 has it). Presumably
 // the assembler's vpxor covers that case -- confirm.
3845N/A bool vector256 = true;
3891N/A __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
3845N/A// Replicate integer (4 byte) scalar to be vector
// ReplicateI of an integer register into a 2-element vecD.
// Emits movdl ($src -> $dst) followed by pshufd(0x00) on $dst itself.
3845N/Ainstruct Repl2I(vecD dst, rRegI src) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateI src));
3845N/A format %{ "movd $dst,$src\n\t"
3845N/A "pshufd $dst,$dst,0x00\t! replicate2I" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateI of an integer register into a 4-element vecX.
// Same movdl / pshufd(0x00) sequence as Repl2I; the predicate length, the
// destination operand class and the pipeline class differ.
3845N/Ainstruct Repl4I(vecX dst, rRegI src) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateI src));
3845N/A format %{ "movd $dst,$src\n\t"
3845N/A "pshufd $dst,$dst,0x00\t! replicate4I" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateI of an integer register into an 8-element vecY.
// Emits movdl and pshufd(0x00) as in the 4-element rule, then vinserti128h
// with $dst as all three operands.
3845N/Ainstruct Repl8I(vecY dst, rRegI src) %{
3845N/A predicate(n->as_Vector()->length() == 8);
3845N/A match(Set dst (ReplicateI src));
3845N/A format %{ "movd $dst,$src\n\t"
3845N/A "pshufd $dst,$dst,0x00\n\t"
3891N/A "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3891N/A __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
// ReplicateI of an immI constant into a 2-element vecD: one movq from the
// constant-table entry built by replicate8_imm($con$$constant, 4).
// NOTE(review): the second argument is presumably the element width in
// bytes -- confirm against replicate8_imm.
3845N/Ainstruct Repl2I_imm(vecD dst, immI con) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateI con));
3891N/A format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
3845N/A ins_encode %{
3891N/A __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateI of an immI constant into a 4-element vecX: movq from the
// replicate8_imm($con$$constant, 4) constant-table entry, then punpcklqdq of
// $dst with itself. Note the "! replicate4I" annotation sits on the first
// format line here rather than the last.
3845N/Ainstruct Repl4I_imm(vecX dst, immI con) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateI con));
3891N/A format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
3891N/A "punpcklqdq $dst,$dst" %}
3845N/A ins_encode %{
3891N/A __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateI of an immI constant into an 8-element vecY: movq from the
// replicate8_imm($con$$constant, 4) constant-table entry, punpcklqdq of $dst
// with itself, then vinserti128h with $dst as all three operands. Note the
// "! replicate8I" annotation sits on the first format line here.
3845N/Ainstruct Repl8I_imm(vecY dst, immI con) %{
3845N/A predicate(n->as_Vector()->length() == 8);
3845N/A match(Set dst (ReplicateI con));
3891N/A format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
3891N/A "punpcklqdq $dst,$dst\n\t"
3891N/A "vinserti128h $dst,$dst,$dst" %}
3845N/A ins_encode %{
3891N/A __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3891N/A __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Integer could be loaded into xmm register directly from memory.
// ReplicateI whose input is a LoadI from memory, 2-element vecD result.
// The load is folded into the rule: movdl reads $mem straight into $dst,
// then pshufd(0x00) is applied to $dst itself.
3845N/Ainstruct Repl2I_mem(vecD dst, memory mem) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3891N/A match(Set dst (ReplicateI (LoadI mem)));
3845N/A format %{ "movd $dst,$mem\n\t"
3845N/A "pshufd $dst,$dst,0x00\t! replicate2I" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $mem$$Address);
3845N/A __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateI whose input is a LoadI from memory, 4-element vecX result.
// movdl reads $mem straight into $dst, then pshufd(0x00) is applied to $dst
// itself.
3845N/Ainstruct Repl4I_mem(vecX dst, memory mem) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3891N/A match(Set dst (ReplicateI (LoadI mem)));
3845N/A format %{ "movd $dst,$mem\n\t"
3845N/A "pshufd $dst,$dst,0x00\t! replicate4I" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $mem$$Address);
3845N/A __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateI whose input is a LoadI from memory, 8-element vecY result.
// movdl reads $mem into $dst, pshufd(0x00) is applied to $dst, and then
// vinserti128h is emitted with $dst as all three operands.
3845N/Ainstruct Repl8I_mem(vecY dst, memory mem) %{
3845N/A predicate(n->as_Vector()->length() == 8);
3891N/A match(Set dst (ReplicateI (LoadI mem)));
3845N/A format %{ "movd $dst,$mem\n\t"
3845N/A "pshufd $dst,$dst,0x00\n\t"
3891N/A "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $mem$$Address);
3845N/A __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3891N/A __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Replicate integer (4 byte) scalar zero to be vector
// ReplicateI of the immI0 operand into a 2-element vecD: pxor of $dst with
// itself; the $zero operand is not referenced by the encoding.
// The format annotation says "replicate2I zero" so the printed form can be
// told apart from the register rule Repl2I, matching the other zero rules.
3845N/Ainstruct Repl2I_zero(vecD dst, immI0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateI zero));
3845N/A format %{ "pxor $dst,$dst\t! replicate2I zero" %}
3845N/A ins_encode %{
3845N/A __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateI of the immI0 operand into a 4-element vecX: pxor of $dst with
// itself; the $zero operand is not referenced by the encoding.
// The format annotation no longer ends in an unbalanced ")".
3845N/Ainstruct Repl4I_zero(vecX dst, immI0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateI zero));
3845N/A format %{ "pxor $dst,$dst\t! replicate4I zero" %}
3845N/A ins_encode %{
3845N/A __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateI of the immI0 operand into an 8-element vecY: a vpxor call with
// $dst as all three register operands and vector256 set to true.
3845N/Ainstruct Repl8I_zero(vecY dst, immI0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 8);
3845N/A match(Set dst (ReplicateI zero));
3891N/A format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
3845N/A ins_encode %{
3845N/A // NOTE(review): the call below is vpxor, although this comment used to say
 // vxorpd is used because AVX lacks a 256-bit vpxor (AVX2 has it). Presumably
 // the assembler's vpxor covers that case -- confirm.
3845N/A bool vector256 = true;
3891N/A __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
3845N/A// Replicate long (8 byte) scalar to be vector
3845N/A#ifdef _LP64
// _LP64 variant: ReplicateL of a long register into a 2-element vecX.
// Emits movdq ($src -> $dst) followed by punpcklqdq of $dst with itself.
3845N/Ainstruct Repl2L(vecX dst, rRegL src) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateL src));
3845N/A format %{ "movdq $dst,$src\n\t"
3891N/A "punpcklqdq $dst,$dst\t! replicate2L" %}
3845N/A ins_encode %{
3845N/A __ movdq($dst$$XMMRegister, $src$$Register);
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// _LP64 variant: ReplicateL of a long register into a 4-element vecY.
// Emits movdq and punpcklqdq as in the 2-element rule, then vinserti128h
// with $dst as all three operands.
3845N/Ainstruct Repl4L(vecY dst, rRegL src) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateL src));
3845N/A format %{ "movdq $dst,$src\n\t"
3891N/A "punpcklqdq $dst,$dst\n\t"
3891N/A "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3845N/A ins_encode %{
3845N/A __ movdq($dst$$XMMRegister, $src$$Register);
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3891N/A __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A#else // _LP64
// Non-_LP64 variant: ReplicateL of an eRegL source into a 2-element vecX.
// Two movdl moves fetch $src and HIGH_FROM_LOW($src) (printed as $src.lo and
// $src.hi) into $dst and $tmp; punpckldq combines $tmp into $dst and
// punpcklqdq is then applied to $dst with itself. $dst and $tmp are declared
// TEMP in the effect list.
3845N/Ainstruct Repl2L(vecX dst, eRegL src, regD tmp) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateL src));
3845N/A effect(TEMP dst, USE src, TEMP tmp);
3845N/A format %{ "movdl $dst,$src.lo\n\t"
3845N/A "movdl $tmp,$src.hi\n\t"
3845N/A "punpckldq $dst,$tmp\n\t"
3891N/A "punpcklqdq $dst,$dst\t! replicate2L"%}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3845N/A __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// Non-_LP64 variant: ReplicateL of an eRegL source into a 4-element vecY.
// Same movdl / movdl / punpckldq / punpcklqdq sequence as the 2-element
// non-_LP64 rule, followed by vinserti128h with $dst as all three operands.
// $dst and $tmp are declared TEMP in the effect list.
3845N/Ainstruct Repl4L(vecY dst, eRegL src, regD tmp) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateL src));
3845N/A effect(TEMP dst, USE src, TEMP tmp);
3845N/A format %{ "movdl $dst,$src.lo\n\t"
3845N/A "movdl $tmp,$src.hi\n\t"
3845N/A "punpckldq $dst,$tmp\n\t"
3891N/A "punpcklqdq $dst,$dst\n\t"
3891N/A "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3845N/A ins_encode %{
3845N/A __ movdl($dst$$XMMRegister, $src$$Register);
3845N/A __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
3845N/A __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3891N/A __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A#endif // _LP64
3845N/A
3845N/A// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// ReplicateL of an immL constant into a 2-element vecX: movq from the
// constant-table entry for $con, then punpcklqdq of $dst with itself.
3845N/Ainstruct Repl2L_imm(vecX dst, immL con) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateL con));
3891N/A format %{ "movq $dst,[$constantaddress]\n\t"
3891N/A "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
3845N/A ins_encode %{
3891N/A __ movq($dst$$XMMRegister, $constantaddress($con));
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateL of an immL constant into a 4-element vecY: movq from the
// constant-table entry for $con, punpcklqdq of $dst with itself, then
// vinserti128h with $dst as all three operands.
3845N/Ainstruct Repl4L_imm(vecY dst, immL con) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateL con));
3891N/A format %{ "movq $dst,[$constantaddress]\n\t"
3891N/A "punpcklqdq $dst,$dst\n\t"
3891N/A "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
3845N/A ins_encode %{
3891N/A __ movq($dst$$XMMRegister, $constantaddress($con));
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3891N/A __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Long could be loaded into xmm register directly from memory.
// ReplicateL whose input is a LoadL from memory, 2-element vecX result.
// movq reads $mem straight into $dst, then punpcklqdq is applied to $dst
// with itself.
3845N/Ainstruct Repl2L_mem(vecX dst, memory mem) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3891N/A match(Set dst (ReplicateL (LoadL mem)));
3845N/A format %{ "movq $dst,$mem\n\t"
3891N/A "punpcklqdq $dst,$dst\t! replicate2L" %}
3845N/A ins_encode %{
3845N/A __ movq($dst$$XMMRegister, $mem$$Address);
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateL whose input is a LoadL from memory, 4-element vecY result.
// movq reads $mem into $dst, punpcklqdq is applied to $dst with itself, and
// then vinserti128h is emitted with $dst as all three operands.
3845N/Ainstruct Repl4L_mem(vecY dst, memory mem) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3891N/A match(Set dst (ReplicateL (LoadL mem)));
3845N/A format %{ "movq $dst,$mem\n\t"
3891N/A "punpcklqdq $dst,$dst\n\t"
3891N/A "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
3845N/A ins_encode %{
3845N/A __ movq($dst$$XMMRegister, $mem$$Address);
3891N/A __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3891N/A __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Replicate long (8 byte) scalar zero to be vector
// ReplicateL of the immL0 operand into a 2-element vecX: pxor of $dst with
// itself; the $zero operand is not referenced by the encoding.
3845N/Ainstruct Repl2L_zero(vecX dst, immL0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateL zero));
3845N/A format %{ "pxor $dst,$dst\t! replicate2L zero" %}
3845N/A ins_encode %{
3845N/A __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateL of the immL0 operand into a 4-element vecY: a vpxor call with
// $dst as all three register operands and vector256 set to true.
3845N/Ainstruct Repl4L_zero(vecY dst, immL0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateL zero));
3891N/A format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
3845N/A ins_encode %{
3845N/A // NOTE(review): the call below is vpxor, although this comment used to say
 // vxorpd is used because AVX lacks a 256-bit vpxor (AVX2 has it). Presumably
 // the assembler's vpxor covers that case -- confirm.
3845N/A bool vector256 = true;
3891N/A __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
3845N/A// Replicate float (4 byte) scalar to be vector
// ReplicateF of a float register into a 2-element vecD: a single
// pshufd(0x00) that reads $src and writes $dst.
// The format string prints "$dst,$src" to match the operands actually passed
// to pshufd (it previously printed "$dst,$dst").
3845N/Ainstruct Repl2F(vecD dst, regF src) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateF src));
3845N/A format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
3845N/A ins_encode %{
3845N/A __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateF of a float register into a 4-element vecX: a single
// pshufd(0x00) that reads $src and writes $dst.
// The format string prints "$dst,$src" to match the operands actually passed
// to pshufd (it previously printed "$dst,$dst").
3845N/Ainstruct Repl4F(vecX dst, regF src) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateF src));
3845N/A format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
3845N/A ins_encode %{
3845N/A __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateF of a float register into an 8-element vecY: pshufd(0x00) from
// $src into $dst, then vinsertf128h with $dst as all three operands.
3845N/Ainstruct Repl8F(vecY dst, regF src) %{
3845N/A predicate(n->as_Vector()->length() == 8);
3845N/A match(Set dst (ReplicateF src));
3845N/A format %{ "pshufd $dst,$src,0x00\n\t"
3845N/A "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
3845N/A ins_encode %{
3845N/A __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
3845N/A __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Replicate float (4 byte) scalar zero to be vector
// ReplicateF of the immF0 operand into a 2-element vecD: xorps of $dst with
// itself; the $zero operand is not referenced by the encoding.
3845N/Ainstruct Repl2F_zero(vecD dst, immF0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateF zero));
3845N/A format %{ "xorps $dst,$dst\t! replicate2F zero" %}
3845N/A ins_encode %{
3845N/A __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateF of the immF0 operand into a 4-element vecX: xorps of $dst with
// itself; the $zero operand is not referenced by the encoding.
3845N/Ainstruct Repl4F_zero(vecX dst, immF0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateF zero));
3845N/A format %{ "xorps $dst,$dst\t! replicate4F zero" %}
3845N/A ins_encode %{
3845N/A __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateF of the immF0 operand into an 8-element vecY: a vxorps call with
// $dst as all three register operands and vector256 set to true.
3845N/Ainstruct Repl8F_zero(vecY dst, immF0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 8);
3845N/A match(Set dst (ReplicateF zero));
3845N/A format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
3845N/A ins_encode %{
3845N/A bool vector256 = true;
3845N/A __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
3845N/A// Replicate double (8 bytes) scalar to be vector
// ReplicateD of a double register into a 2-element vecX: a single pshufd
// with immediate 0x44 that reads $src and writes $dst.
3845N/Ainstruct Repl2D(vecX dst, regD src) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateD src));
3845N/A format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
3845N/A ins_encode %{
3845N/A __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
// ReplicateD of a double register into a 4-element vecY: pshufd(0x44) from
// $src into $dst, then vinsertf128h with $dst as all three operands.
3845N/Ainstruct Repl4D(vecY dst, regD src) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateD src));
3845N/A format %{ "pshufd $dst,$src,0x44\n\t"
3845N/A "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
3845N/A ins_encode %{
3845N/A __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
3845N/A __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( pipe_slow );
3845N/A%}
3845N/A
3845N/A// Replicate double (8 byte) scalar zero to be vector
// ReplicateD of the immD0 operand into a 2-element vecX: xorpd of $dst with
// itself; the $zero operand is not referenced by the encoding.
3845N/Ainstruct Repl2D_zero(vecX dst, immD0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 2);
3845N/A match(Set dst (ReplicateD zero));
3845N/A format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
3845N/A ins_encode %{
3845N/A __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A
// ReplicateD of the immD0 operand into a 4-element vecY: a vxorpd call with
// $dst as all three register operands and vector256 set to true. The format
// string spells the flag out as a trailing ",vect256".
3845N/Ainstruct Repl4D_zero(vecY dst, immD0 zero) %{
3845N/A predicate(n->as_Vector()->length() == 4);
3845N/A match(Set dst (ReplicateD zero));
3845N/A format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
3845N/A ins_encode %{
3845N/A bool vector256 = true;
3845N/A __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
3845N/A %}
3845N/A ins_pipe( fpu_reg_reg );
3845N/A%}
3845N/A