Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next

Pull networking updates from David Miller: "Another merge window, another pull full of stuff: 1) Support alternative names for network devices, from Jiri Pirko. 2) Introduce per-netns netdev notifiers, also from Jiri Pirko. 3) Support MSG_PEEK in vsock/virtio, from Matias Ezequiel Vara Larsen. 4) Allow compiling out the TLS TOE code, from Jakub Kicinski. 5) Add several new tracepoints to the kTLS code, also from Jakub. 6) Support set channels ethtool callback in ena driver, from Sameeh Jubran. 7) New SCTP events SCTP_ADDR_ADDED, SCTP_ADDR_REMOVED, SCTP_ADDR_MADE_PRIM, and SCTP_SEND_FAILED_EVENT. From Xin Long. 8) Add XDP support to mvneta driver, from Lorenzo Bianconi. 9) Lots of netfilter hw offload fixes, cleanups and enhancements, from Pablo Neira Ayuso. 10) PTP support for aquantia chips, from Egor Pomozov. 11) Add UDP segmentation offload support to igb, ixgbe, and i40e. From Josh Hunt. 12) Add smart nagle to tipc, from Jon Maloy. 13) Support L2 field rewrite by TC offloads in bnxt_en, from Venkat Duvvuru. 14) Add a flow mask cache to OVS, from Tonghao Zhang. 15) Add XDP support to ice driver, from Maciej Fijalkowski. 16) Add AF_XDP support to ice driver, from Krzysztof Kazimierczak. 17) Support UDP GSO offload in atlantic driver, from Igor Russkikh. 18) Support it in stmmac driver too, from Jose Abreu. 19) Support TIPC encryption and auth, from Tuong Lien. 20) Introduce BPF trampolines, from Alexei Starovoitov. 21) Make page_pool API more numa friendly, from Saeed Mahameed. 22) Introduce route hints to ipv4 and ipv6, from Paolo Abeni. 23) Add UDP segmentation offload to cxgb4, Rahul Lakkireddy" * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1857 commits) libbpf: Fix usage of u32 in userspace code mm: Implement no-MMU variant of vmalloc_user_node_flags slip: Fix use-after-free Read in slip_open net: dsa: sja1105: fix sja1105_parse_rgmii_delays() macvlan: schedule bc_work even if error enetc: add support Credit Based Shaper(CBS) for hardware offload net: phy: add helpers phy_(un)lock_mdio_bus mdio_bus: don't use managed reset-controller ax88179_178a: add ethtool_op_get_ts_info() mlxsw: spectrum_router: Fix use of uninitialized adjacency index mlxsw: spectrum_router: After underlay moves, demote conflicting tunnels bpf: Simplify __bpf_arch_text_poke poke type handling bpf: Introduce BPF_TRACE_x helper for the tracing tests bpf: Add bpf_jit_blinding_enabled for !CONFIG_BPF_JIT bpf, testing: Add various tail call test cases bpf, x86: Emit patchable direct jump as tail call bpf: Constant map key tracking for prog array pokes bpf: Add poke dependency tracking for prog array maps bpf: Add initial poke descriptor table for jit images bpf: Move owner type, jited info into array auxiliary data ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2019-11-25 20:02:57 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2019-11-25 20:02:57 -0800
commit: 386403a115f95997c2715691226e11a7b5cffcfd (patch)
tree: a685df70bd3d5b295683713818ddf0752c3d75b6 /drivers/net/ethernet/intel/ice
parent: 642356cb5f4a8c82b5ca5ebac288c327d10df236 (diff)
parent: 622dc5ad8052f4f0c6b7a12787696a5caa3c6a58 (diff)
download: kernel_replicant_linux-386403a115f95997c2715691226e11a7b5cffcfd.tar.gz
kernel_replicant_linux-386403a115f95997c2715691226e11a7b5cffcfd.tar.bz2
kernel_replicant_linux-386403a115f95997c2715691226e11a7b5cffcfd.zip
35 files changed, 7752 insertions, 1929 deletions
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 9edde960b4f2..7cb829132d28 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -13,9 +13,12 @@ ice-y := ice_main.o	\
 	 ice_nvm.o	\
 	 ice_switch.o	\
 	 ice_sched.o	\
+	 ice_base.o	\
 	 ice_lib.o	\
+	 ice_txrx_lib.o	\
 	 ice_txrx.o	\
 	 ice_flex_pipe.o	\
 	 ice_ethtool.o
 ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
-ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o
+ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
+ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 45e100666049..f972dce8aebb 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -29,10 +29,13 @@
 #include <linux/ip.h>
 #include <linux/sctp.h>
 #include <linux/ipv6.h>
+#include <linux/pkt_sched.h>
 #include <linux/if_bridge.h>
 #include <linux/ctype.h>
+#include <linux/bpf.h>
 #include <linux/avf/virtchnl.h>
 #include <net/ipv6.h>
+#include <net/xdp_sock.h>
 #include "ice_devids.h"
 #include "ice_type.h"
 #include "ice_txrx.h"
@@ -42,6 +45,7 @@
 #include "ice_sched.h"
 #include "ice_virtchnl_pf.h"
 #include "ice_sriov.h"
+#include "ice_xsk.h"
 
 extern const char ice_drv_ver[];
 #define ICE_BAR0		0
@@ -78,8 +82,7 @@ extern const char ice_drv_ver[];
 
 #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
-#define ICE_MAX_MTU	(ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \
-			(ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)))
+#define ICE_MAX_MTU	(ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD)
 
 #define ICE_UP_TABLE_TRANSLATE(val, i) \
 		(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
@@ -127,6 +130,16 @@ extern const char ice_drv_ver[];
 				     ICE_PROMISC_VLAN_TX  | \
 				     ICE_PROMISC_VLAN_RX)
 
+#define ice_pf_to_dev(pf) (&((pf)->pdev->dev))
+
+struct ice_txq_meta {
+	u32 q_teid;	/* Tx-scheduler element identifier */
+	u16 q_id;	/* Entry in VSI's txq_map bitmap */
+	u16 q_handle;	/* Relative index of Tx queue within TC */
+	u16 vsi_idx;	/* VSI index that Tx queue belongs to */
+	u8 tc;		/* TC number that Tx queue belongs to */
+};
+
 struct ice_tc_info {
 	u16 qoffset;
 	u16 qcount_tx;
@@ -169,6 +182,7 @@ enum ice_state {
 	__ICE_NEEDS_RESTART,
 	__ICE_PREPARED_FOR_RESET,	/* set by driver when prepared */
 	__ICE_RESET_OICR_RECV,		/* set by driver after rcv reset OICR */
+	__ICE_DCBNL_DEVRESET,		/* set by dcbnl devreset */
 	__ICE_PFR_REQ,			/* set by driver and peers */
 	__ICE_CORER_REQ,		/* set by driver and peers */
 	__ICE_GLOBR_REQ,		/* set by driver and peers */
@@ -271,9 +285,18 @@ struct ice_vsi {
 	u16 num_txq;			 /* Used Tx queues */
 	u16 alloc_rxq;			 /* Allocated Rx queues */
 	u16 num_rxq;			 /* Used Rx queues */
+	u16 req_txq;			 /* User requested Tx queues */
+	u16 req_rxq;			 /* User requested Rx queues */
 	u16 num_rx_desc;
 	u16 num_tx_desc;
 	struct ice_tc_cfg tc_cfg;
+	struct bpf_prog *xdp_prog;
+	struct ice_ring **xdp_rings;	 /* XDP ring array */
+	u16 num_xdp_txq;		 /* Used XDP queues */
+	u8 xdp_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+	struct xdp_umem **xsk_umems;
+	u16 num_xsk_umems_used;
+	u16 num_xsk_umems;
 } ____cacheline_internodealigned_in_smp;
 
 /* struct that defines an interrupt vector */
@@ -313,6 +336,7 @@ enum ice_pf_flags {
 	ICE_FLAG_NO_MEDIA,
 	ICE_FLAG_FW_LLDP_AGENT,
 	ICE_FLAG_ETHTOOL_CTXT,		/* set when ethtool holds RTNL lock */
+	ICE_FLAG_LEGACY_RX,
 	ICE_PF_FLAGS_NBITS		/* must be last */
 };
 
@@ -346,6 +370,7 @@ struct ice_pf {
 	struct work_struct serv_task;
 	struct mutex avail_q_mutex;	/* protects access to avail_[rx|tx]qs */
 	struct mutex sw_mutex;		/* lock for protecting VSI alloc flow */
+	struct mutex tc_mutex;		/* lock to protect TC changes */
 	u32 msg_enable;
 	u32 hw_csum_rx_error;
 	u32 oicr_idx;		/* Other interrupt cause MSIX vector index */
@@ -417,6 +442,37 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
 	return np->vsi->back;
 }
 
+static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
+{
+	return !!vsi->xdp_prog;
+}
+
+static inline void ice_set_ring_xdp(struct ice_ring *ring)
+{
+	ring->flags |= ICE_TX_FLAGS_RING_XDP;
+}
+
+/**
+ * ice_xsk_umem - get XDP UMEM bound to a ring
+ * @ring - ring to use
+ *
+ * Returns a pointer to xdp_umem structure if there is an UMEM present,
+ * NULL otherwise.
+ */
+static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring)
+{
+	struct xdp_umem **umems = ring->vsi->xsk_umems;
+	int qid = ring->q_index;
+
+	if (ice_ring_is_xdp(ring))
+		qid -= ring->vsi->num_xdp_txq;
+
+	if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi))
+		return NULL;
+
+	return umems[qid];
+}
+
 /**
  * ice_get_main_vsi - Get the PF VSI
  * @pf: PF instance
@@ -437,20 +493,23 @@ void ice_set_ethtool_ops(struct net_device *netdev);
 void ice_set_ethtool_safe_mode_ops(struct net_device *netdev);
 u16 ice_get_avail_txq_count(struct ice_pf *pf);
 u16 ice_get_avail_rxq_count(struct ice_pf *pf);
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx);
 void ice_update_vsi_stats(struct ice_vsi *vsi);
 void ice_update_pf_stats(struct ice_pf *pf);
 int ice_up(struct ice_vsi *vsi);
 int ice_down(struct ice_vsi *vsi);
 int ice_vsi_cfg(struct ice_vsi *vsi);
 struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
+int ice_destroy_xdp_rings(struct ice_vsi *vsi);
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+	     u32 flags);
 int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
+int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
-#ifdef CONFIG_DCB
-int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked);
-void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked);
-#endif /* CONFIG_DCB */
 int ice_open(struct net_device *netdev);
 int ice_stop(struct net_device *netdev);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 023e3d2fee5f..5421fc413f94 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -742,6 +742,10 @@ struct ice_aqc_add_elem {
 	struct ice_aqc_txsched_elem_data generic[1];
 };
 
+struct ice_aqc_conf_elem {
+	struct ice_aqc_txsched_elem_data generic[1];
+};
+
 struct ice_aqc_get_elem {
 	struct ice_aqc_txsched_elem_data generic[1];
 };
@@ -783,6 +787,44 @@ struct ice_aqc_port_ets_elem {
 	__le32 tc_node_teid[8]; /* Used for response, reserved in command */
 };
 
+/* Rate limiting profile for
+ * Add RL profile (indirect 0x0410)
+ * Query RL profile (indirect 0x0411)
+ * Remove RL profile (indirect 0x0415)
+ * These indirect commands acts on single or multiple
+ * RL profiles with specified data.
+ */
+struct ice_aqc_rl_profile {
+	__le16 num_profiles;
+	__le16 num_processed; /* Only for response. Reserved in Command. */
+	u8 reserved[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_rl_profile_elem {
+	u8 level;
+	u8 flags;
+#define ICE_AQC_RL_PROFILE_TYPE_S	0x0
+#define ICE_AQC_RL_PROFILE_TYPE_M	(0x3 << ICE_AQC_RL_PROFILE_TYPE_S)
+#define ICE_AQC_RL_PROFILE_TYPE_CIR	0
+#define ICE_AQC_RL_PROFILE_TYPE_EIR	1
+#define ICE_AQC_RL_PROFILE_TYPE_SRL	2
+/* The following flag is used for Query RL Profile Data */
+#define ICE_AQC_RL_PROFILE_INVAL_S	0x7
+#define ICE_AQC_RL_PROFILE_INVAL_M	(0x1 << ICE_AQC_RL_PROFILE_INVAL_S)
+
+	__le16 profile_id;
+	__le16 max_burst_size;
+	__le16 rl_multiply;
+	__le16 wake_up_calc;
+	__le16 rl_encode;
+};
+
+struct ice_aqc_rl_profile_generic_elem {
+	struct ice_aqc_rl_profile_elem generic[1];
+};
+
 /* Query Scheduler Resource Allocation (indirect 0x0412)
  * This indirect command retrieves the scheduler resources allocated by
  * EMP Firmware to the given PF.
@@ -1044,6 +1086,10 @@ struct ice_aqc_get_link_status_data {
 #define ICE_AQ_LINK_TOPO_CONFLICT	BIT(0)
 #define ICE_AQ_LINK_MEDIA_CONFLICT	BIT(1)
 #define ICE_AQ_LINK_TOPO_CORRUPT	BIT(2)
+#define ICE_AQ_LINK_TOPO_UNREACH_PRT	BIT(4)
+#define ICE_AQ_LINK_TOPO_UNDRUTIL_PRT	BIT(5)
+#define ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA	BIT(6)
+#define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA	BIT(7)
 	u8 reserved1;
 	u8 link_info;
 #define ICE_AQ_LINK_UP			BIT(0)	/* Link Status */
@@ -1147,6 +1193,33 @@ struct ice_aqc_set_port_id_led {
 	u8 rsvd[13];
 };
 
+/* Read/Write SFF EEPROM command (indirect 0x06EE) */
+struct ice_aqc_sff_eeprom {
+	u8 lport_num;
+	u8 lport_num_valid;
+#define ICE_AQC_SFF_PORT_NUM_VALID	BIT(0)
+	__le16 i2c_bus_addr;
+#define ICE_AQC_SFF_I2CBUS_7BIT_M	0x7F
+#define ICE_AQC_SFF_I2CBUS_10BIT_M	0x3FF
+#define ICE_AQC_SFF_I2CBUS_TYPE_M	BIT(10)
+#define ICE_AQC_SFF_I2CBUS_TYPE_7BIT	0
+#define ICE_AQC_SFF_I2CBUS_TYPE_10BIT	ICE_AQC_SFF_I2CBUS_TYPE_M
+#define ICE_AQC_SFF_SET_EEPROM_PAGE_S	11
+#define ICE_AQC_SFF_SET_EEPROM_PAGE_M	(0x3 << ICE_AQC_SFF_SET_EEPROM_PAGE_S)
+#define ICE_AQC_SFF_NO_PAGE_CHANGE	0
+#define ICE_AQC_SFF_SET_23_ON_MISMATCH	1
+#define ICE_AQC_SFF_SET_22_ON_MISMATCH	2
+#define ICE_AQC_SFF_IS_WRITE		BIT(15)
+	__le16 i2c_mem_addr;
+	__le16 eeprom_page;
+#define  ICE_AQC_SFF_EEPROM_BANK_S 0
+#define  ICE_AQC_SFF_EEPROM_BANK_M (0xFF << ICE_AQC_SFF_EEPROM_BANK_S)
+#define  ICE_AQC_SFF_EEPROM_PAGE_S 8
+#define  ICE_AQC_SFF_EEPROM_PAGE_M (0xFF << ICE_AQC_SFF_EEPROM_PAGE_S)
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
 /* NVM Read command (indirect 0x0701)
  * NVM Erase commands (direct 0x0702)
  * NVM Update commands (indirect 0x0703)
@@ -1618,6 +1691,7 @@ struct ice_aq_desc {
 		struct ice_aqc_get_phy_caps get_phy;
 		struct ice_aqc_set_phy_cfg set_phy;
 		struct ice_aqc_restart_an restart_an;
+		struct ice_aqc_sff_eeprom read_write_sff_param;
 		struct ice_aqc_set_port_id_led set_port_id_led;
 		struct ice_aqc_get_sw_cfg get_sw_conf;
 		struct ice_aqc_sw_rules sw_rules;
@@ -1625,6 +1699,7 @@ struct ice_aq_desc {
 		struct ice_aqc_sched_elem_cmd sched_elem_cmd;
 		struct ice_aqc_query_txsched_res query_sched_res;
 		struct ice_aqc_query_port_ets port_ets;
+		struct ice_aqc_rl_profile rl_profile;
 		struct ice_aqc_nvm nvm;
 		struct ice_aqc_nvm_checksum nvm_checksum;
 		struct ice_aqc_pf_vf_msg virt;
@@ -1726,12 +1801,15 @@ enum ice_adminq_opc {
 	/* transmit scheduler commands */
 	ice_aqc_opc_get_dflt_topo			= 0x0400,
 	ice_aqc_opc_add_sched_elems			= 0x0401,
+	ice_aqc_opc_cfg_sched_elems			= 0x0403,
 	ice_aqc_opc_get_sched_elems			= 0x0404,
 	ice_aqc_opc_suspend_sched_elems			= 0x0409,
 	ice_aqc_opc_resume_sched_elems			= 0x040A,
 	ice_aqc_opc_query_port_ets			= 0x040E,
 	ice_aqc_opc_delete_sched_elems			= 0x040F,
+	ice_aqc_opc_add_rl_profiles			= 0x0410,
 	ice_aqc_opc_query_sched_res			= 0x0412,
+	ice_aqc_opc_remove_rl_profiles			= 0x0415,
 
 	/* PHY commands */
 	ice_aqc_opc_get_phy_caps			= 0x0600,
@@ -1741,6 +1819,7 @@ enum ice_adminq_opc {
 	ice_aqc_opc_set_event_mask			= 0x0613,
 	ice_aqc_opc_set_mac_lb				= 0x0620,
 	ice_aqc_opc_set_port_id_led			= 0x06E9,
+	ice_aqc_opc_sff_eeprom				= 0x06EE,
 
 	/* NVM commands */
 	ice_aqc_opc_nvm_read				= 0x0701,
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
new file mode 100644
index 000000000000..77d6a0291e97
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -0,0 +1,859 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_base.h"
+#include "ice_dcb_lib.h"
+
+/**
+ * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
+ * @qs_cfg: gathered variables needed for PF->VSI queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
+{
+	int offset, i;
+
+	mutex_lock(qs_cfg->qs_mutex);
+	offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
+					    0, qs_cfg->q_count, 0);
+	if (offset >= qs_cfg->pf_map_size) {
+		mutex_unlock(qs_cfg->qs_mutex);
+		return -ENOMEM;
+	}
+
+	bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
+	for (i = 0; i < qs_cfg->q_count; i++)
+		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return 0;
+}
+
+/**
+ * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
+{
+	int i, index = 0;
+
+	mutex_lock(qs_cfg->qs_mutex);
+	for (i = 0; i < qs_cfg->q_count; i++) {
+		index = find_next_zero_bit(qs_cfg->pf_map,
+					   qs_cfg->pf_map_size, index);
+		if (index >= qs_cfg->pf_map_size)
+			goto err_scatter;
+		set_bit(index, qs_cfg->pf_map);
+		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
+	}
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return 0;
+err_scatter:
+	for (index = 0; index < i; index++) {
+		clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
+		qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
+	}
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return -ENOMEM;
+}
+
+/**
+ * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @ena: enable or disable state of the queue
+ *
+ * This routine will wait for the given Rx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT in case of failing to reach the requested state after
+ * multiple retries; else will return 0 in case of success.
+ */
+static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
+{
+	int i;
+
+	for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
+		if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
+			      QRX_CTRL_QENA_STAT_M))
+			return 0;
+
+		usleep_range(20, 40);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the VSI struct
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_q_vector *q_vector;
+
+	/* allocate q_vector */
+	q_vector = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*q_vector),
+				GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	q_vector->vsi = vsi;
+	q_vector->v_idx = v_idx;
+	if (vsi->type == ICE_VSI_VF)
+		goto out;
+	/* only set affinity_mask if the CPU is online */
+	if (cpu_online(v_idx))
+		cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+
+	/* This will not be called in the driver load path because the netdev
+	 * will not be created yet. All other cases with register the NAPI
+	 * handler here (i.e. resume, reset/rebuild, etc.)
+	 */
+	if (vsi->netdev)
+		netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
+			       NAPI_POLL_WEIGHT);
+
+out:
+	/* tie q_vector and VSI together */
+	vsi->q_vectors[v_idx] = q_vector;
+
+	return 0;
+}
+
+/**
+ * ice_free_q_vector - Free memory allocated for a specific interrupt vector
+ * @vsi: VSI having the memory freed
+ * @v_idx: index of the vector to be freed
+ */
+static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+	struct ice_q_vector *q_vector;
+	struct ice_pf *pf = vsi->back;
+	struct ice_ring *ring;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+	if (!vsi->q_vectors[v_idx]) {
+		dev_dbg(dev, "Queue vector at index %d not found\n", v_idx);
+		return;
+	}
+	q_vector = vsi->q_vectors[v_idx];
+
+	ice_for_each_ring(ring, q_vector->tx)
+		ring->q_vector = NULL;
+	ice_for_each_ring(ring, q_vector->rx)
+		ring->q_vector = NULL;
+
+	/* only VSI with an associated netdev is set up with NAPI */
+	if (vsi->netdev)
+		netif_napi_del(&q_vector->napi);
+
+	devm_kfree(dev, q_vector);
+	vsi->q_vectors[v_idx] = NULL;
+}
+
+/**
+ * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
+ * @hw: board specific structure
+ */
+static void ice_cfg_itr_gran(struct ice_hw *hw)
+{
+	u32 regval = rd32(hw, GLINT_CTL);
+
+	/* no need to update global register if ITR gran is already set */
+	if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
+	     GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
+	     GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
+	     GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
+	      GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
+		return;
+
+	regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
+		  GLINT_CTL_ITR_GRAN_200_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
+		  GLINT_CTL_ITR_GRAN_100_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
+		  GLINT_CTL_ITR_GRAN_50_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
+		  GLINT_CTL_ITR_GRAN_25_M);
+	wr32(hw, GLINT_CTL, regval);
+}
+
+/**
+ * ice_calc_q_handle - calculate the queue handle
+ * @vsi: VSI that ring belongs to
+ * @ring: ring to get the absolute queue index
+ * @tc: traffic class number
+ */
+static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc)
+{
+	WARN_ONCE(ice_ring_is_xdp(ring) && tc,
+		  "XDP ring can't belong to TC other than 0");
+
+	/* Idea here for calculation is that we subtract the number of queue
+	 * count from TC that ring belongs to from it's absolute queue index
+	 * and as a result we get the queue's index within TC.
+	 */
+	return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset;
+}
+
+/**
+ * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
+ * @ring: The Tx ring to configure
+ * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
+ * @pf_q: queue index in the PF space
+ *
+ * Configure the Tx descriptor ring in TLAN context.
+ */
+static void
+ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
+{
+	struct ice_vsi *vsi = ring->vsi;
+	struct ice_hw *hw = &vsi->back->hw;
+
+	tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
+
+	tlan_ctx->port_num = vsi->port_info->lport;
+
+	/* Transmit Queue Length */
+	tlan_ctx->qlen = ring->count;
+
+	ice_set_cgd_num(tlan_ctx, ring);
+
+	/* PF number */
+	tlan_ctx->pf_num = hw->pf_id;
+
+	/* queue belongs to a specific VSI type
+	 * VF / VM index should be programmed per vmvf_type setting:
+	 * for vmvf_type = VF, it is VF number between 0-256
+	 * for vmvf_type = VM, it is VM number between 0-767
+	 * for PF or EMP this field should be set to zero
+	 */
+	switch (vsi->type) {
+	case ICE_VSI_LB:
+		/* fall through */
+	case ICE_VSI_PF:
+		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
+		break;
+	case ICE_VSI_VF:
+		/* Firmware expects vmvf_num to be absolute VF ID */
+		tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
+		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
+		break;
+	default:
+		return;
+	}
+
+	/* make sure the context is associated with the right VSI */
+	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
+
+	tlan_ctx->tso_ena = ICE_TX_LEGACY;
+	tlan_ctx->tso_qnum = pf_q;
+
+	/* Legacy or Advanced Host Interface:
+	 * 0: Advanced Host Interface
+	 * 1: Legacy Host Interface
+	 */
+	tlan_ctx->legacy_int = ICE_TX_LEGACY;
+}
+
+/**
+ * ice_setup_rx_ctx - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in RLAN context.
+ */
+int ice_setup_rx_ctx(struct ice_ring *ring)
+{
+	int chain_len = ICE_MAX_CHAINED_RX_BUFS;
+	struct ice_vsi *vsi = ring->vsi;
+	u32 rxdid = ICE_RXDID_FLEX_NIC;
+	struct ice_rlan_ctx rlan_ctx;
+	struct ice_hw *hw;
+	u32 regval;
+	u16 pf_q;
+	int err;
+
+	hw = &vsi->back->hw;
+
+	/* what is Rx queue number in global space of 2K Rx queues */
+	pf_q = vsi->rxq_map[ring->q_index];
+
+	/* clear the context structure first */
+	memset(&rlan_ctx, 0, sizeof(rlan_ctx));
+
+	ring->rx_buf_len = vsi->rx_buf_len;
+
+	if (ring->vsi->type == ICE_VSI_PF) {
+		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+			xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+					 ring->q_index);
+
+		ring->xsk_umem = ice_xsk_umem(ring);
+		if (ring->xsk_umem) {
+			xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+			ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
+					   XDP_PACKET_HEADROOM;
+			/* For AF_XDP ZC, we disallow packets to span on
+			 * multiple buffers, thus letting us skip that
+			 * handling in the fast-path.
+			 */
+			chain_len = 1;
+			ring->zca.free = ice_zca_free;
+			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+							 MEM_TYPE_ZERO_COPY,
+							 &ring->zca);
+			if (err)
+				return err;
+
+			dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+				 ring->q_index);
+		} else {
+			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+				xdp_rxq_info_reg(&ring->xdp_rxq,
+						 ring->netdev,
+						 ring->q_index);
+
+			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+							 MEM_TYPE_PAGE_SHARED,
+							 NULL);
+			if (err)
+				return err;
+		}
+	}
+	/* Receive Queue Base Address.
+	 * Indicates the starting address of the descriptor queue defined in
+	 * 128 Byte units.
+	 */
+	rlan_ctx.base = ring->dma >> 7;
+
+	rlan_ctx.qlen = ring->count;
+
+	/* Receive Packet Data Buffer Size.
+	 * The Packet Data Buffer Size is defined in 128 byte units.
+	 */
+	rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
+
+	/* use 32 byte descriptors */
+	rlan_ctx.dsize = 1;
+
+	/* Strip the Ethernet CRC bytes before the packet is posted to host
+	 * memory.
+	 */
+	rlan_ctx.crcstrip = 1;
+
+	/* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
+	rlan_ctx.l2tsel = 1;
+
+	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
+	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
+
+	/* This controls whether VLAN is stripped from inner headers
+	 * The VLAN in the inner L2 header is stripped to the receive
+	 * descriptor if enabled by this flag.
+	 */
+	rlan_ctx.showiv = 0;
+
+	/* Max packet size for this queue - must not be set to a larger value
+	 * than 5 x DBUF
+	 */
+	rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
+			       chain_len * ring->rx_buf_len);
+
+	/* Rx queue threshold in units of 64 */
+	rlan_ctx.lrxqthresh = 1;
+
+	 /* Enable Flexible Descriptors in the queue context which
+	  * allows this driver to select a specific receive descriptor format
+	  */
+	if (vsi->type != ICE_VSI_VF) {
+		regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
+		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
+			QRXFLXP_CNTXT_RXDID_IDX_M;
+
+		/* increasing context priority to pick up profile ID;
+		 * default is 0x01; setting to 0x03 to ensure profile
+		 * is programming if prev context is of same priority
+		 */
+		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
+			QRXFLXP_CNTXT_RXDID_PRIO_M;
+
+		wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+	}
+
+	/* Absolute queue number out of 2K needs to be passed */
+	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
+	if (err) {
+		dev_err(&vsi->back->pdev->dev,
+			"Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+			pf_q, err);
+		return -EIO;
+	}
+
+	if (vsi->type == ICE_VSI_VF)
+		return 0;
+
+	/* configure Rx buffer alignment */
+	if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+		ice_clear_ring_build_skb_ena(ring);
+	else
+		ice_set_ring_build_skb_ena(ring);
+
+	/* init queue specific tail register */
+	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
+	writel(0, ring->tail);
+
+	err = ring->xsk_umem ?
+	      ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) :
+	      ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
+	if (err)
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
+			 ring->xsk_umem ? "UMEM enabled " : "",
+			 ring->q_index, pf_q);
+
+	return 0;
+}
+
+/**
+ * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * This function first tries to find contiguous space. If it is not successful,
+ * it tries with the scatter approach.
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
+{
+	int ret = 0;
+
+	ret = __ice_vsi_get_qs_contig(qs_cfg);
+	if (ret) {
+		/* contig failed, so try with scatter approach */
+		qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
+		qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
+					qs_cfg->scatter_count);
+		ret = __ice_vsi_get_qs_sc(qs_cfg);
+	}
+	return ret;
+}
+
+/**
+ * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx rings
+ * @rxq_idx: Rx queue index
+ */
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
+{
+	int pf_q = vsi->rxq_map[rxq_idx];
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int ret = 0;
+	u32 rx_reg;
+
+	rx_reg = rd32(hw, QRX_CTRL(pf_q));
+
+	/* Skip if the queue is already in the requested state */
+	if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+		return 0;
+
+	/* turn on/off the queue */
+	if (ena)
+		rx_reg |= QRX_CTRL_QENA_REQ_M;
+	else
+		rx_reg &= ~QRX_CTRL_QENA_REQ_M;
+	wr32(hw, QRX_CTRL(pf_q), rx_reg);
+
+	/* wait for the change to finish */
+	ret = ice_pf_rxq_wait(pf, pf_q, ena);
+	if (ret)
+		dev_err(ice_pf_to_dev(pf),
+			"VSI idx %d Rx ring %d %sable timeout\n",
+			vsi->idx, pf_q, (ena ? "en" : "dis"));
+
+	return ret;
+}
+
+/**
+ * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int v_idx = 0, num_q_vectors;
+	struct device *dev;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+	if (vsi->q_vectors[0]) {
+		dev_dbg(dev, "VSI %d has existing q_vectors\n", vsi->vsi_num);
+		return -EEXIST;
+	}
+
+	num_q_vectors = vsi->num_q_vectors;
+
+	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
+		err = ice_vsi_alloc_q_vector(vsi, v_idx);
+		if (err)
+			goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	while (v_idx--)
+		ice_free_q_vector(vsi, v_idx);
+
+	dev_err(dev, "Failed to allocate %d q_vector for VSI %d, ret=%d\n",
+		vsi->num_q_vectors, vsi->vsi_num, err);
+	vsi->num_q_vectors = 0;
+	return err;
+}
+
+/**
+ * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors allotted
+ * through the MSI-X enabling code. On a constrained vector budget, we map Tx
+ * and Rx rings to the vector as "efficiently" as possible.
+ */
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+{
+	int q_vectors = vsi->num_q_vectors;
+	int tx_rings_rem, rx_rings_rem;
+	int v_id;
+
+	/* initially assigning remaining rings count to VSIs num queue value */
+	tx_rings_rem = vsi->num_txq;
+	rx_rings_rem = vsi->num_rxq;
+
+	for (v_id = 0; v_id < q_vectors; v_id++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
+		int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
+
+		/* Tx rings mapping to vector */
+		tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
+		q_vector->num_ring_tx = tx_rings_per_v;
+		q_vector->tx.ring = NULL;
+		q_vector->tx.itr_idx = ICE_TX_ITR;
+		q_base = vsi->num_txq - tx_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
+			struct ice_ring *tx_ring = vsi->tx_rings[q_id];
+
+			tx_ring->q_vector = q_vector;
+			tx_ring->next = q_vector->tx.ring;
+			q_vector->tx.ring = tx_ring;
+		}
+		tx_rings_rem -= tx_rings_per_v;
+
+		/* Rx rings mapping to vector */
+		rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
+		q_vector->num_ring_rx = rx_rings_per_v;
+		q_vector->rx.ring = NULL;
+		q_vector->rx.itr_idx = ICE_RX_ITR;
+		q_base = vsi->num_rxq - rx_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
+			struct ice_ring *rx_ring = vsi->rx_rings[q_id];
+
+			rx_ring->q_vector = q_vector;
+			rx_ring->next = q_vector->rx.ring;
+			q_vector->rx.ring = rx_ring;
+		}
+		rx_rings_rem -= rx_rings_per_v;
+	}
+}
+
+/**
+ * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI having memory freed
+ */
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
+{
+	int v_idx;
+
+	ice_for_each_q_vector(vsi, v_idx)
+		ice_free_q_vector(vsi, v_idx);
+}
+
+/**
+ * ice_vsi_cfg_txq - Configure single Tx queue
+ * @vsi: the VSI that queue belongs to
+ * @ring: Tx ring to be configured
+ * @qg_buf: queue group buffer
+ */
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
+		struct ice_aqc_add_tx_qgrp *qg_buf)
+{
+	struct ice_tlan_ctx tlan_ctx = { 0 };
+	struct ice_aqc_add_txqs_perq *txq;
+	struct ice_pf *pf = vsi->back;
+	u8 buf_len = sizeof(*qg_buf);
+	enum ice_status status;
+	u16 pf_q;
+	u8 tc;
+
+	pf_q = ring->reg_idx;
+	ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
+	/* copy context contents into the qg_buf */
+	qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
+	ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+		    ice_tlan_ctx_info);
+
+	/* init queue specific tail reg. It is referred as
+	 * transmit comm scheduler queue doorbell.
+	 */
+	ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
+
+	if (IS_ENABLED(CONFIG_DCB))
+		tc = ring->dcb_tc;
+	else
+		tc = 0;
+
+	/* Add unique software queue handle of the Tx queue per
+	 * TC into the VSI Tx ring
+	 */
+	ring->q_handle = ice_calc_q_handle(vsi, ring, tc);
+
+	status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
+				 1, qg_buf, buf_len, NULL);
+	if (status) {
+		dev_err(ice_pf_to_dev(pf),
+			"Failed to set LAN Tx queue context, error: %d\n",
+			status);
+		return -ENODEV;
+	}
+
+	/* Add Tx Queue TEID into the VSI Tx ring from the
+	 * response. This will complete configuring and
+	 * enabling the queue.
+	 */
+	txq = &qg_buf->txqs[0];
+	if (pf_q == le16_to_cpu(txq->txq_id))
+		ring->txq_teid = le32_to_cpu(txq->q_teid);
+
+	return 0;
+}
+
+/**
+ * ice_cfg_itr - configure the initial interrupt throttle values
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector that's being configured
+ *
+ * Configure interrupt throttling values for the ring containers that are
+ * associated with the interrupt vector passed in.
+ */
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+	ice_cfg_itr_gran(hw);
+
+	if (q_vector->num_ring_rx) {
+		struct ice_ring_container *rc = &q_vector->rx;
+
+		/* if this value is set then don't overwrite with default */
+		if (!rc->itr_setting)
+			rc->itr_setting = ICE_DFLT_RX_ITR;
+
+		rc->target_itr = ITR_TO_REG(rc->itr_setting);
+		rc->next_update = jiffies + 1;
+		rc->current_itr = rc->target_itr;
+		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+	}
+
+	if (q_vector->num_ring_tx) {
+		struct ice_ring_container *rc = &q_vector->tx;
+
+		/* if this value is set then don't overwrite with default */
+		if (!rc->itr_setting)
+			rc->itr_setting = ICE_DFLT_TX_ITR;
+
+		rc->target_itr = ITR_TO_REG(rc->itr_setting);
+		rc->next_update = jiffies + 1;
+		rc->current_itr = rc->target_itr;
+		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
+	}
+}
+
+/**
+ * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * @vsi: the VSI being configured
+ * @txq: Tx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+
+	val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
+	      ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+
+	wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+		wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]),
+		     val);
+	}
+	ice_flush(hw);
+}
+
+/**
+ * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
+ * @vsi: the VSI being configured
+ * @rxq: Rx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
+
+	val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
+	      ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
+
+	wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_trigger_sw_intr - trigger a software interrupt
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector to trigger the software interrupt for
+ */
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+	wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
+	     (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
+	     GLINT_DYN_CTL_SWINT_TRIG_M |
+	     GLINT_DYN_CTL_INTENA_M);
+}
+
+/**
+ * ice_vsi_stop_tx_ring - Disable single Tx ring
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @ring: Tx ring to be stopped
+ * @txq_meta: Meta data of Tx ring to be stopped
+ */
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		     u16 rel_vmvf_num, struct ice_ring *ring,
+		     struct ice_txq_meta *txq_meta)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_q_vector *q_vector;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u32 val;
+
+	/* clear cause_ena bit for disabled queues */
+	val = rd32(hw, QINT_TQCTL(ring->reg_idx));
+	val &= ~QINT_TQCTL_CAUSE_ENA_M;
+	wr32(hw, QINT_TQCTL(ring->reg_idx), val);
+
+	/* software is expected to wait for 100 ns */
+	ndelay(100);
+
+	/* trigger a software interrupt for the vector
+	 * associated to the queue to schedule NAPI handler
+	 */
+	q_vector = ring->q_vector;
+	if (q_vector)
+		ice_trigger_sw_intr(hw, q_vector);
+
+	status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
+				 txq_meta->tc, 1, &txq_meta->q_handle,
+				 &txq_meta->q_id, &txq_meta->q_teid, rst_src,
+				 rel_vmvf_num, NULL);
+
+	/* if the disable queue command was exercised during an
+	 * active reset flow, ICE_ERR_RESET_ONGOING is returned.
+	 * This is not an error as the reset operation disables
+	 * queues at the hardware level anyway.
+	 */
+	if (status == ICE_ERR_RESET_ONGOING) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Reset in progress. LAN Tx queues already disabled\n");
+	} else if (status == ICE_ERR_DOES_NOT_EXIST) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"LAN Tx queues do not exist, nothing to disable\n");
+	} else if (status) {
+		dev_err(&vsi->back->pdev->dev,
+			"Failed to disable LAN Tx queues, error: %d\n", status);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_fill_txq_meta - Prepare the Tx queue's meta data
+ * @vsi: VSI that ring belongs to
+ * @ring: ring that txq_meta will be based on
+ * @txq_meta: a helper struct that wraps Tx queue's information
+ *
+ * Set up a helper struct that will contain all the necessary fields that
+ * are needed for stopping Tx queue
+ */
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+		  struct ice_txq_meta *txq_meta)
+{
+	u8 tc;
+
+	if (IS_ENABLED(CONFIG_DCB))
+		tc = ring->dcb_tc;
+	else
+		tc = 0;
+
+	txq_meta->q_id = ring->reg_idx;
+	txq_meta->q_teid = ring->txq_teid;
+	txq_meta->q_handle = ring->q_handle;
+	txq_meta->vsi_idx = vsi->idx;
+	txq_meta->tc = tc;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h
new file mode 100644
index 000000000000..407995e8e944
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_BASE_H_
+#define _ICE_BASE_H_
+
+#include "ice.h"
+
+int ice_setup_rx_ctx(struct ice_ring *ring);
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg);
+int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi);
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
+		struct ice_aqc_add_tx_qgrp *qg_buf);
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		     u16 rel_vmvf_num, struct ice_ring *ring,
+		     struct ice_txq_meta *txq_meta);
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
+		  struct ice_txq_meta *txq_meta);
+#endif /* _ICE_BASE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 3a6b3950eb0e..fb1d930470c7 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -855,6 +855,9 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 		goto err_unroll_sched;
 	}
 	INIT_LIST_HEAD(&hw->agg_list);
+	/* Initialize max burst size */
+	if (!hw->max_burst_size)
+		ice_cfg_rl_burst_size(hw, ICE_SCHED_DFLT_BURST_SIZE);
 
 	status = ice_init_fltr_mgmt_struct(hw);
 	if (status)
@@ -1067,6 +1070,72 @@ enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req)
 }
 
 /**
+ * ice_get_pfa_module_tlv - Reads sub module TLV from NVM PFA
+ * @hw: pointer to hardware structure
+ * @module_tlv: pointer to module TLV to return
+ * @module_tlv_len: pointer to module TLV length to return
+ * @module_type: module type requested
+ *
+ * Finds the requested sub module TLV type from the Preserved Field
+ * Area (PFA) and returns the TLV pointer and length. The caller can
+ * use these to read the variable length TLV value.
+ */
+enum ice_status
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+		       u16 module_type)
+{
+	enum ice_status status;
+	u16 pfa_len, pfa_ptr;
+	u16 next_tlv;
+
+	status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Preserved Field Array pointer.\n");
+		return status;
+	}
+	status = ice_read_sr_word(hw, pfa_ptr, &pfa_len);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n");
+		return status;
+	}
+	/* Starting with first TLV after PFA length, iterate through the list
+	 * of TLVs to find the requested one.
+	 */
+	next_tlv = pfa_ptr + 1;
+	while (next_tlv < pfa_ptr + pfa_len) {
+		u16 tlv_sub_module_type;
+		u16 tlv_len;
+
+		/* Read TLV type */
+		status = ice_read_sr_word(hw, next_tlv, &tlv_sub_module_type);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV type.\n");
+			break;
+		}
+		/* Read TLV length */
+		status = ice_read_sr_word(hw, next_tlv + 1, &tlv_len);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV length.\n");
+			break;
+		}
+		if (tlv_sub_module_type == module_type) {
+			if (tlv_len) {
+				*module_tlv = next_tlv;
+				*module_tlv_len = tlv_len;
+				return 0;
+			}
+			return ICE_ERR_INVAL_SIZE;
+		}
+		/* Check next TLV, i.e. current TLV pointer + length + 2 words
+		 * (for current TLV's type and length)
+		 */
+		next_tlv = next_tlv + tlv_len + 2;
+	}
+	/* Module does not exist */
+	return ICE_ERR_DOES_NOT_EXIST;
+}
+
+/**
  * ice_copy_rxq_ctx_to_hw
  * @hw: pointer to the hardware structure
  * @ice_rxq_ctx: pointer to the rxq context
@@ -1182,56 +1251,6 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = {
 	{ 0 }
 };
 
-/**
- * ice_debug_cq
- * @hw: pointer to the hardware structure
- * @mask: debug mask
- * @desc: pointer to control queue descriptor
- * @buf: pointer to command buffer
- * @buf_len: max length of buf
- *
- * Dumps debug log about control command with descriptor contents.
- */
-void
-ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc, void *buf,
-	     u16 buf_len)
-{
-	struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
-	u16 len;
-
-#ifndef CONFIG_DYNAMIC_DEBUG
-	if (!(mask & hw->debug_mask))
-		return;
-#endif
-
-	if (!desc)
-		return;
-
-	len = le16_to_cpu(cq_desc->datalen);
-
-	ice_debug(hw, mask,
-		  "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
-		  le16_to_cpu(cq_desc->opcode),
-		  le16_to_cpu(cq_desc->flags),
-		  le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval));
-	ice_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
-		  le32_to_cpu(cq_desc->cookie_high),
-		  le32_to_cpu(cq_desc->cookie_low));
-	ice_debug(hw, mask, "\tparam (0,1)  0x%08X 0x%08X\n",
-		  le32_to_cpu(cq_desc->params.generic.param0),
-		  le32_to_cpu(cq_desc->params.generic.param1));
-	ice_debug(hw, mask, "\taddr (h,l)   0x%08X 0x%08X\n",
-		  le32_to_cpu(cq_desc->params.generic.addr_high),
-		  le32_to_cpu(cq_desc->params.generic.addr_low));
-	if (buf && cq_desc->datalen != 0) {
-		ice_debug(hw, mask, "Buffer:\n");
-		if (buf_len < len)
-			len = buf_len;
-
-		ice_debug_array(hw, mask, 16, 1, (u8 *)buf, len);
-	}
-}
-
 /* FW Admin Queue command wrappers */
 
 /* Software lock/mutex that is meant to be held while the Global Config Lock
@@ -1654,6 +1673,10 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
 			ice_debug(hw, ICE_DBG_INIT,
 				  "%s: valid_functions (bitmap) = %d\n", prefix,
 				  caps->valid_functions);
+
+			/* store func count for resource management purposes */
+			if (dev_p)
+				dev_p->num_funcs = hweight32(number);
 			break;
 		case ICE_AQC_CAPS_SRIOV:
 			caps->sr_iov_1_1 = (number == 1);
@@ -1760,6 +1783,18 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
 			break;
 		}
 	}
+
+	/* Re-calculate capabilities that are dependent on the number of
+	 * physical ports; i.e. some features are not supported or function
+	 * differently on devices with more than 4 ports.
+	 */
+	if (hw->dev_caps.num_funcs > 4) {
+		/* Max 4 TCs per port */
+		caps->maxtc = 4;
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: maxtc = %d (based on #ports)\n", prefix,
+			  caps->maxtc);
+	}
 }
 
 /**
@@ -1856,8 +1891,7 @@ void ice_set_safe_mode_caps(struct ice_hw *hw)
 	struct ice_hw_dev_caps *dev_caps = &hw->dev_caps;
 	u32 valid_func, rxq_first_id, txq_first_id;
 	u32 msix_vector_first_id, max_mtu;
-	u32 num_func = 0;
-	u8 i;
+	u32 num_funcs;
 
 	/* cache some func_caps values that should be restored after memset */
 	valid_func = func_caps->common_cap.valid_functions;
@@ -1890,6 +1924,7 @@ void ice_set_safe_mode_caps(struct ice_hw *hw)
 	rxq_first_id = dev_caps->common_cap.rxq_first_id;
 	msix_vector_first_id = dev_caps->common_cap.msix_vector_first_id;
 	max_mtu = dev_caps->common_cap.max_mtu;
+	num_funcs = dev_caps->num_funcs;
 
 	/* unset dev capabilities */
 	memset(dev_caps, 0, sizeof(*dev_caps));
@@ -1900,19 +1935,14 @@ void ice_set_safe_mode_caps(struct ice_hw *hw)
 	dev_caps->common_cap.rxq_first_id = rxq_first_id;
 	dev_caps->common_cap.msix_vector_first_id = msix_vector_first_id;
 	dev_caps->common_cap.max_mtu = max_mtu;
-
-	/* valid_func is a bitmap. get number of functions */
-#define ICE_MAX_FUNCS 8
-	for (i = 0; i < ICE_MAX_FUNCS; i++)
-		if (valid_func & BIT(i))
-			num_func++;
+	dev_caps->num_funcs = num_funcs;
 
 	/* one Tx and one Rx queue per function in safe mode */
-	dev_caps->common_cap.num_rxq = num_func;
-	dev_caps->common_cap.num_txq = num_func;
+	dev_caps->common_cap.num_rxq = num_funcs;
+	dev_caps->common_cap.num_txq = num_funcs;
 
 	/* two MSIX vectors per function */
-	dev_caps->common_cap.num_msix_vectors = 2 * num_func;
+	dev_caps->common_cap.num_msix_vectors = 2 * num_funcs;
 }
 
 /**
@@ -2556,6 +2586,52 @@ ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
 }
 
 /**
+ * ice_aq_sff_eeprom
+ * @hw: pointer to the HW struct
+ * @lport: bits [7:0] = logical port, bit [8] = logical port valid
+ * @bus_addr: I2C bus address of the eeprom (typically 0xA0, 0=topo default)
+ * @mem_addr: I2C offset. lower 8 bits for address, 8 upper bits zero padding.
+ * @page: QSFP page
+ * @set_page: set or ignore the page
+ * @data: pointer to data buffer to be read/written to the I2C device.
+ * @length: 1-16 for read, 1 for write.
+ * @write: 0 read, 1 for write.
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read/Write SFF EEPROM (0x06EE)
+ */
+enum ice_status
+ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
+		  u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
+		  bool write, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_sff_eeprom *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	if (!data || (mem_addr & 0xff00))
+		return ICE_ERR_PARAM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom);
+	cmd = &desc.params.read_write_sff_param;
+	desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF);
+	cmd->lport_num = (u8)(lport & 0xff);
+	cmd->lport_num_valid = (u8)((lport >> 8) & 0x01);
+	cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) &
+					 ICE_AQC_SFF_I2CBUS_7BIT_M) |
+					((set_page <<
+					  ICE_AQC_SFF_SET_EEPROM_PAGE_S) &
+					 ICE_AQC_SFF_SET_EEPROM_PAGE_M));
+	cmd->i2c_mem_addr = cpu_to_le16(mem_addr & 0xff);
+	cmd->eeprom_page = cpu_to_le16((u16)page << ICE_AQC_SFF_EEPROM_PAGE_S);
+	if (write)
+		cmd->i2c_bus_addr |= cpu_to_le16(ICE_AQC_SFF_IS_WRITE);
+
+	status = ice_aq_send_cmd(hw, &desc, data, length, cd);
+	return status;
+}
+
+/**
  * __ice_aq_get_set_rss_lut
  * @hw: pointer to the hardware structure
  * @vsi_id: VSI FW index
@@ -3148,7 +3224,7 @@ ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
  * @tc: TC number
  * @q_handle: software queue handle
  */
-static struct ice_q_ctx *
+struct ice_q_ctx *
 ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle)
 {
 	struct ice_vsi_ctx *vsi;
@@ -3245,9 +3321,12 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
 	node.node_teid = buf->txqs[0].q_teid;
 	node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
 	q_ctx->q_handle = q_handle;
+	q_ctx->q_teid = le32_to_cpu(node.node_teid);
 
-	/* add a leaf node into schduler tree queue layer */
+	/* add a leaf node into scheduler tree queue layer */
 	status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node);
+	if (!status)
+		status = ice_sched_replay_q_bw(pi, q_ctx);
 
 ena_txq_exit:
 	mutex_unlock(&pi->sched_lock);
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index c3df92f57777..b22aa561e253 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -6,16 +6,18 @@
 
 #include "ice.h"
 #include "ice_type.h"
+#include "ice_nvm.h"
 #include "ice_flex_pipe.h"
 #include "ice_switch.h"
 #include <linux/avf/virtchnl.h>
 
 enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw);
 
-void
-ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len);
 enum ice_status ice_init_hw(struct ice_hw *hw);
 void ice_deinit_hw(struct ice_hw *hw);
+enum ice_status
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+		       u16 module_type);
 enum ice_status ice_check_reset(struct ice_hw *hw);
 enum ice_status ice_reset(struct ice_hw *hw, enum ice_reset_req req);
 enum ice_status ice_create_all_ctrlq(struct ice_hw *hw);
@@ -117,6 +119,10 @@ ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd);
 enum ice_status
 ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
 		       struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
+		  u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
+		  bool write, struct ice_sq_cd *cd);
 
 enum ice_status
 ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
@@ -133,6 +139,8 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
 enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle);
 void ice_replay_post(struct ice_hw *hw);
 void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf);
+struct ice_q_ctx *
+ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle);
 void
 ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
 		  u64 *prev_stat, u64 *cur_stat);
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 2353166c654e..dd946866d7b8 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -810,6 +810,52 @@ static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
 }
 
 /**
+ * ice_debug_cq
+ * @hw: pointer to the hardware structure
+ * @desc: pointer to control queue descriptor
+ * @buf: pointer to command buffer
+ * @buf_len: max length of buf
+ *
+ * Dumps debug log about control command with descriptor contents.
+ */
+static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len)
+{
+	struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc;
+	u16 len;
+
+	if (!IS_ENABLED(CONFIG_DYNAMIC_DEBUG) &&
+	    !((ICE_DBG_AQ_DESC | ICE_DBG_AQ_DESC_BUF) & hw->debug_mask))
+		return;
+
+	if (!desc)
+		return;
+
+	len = le16_to_cpu(cq_desc->datalen);
+
+	ice_debug(hw, ICE_DBG_AQ_DESC,
+		  "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+		  le16_to_cpu(cq_desc->opcode),
+		  le16_to_cpu(cq_desc->flags),
+		  le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval));
+	ice_debug(hw, ICE_DBG_AQ_DESC, "\tcookie (h,l) 0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->cookie_high),
+		  le32_to_cpu(cq_desc->cookie_low));
+	ice_debug(hw, ICE_DBG_AQ_DESC, "\tparam (0,1)  0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->params.generic.param0),
+		  le32_to_cpu(cq_desc->params.generic.param1));
+	ice_debug(hw, ICE_DBG_AQ_DESC, "\taddr (h,l)   0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->params.generic.addr_high),
+		  le32_to_cpu(cq_desc->params.generic.addr_low));
+	if (buf && cq_desc->datalen != 0) {
+		ice_debug(hw, ICE_DBG_AQ_DESC_BUF, "Buffer:\n");
+		if (buf_len < len)
+			len = buf_len;
+
+		ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, (u8 *)buf, len);
+	}
+}
+
+/**
  * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ)
  * @hw: pointer to the HW struct
  * @cq: pointer to the specific Control queue
@@ -934,10 +980,10 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 	}
 
 	/* Debug desc and buffer */
-	ice_debug(hw, ICE_DBG_AQ_MSG,
+	ice_debug(hw, ICE_DBG_AQ_DESC,
 		  "ATQ: Control Send queue desc and buffer:\n");
 
-	ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc_on_ring, buf, buf_size);
+	ice_debug_cq(hw, (void *)desc_on_ring, buf, buf_size);
 
 	(cq->sq.next_to_use)++;
 	if (cq->sq.next_to_use == cq->sq.count)
@@ -948,7 +994,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 		if (ice_sq_done(hw, cq))
 			break;
 
-		mdelay(1);
+		udelay(ICE_CTL_Q_SQ_CMD_USEC);
 		total_delay++;
 	} while (total_delay < cq->sq_cmd_timeout);
 
@@ -971,7 +1017,8 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 		retval = le16_to_cpu(desc->retval);
 		if (retval) {
 			ice_debug(hw, ICE_DBG_AQ_MSG,
-				  "Control Send Queue command completed with error 0x%x\n",
+				  "Control Send Queue command 0x%04X completed with error 0x%X\n",
+				  le16_to_cpu(desc->opcode),
 				  retval);
 
 			/* strip off FW internal code */
@@ -986,7 +1033,7 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 	ice_debug(hw, ICE_DBG_AQ_MSG,
 		  "ATQ: desc and buffer writeback:\n");
 
-	ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, buf, buf_size);
+	ice_debug_cq(hw, (void *)desc, buf, buf_size);
 
 	/* save writeback AQ if requested */
 	if (details->wb_desc)
@@ -1075,7 +1122,8 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 	if (flags & ICE_AQ_FLAG_ERR) {
 		ret_code = ICE_ERR_AQ_ERROR;
 		ice_debug(hw, ICE_DBG_AQ_MSG,
-			  "Control Receive Queue Event received with error 0x%x\n",
+			  "Control Receive Queue Event 0x%04X received with error 0x%X\n",
+			  le16_to_cpu(desc->opcode),
 			  cq->rq_last_status);
 	}
 	memcpy(&e->desc, desc, sizeof(e->desc));
@@ -1084,10 +1132,9 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 	if (e->msg_buf && e->msg_len)
 		memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len);
 
-	ice_debug(hw, ICE_DBG_AQ_MSG, "ARQ: desc and buffer:\n");
+	ice_debug(hw, ICE_DBG_AQ_DESC, "ARQ: desc and buffer:\n");
 
-	ice_debug_cq(hw, ICE_DBG_AQ_CMD, (void *)desc, e->msg_buf,
-		     cq->rq_buf_size);
+	ice_debug_cq(hw, (void *)desc, e->msg_buf, cq->rq_buf_size);
 
 	/* Restore the original datalen and buffer address in the desc,
 	 * FW updates datalen to indicate the event message size
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index 44945c2165d8..bf0ebe6149e8 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -22,7 +22,7 @@
  */
 #define EXP_FW_API_VER_BRANCH		0x00
 #define EXP_FW_API_VER_MAJOR		0x01
-#define EXP_FW_API_VER_MINOR		0x03
+#define EXP_FW_API_VER_MINOR		0x05
 
 /* Different control queue types: These are mainly for SW consumption. */
 enum ice_ctl_q {
@@ -31,8 +31,9 @@ enum ice_ctl_q {
 	ICE_CTL_Q_MAILBOX,
 };
 
-/* Control Queue default settings */
-#define ICE_CTL_Q_SQ_CMD_TIMEOUT	250  /* msecs */
+/* Control Queue timeout settings - max delay 250ms */
+#define ICE_CTL_Q_SQ_CMD_TIMEOUT	2500  /* Count 2500 times */
+#define ICE_CTL_Q_SQ_CMD_USEC		100   /* Check every 100usec */
 
 struct ice_ctl_q_ring {
 	void *dma_head;			/* Virtual address to DMA head */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index dd7efff121bd..713e8a892e14 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -965,9 +965,9 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
 	    pi->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
 		/* Get current DCBX configuration */
 		ret = ice_get_dcb_cfg(pi);
-		pi->is_sw_lldp = (hw->adminq.sq_last_status == ICE_AQ_RC_EPERM);
 		if (ret)
 			return ret;
+		pi->is_sw_lldp = false;
 	} else if (pi->dcbx_status == ICE_DCBX_STATUS_DIS) {
 		return ICE_ERR_NOT_READY;
 	}
@@ -975,8 +975,8 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
 	/* Configure the LLDP MIB change event */
 	if (enable_mib_change) {
 		ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL);
-		if (!ret)
-			pi->is_sw_lldp = false;
+		if (ret)
+			pi->is_sw_lldp = true;
 	}
 
 	return ret;
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index dd47869c4ad4..d3d3ec29def9 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2019, Intel Corporation. */
 
 #include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
 
 /**
  * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration
@@ -100,6 +101,16 @@ u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
 }
 
 /**
+ * ice_dcb_get_tc - Get the TC associated with the queue
+ * @vsi: ptr to the VSI
+ * @queue_index: queue number associated with VSI
+ */
+u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index)
+{
+	return vsi->tx_rings[queue_index]->dcb_tc;
+}
+
+/**
  * ice_vsi_cfg_dcb_rings - Update rings to reflect DCB TC
  * @vsi: VSI owner of rings being updated
  */
@@ -138,83 +149,62 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi)
 }
 
 /**
- * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
- * @pf: pointer to the PF struct
- *
- * Assumed caller has already disabled all VSIs before
- * calling this function. Reconfiguring DCB based on
- * local_dcbx_cfg.
- */
-static void ice_pf_dcb_recfg(struct ice_pf *pf)
-{
-	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
-	u8 tc_map = 0;
-	int v, ret;
-
-	/* Update each VSI */
-	ice_for_each_vsi(pf, v) {
-		if (!pf->vsi[v])
-			continue;
-
-		if (pf->vsi[v]->type == ICE_VSI_PF)
-			tc_map = ice_dcb_get_ena_tc(dcbcfg);
-		else
-			tc_map = ICE_DFLT_TRAFFIC_CLASS;
-
-		ret = ice_vsi_cfg_tc(pf->vsi[v], tc_map);
-		if (ret) {
-			dev_err(&pf->pdev->dev,
-				"Failed to config TC for VSI index: %d\n",
-				pf->vsi[v]->idx);
-			continue;
-		}
-
-		ice_vsi_map_rings_to_vectors(pf->vsi[v]);
-	}
-}
-
-/**
  * ice_pf_dcb_cfg - Apply new DCB configuration
  * @pf: pointer to the PF struct
  * @new_cfg: DCBX config to apply
  * @locked: is the RTNL held
  */
-static
 int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
 {
-	struct ice_dcbx_cfg *old_cfg, *curr_cfg;
 	struct ice_aqc_port_ets_elem buf = { 0 };
-	int ret = 0;
+	struct ice_dcbx_cfg *old_cfg, *curr_cfg;
+	struct device *dev = ice_pf_to_dev(pf);
+	int ret = ICE_DCB_NO_HW_CHG;
+	struct ice_vsi *pf_vsi;
 
 	curr_cfg = &pf->hw.port_info->local_dcbx_cfg;
 
+	/* FW does not care if change happened */
+	if (!pf->hw.port_info->is_sw_lldp)
+		ret = ICE_DCB_HW_CHG_RST;
+
 	/* Enable DCB tagging only when more than one TC */
 	if (ice_dcb_get_num_tc(new_cfg) > 1) {
-		dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n");
+		dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
 		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
 	} else {
-		dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n");
+		dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n");
 		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
 	}
 
 	if (!memcmp(new_cfg, curr_cfg, sizeof(*new_cfg))) {
-		dev_dbg(&pf->pdev->dev, "No change in DCB config required\n");
+		dev_dbg(dev, "No change in DCB config required\n");
 		return ret;
 	}
 
 	/* Store old config in case FW config fails */
-	old_cfg = devm_kzalloc(&pf->pdev->dev, sizeof(*old_cfg), GFP_KERNEL);
-	memcpy(old_cfg, curr_cfg, sizeof(*old_cfg));
+	old_cfg = kmemdup(curr_cfg, sizeof(*old_cfg), GFP_KERNEL);
+	if (!old_cfg)
+		return -ENOMEM;
+
+	dev_info(dev, "Commit DCB Configuration to the hardware\n");
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi) {
+		dev_dbg(dev, "PF VSI doesn't exist\n");
+		ret = -EINVAL;
+		goto free_cfg;
+	}
 
 	/* avoid race conditions by holding the lock while disabling and
 	 * re-enabling the VSI
 	 */
 	if (!locked)
 		rtnl_lock();
-	ice_pf_dis_all_vsi(pf, true);
+	ice_dis_vsi(pf_vsi, true);
 
 	memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
 	memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
+	memcpy(&new_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
 
 	/* Only send new config to HW if we are in SW LLDP mode. Otherwise,
 	 * the new config came from the HW in the first place.
@@ -222,7 +212,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
 	if (pf->hw.port_info->is_sw_lldp) {
 		ret = ice_set_dcb_cfg(pf->hw.port_info);
 		if (ret) {
-			dev_err(&pf->pdev->dev, "Set DCB Config failed\n");
+			dev_err(dev, "Set DCB Config failed\n");
 			/* Restore previous settings to local config */
 			memcpy(curr_cfg, old_cfg, sizeof(*curr_cfg));
 			goto out;
@@ -231,17 +221,18 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
 
 	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
 	if (ret) {
-		dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+		dev_err(dev, "Query Port ETS failed\n");
 		goto out;
 	}
 
 	ice_pf_dcb_recfg(pf);
 
 out:
-	ice_pf_ena_all_vsi(pf, true);
+	ice_ena_vsi(pf_vsi, true);
 	if (!locked)
 		rtnl_unlock();
-	devm_kfree(&pf->pdev->dev, old_cfg);
+free_cfg:
+	kfree(old_cfg);
 	return ret;
 }
 
@@ -277,6 +268,7 @@ static bool
 ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
 		   struct ice_dcbx_cfg *new_cfg)
 {
+	struct device *dev = ice_pf_to_dev(pf);
 	bool need_reconfig = false;
 
 	/* Check if ETS configuration has changed */
@@ -287,33 +279,33 @@ ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
 			   &old_cfg->etscfg.prio_table,
 			   sizeof(new_cfg->etscfg.prio_table))) {
 			need_reconfig = true;
-			dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n");
+			dev_dbg(dev, "ETS UP2TC changed.\n");
 		}
 
 		if (memcmp(&new_cfg->etscfg.tcbwtable,
 			   &old_cfg->etscfg.tcbwtable,
 			   sizeof(new_cfg->etscfg.tcbwtable)))
-			dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n");
+			dev_dbg(dev, "ETS TC BW Table changed.\n");
 
 		if (memcmp(&new_cfg->etscfg.tsatable,
 			   &old_cfg->etscfg.tsatable,
 			   sizeof(new_cfg->etscfg.tsatable)))
-			dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n");
+			dev_dbg(dev, "ETS TSA Table changed.\n");
 	}
 
 	/* Check if PFC configuration has changed */
 	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
 		need_reconfig = true;
-		dev_dbg(&pf->pdev->dev, "PFC config change detected.\n");
+		dev_dbg(dev, "PFC config change detected.\n");
 	}
 
 	/* Check if APP Table has changed */
 	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) {
 		need_reconfig = true;
-		dev_dbg(&pf->pdev->dev, "APP Table change detected.\n");
+		dev_dbg(dev, "APP Table change detected.\n");
 	}
 
-	dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig);
+	dev_dbg(dev, "dcb need_reconfig=%d\n", need_reconfig);
 	return need_reconfig;
 }
 
@@ -325,11 +317,12 @@ void ice_dcb_rebuild(struct ice_pf *pf)
 {
 	struct ice_dcbx_cfg *local_dcbx_cfg, *desired_dcbx_cfg, *prev_cfg;
 	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct device *dev = ice_pf_to_dev(pf);
 	enum ice_status ret;
 
 	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
 	if (ret) {
-		dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+		dev_err(dev, "Query Port ETS failed\n");
 		goto dcb_error;
 	}
 
@@ -348,17 +341,14 @@ void ice_dcb_rebuild(struct ice_pf *pf)
 	ice_cfg_etsrec_defaults(pf->hw.port_info);
 	ret = ice_set_dcb_cfg(pf->hw.port_info);
 	if (ret) {
-		dev_err(&pf->pdev->dev, "Failed to set DCB to unwilling\n");
+		dev_err(dev, "Failed to set DCB to unwilling\n");
 		goto dcb_error;
 	}
 
 	/* Retrieve DCB config and ensure same as current in SW */
-	prev_cfg = devm_kmemdup(&pf->pdev->dev, local_dcbx_cfg,
-				sizeof(*prev_cfg), GFP_KERNEL);
-	if (!prev_cfg) {
-		dev_err(&pf->pdev->dev, "Failed to alloc space for DCB cfg\n");
+	prev_cfg = kmemdup(local_dcbx_cfg, sizeof(*prev_cfg), GFP_KERNEL);
+	if (!prev_cfg)
 		goto dcb_error;
-	}
 
 	ice_init_dcb(&pf->hw, true);
 	if (pf->hw.port_info->dcbx_status == ICE_DCBX_STATUS_DIS)
@@ -368,12 +358,13 @@ void ice_dcb_rebuild(struct ice_pf *pf)
 
 	if (ice_dcb_need_recfg(pf, prev_cfg, local_dcbx_cfg)) {
 		/* difference in cfg detected - disable DCB till next MIB */
-		dev_err(&pf->pdev->dev, "Set local MIB not accurate\n");
+		dev_err(dev, "Set local MIB not accurate\n");
+		kfree(prev_cfg);
 		goto dcb_error;
 	}
 
 	/* fetched config congruent to previous configuration */
-	devm_kfree(&pf->pdev->dev, prev_cfg);
+	kfree(prev_cfg);
 
 	/* Set the local desired config */
 	if (local_dcbx_cfg->dcbx_mode == ICE_DCBX_MODE_CEE)
@@ -383,27 +374,30 @@ void ice_dcb_rebuild(struct ice_pf *pf)
 	ice_cfg_etsrec_defaults(pf->hw.port_info);
 	ret = ice_set_dcb_cfg(pf->hw.port_info);
 	if (ret) {
-		dev_err(&pf->pdev->dev, "Failed to set desired config\n");
+		dev_err(dev, "Failed to set desired config\n");
 		goto dcb_error;
 	}
-	dev_info(&pf->pdev->dev, "DCB restored after reset\n");
+	dev_info(dev, "DCB restored after reset\n");
 	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
 	if (ret) {
-		dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+		dev_err(dev, "Query Port ETS failed\n");
 		goto dcb_error;
 	}
 
 	return;
 
 dcb_error:
-	dev_err(&pf->pdev->dev, "Disabling DCB until new settings occur\n");
-	prev_cfg = devm_kzalloc(&pf->pdev->dev, sizeof(*prev_cfg), GFP_KERNEL);
+	dev_err(dev, "Disabling DCB until new settings occur\n");
+	prev_cfg = kzalloc(sizeof(*prev_cfg), GFP_KERNEL);
+	if (!prev_cfg)
+		return;
+
 	prev_cfg->etscfg.willing = true;
 	prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
 	prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
 	memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec));
 	ice_pf_dcb_cfg(pf, prev_cfg, false);
-	devm_kfree(&pf->pdev->dev, prev_cfg);
+	kfree(prev_cfg);
 }
 
 /**
@@ -418,18 +412,17 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
 	int ret = 0;
 
 	pi = pf->hw.port_info;
-	newcfg = devm_kzalloc(&pf->pdev->dev, sizeof(*newcfg), GFP_KERNEL);
+	newcfg = kmemdup(&pi->local_dcbx_cfg, sizeof(*newcfg), GFP_KERNEL);
 	if (!newcfg)
 		return -ENOMEM;
 
-	memcpy(newcfg, &pi->local_dcbx_cfg, sizeof(*newcfg));
 	memset(&pi->local_dcbx_cfg, 0, sizeof(*newcfg));
 
-	dev_info(&pf->pdev->dev, "Configuring initial DCB values\n");
+	dev_info(ice_pf_to_dev(pf), "Configuring initial DCB values\n");
 	if (ice_pf_dcb_cfg(pf, newcfg, locked))
 		ret = -EINVAL;
 
-	devm_kfree(&pf->pdev->dev, newcfg);
+	kfree(newcfg);
 
 	return ret;
 }
@@ -437,9 +430,10 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
 /**
  * ice_dcb_sw_default_config - Apply a default DCB config
  * @pf: PF to apply config to
+ * @ets_willing: configure ets willing
  * @locked: was this function called with RTNL held
  */
-static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
+static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked)
 {
 	struct ice_aqc_port_ets_elem buf = { 0 };
 	struct ice_dcbx_cfg *dcbcfg;
@@ -449,12 +443,13 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
 
 	hw = &pf->hw;
 	pi = hw->port_info;
-	dcbcfg = devm_kzalloc(&pf->pdev->dev, sizeof(*dcbcfg), GFP_KERNEL);
+	dcbcfg = kzalloc(sizeof(*dcbcfg), GFP_KERNEL);
+	if (!dcbcfg)
+		return -ENOMEM;
 
-	memset(dcbcfg, 0, sizeof(*dcbcfg));
 	memset(&pi->local_dcbx_cfg, 0, sizeof(*dcbcfg));
 
-	dcbcfg->etscfg.willing = 1;
+	dcbcfg->etscfg.willing = ets_willing ? 1 : 0;
 	dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc;
 	dcbcfg->etscfg.tcbwtable[0] = 100;
 	dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
@@ -472,7 +467,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
 	dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE;
 
 	ret = ice_pf_dcb_cfg(pf, dcbcfg, locked);
-	devm_kfree(&pf->pdev->dev, dcbcfg);
+	kfree(dcbcfg);
 	if (ret)
 		return ret;
 
@@ -480,13 +475,112 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
 }
 
 /**
+ * ice_dcb_tc_contig - Check that TCs are contiguous
+ * @prio_table: pointer to priority table
+ *
+ * Check if TCs begin with TC0 and are contiguous
+ */
+static bool ice_dcb_tc_contig(u8 *prio_table)
+{
+	u8 max_tc = 0;
+	int i;
+
+	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) {
+		u8 cur_tc = prio_table[i];
+
+		if (cur_tc > max_tc)
+			return false;
+		else if (cur_tc == max_tc)
+			max_tc++;
+	}
+
+	return true;
+}
+
+/**
+ * ice_dcb_noncontig_cfg - Configure DCB for non-contiguous TCs
+ * @pf: pointer to the PF struct
+ *
+ * If non-contiguous TCs, then configure SW DCB with TC0 and ETS non-willing
+ */
+static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+	struct device *dev = ice_pf_to_dev(pf);
+	int ret;
+
+	/* Configure SW DCB default with ETS non-willing */
+	ret = ice_dcb_sw_dflt_cfg(pf, false, true);
+	if (ret) {
+		dev_err(dev, "Failed to set local DCB config %d\n", ret);
+		return ret;
+	}
+
+	/* Reconfigure with ETS willing so that FW will send LLDP MIB event */
+	dcbcfg->etscfg.willing = 1;
+	ret = ice_set_dcb_cfg(pf->hw.port_info);
+	if (ret)
+		dev_err(dev, "Failed to set DCB to unwilling\n");
+
+	return ret;
+}
+
+/**
+ * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
+ * @pf: pointer to the PF struct
+ *
+ * Assumed caller has already disabled all VSIs before
+ * calling this function. Reconfiguring DCB based on
+ * local_dcbx_cfg.
+ */
+void ice_pf_dcb_recfg(struct ice_pf *pf)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg;
+	u8 tc_map = 0;
+	int v, ret;
+
+	/* Update each VSI */
+	ice_for_each_vsi(pf, v) {
+		struct ice_vsi *vsi = pf->vsi[v];
+
+		if (!vsi)
+			continue;
+
+		if (vsi->type == ICE_VSI_PF) {
+			tc_map = ice_dcb_get_ena_tc(dcbcfg);
+
+			/* If DCBX request non-contiguous TC, then configure
+			 * default TC
+			 */
+			if (!ice_dcb_tc_contig(dcbcfg->etscfg.prio_table)) {
+				tc_map = ICE_DFLT_TRAFFIC_CLASS;
+				ice_dcb_noncontig_cfg(pf);
+			}
+		} else {
+			tc_map = ICE_DFLT_TRAFFIC_CLASS;
+		}
+
+		ret = ice_vsi_cfg_tc(vsi, tc_map);
+		if (ret) {
+			dev_err(ice_pf_to_dev(pf), "Failed to config TC for VSI index: %d\n",
+				vsi->idx);
+			continue;
+		}
+
+		ice_vsi_map_rings_to_vectors(vsi);
+		if (vsi->type == ICE_VSI_PF)
+			ice_dcbnl_set_all(vsi);
+	}
+}
+
+/**
  * ice_init_pf_dcb - initialize DCB for a PF
  * @pf: PF to initialize DCB for
  * @locked: Was function called with RTNL held
  */
 int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
 {
-	struct device *dev = &pf->pdev->dev;
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_port_info *port_info;
 	struct ice_hw *hw = &pf->hw;
 	int err;
@@ -495,26 +589,39 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
 
 	err = ice_init_dcb(hw, false);
 	if (err && !port_info->is_sw_lldp) {
-		dev_err(&pf->pdev->dev, "Error initializing DCB %d\n", err);
+		dev_err(dev, "Error initializing DCB %d\n", err);
 		goto dcb_init_err;
 	}
 
-	dev_info(&pf->pdev->dev,
+	dev_info(dev,
 		 "DCB is enabled in the hardware, max number of TCs supported on this port are %d\n",
 		 pf->hw.func_caps.common_cap.maxtc);
 	if (err) {
+		struct ice_vsi *pf_vsi;
+
 		/* FW LLDP is disabled, activate SW DCBX/LLDP mode */
-		dev_info(&pf->pdev->dev,
-			 "FW LLDP is disabled, DCBx/LLDP in SW mode.\n");
+		dev_info(dev, "FW LLDP is disabled, DCBx/LLDP in SW mode.\n");
 		clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
-		err = ice_dcb_sw_dflt_cfg(pf, locked);
+		err = ice_dcb_sw_dflt_cfg(pf, true, locked);
 		if (err) {
-			dev_err(&pf->pdev->dev,
+			dev_err(dev,
 				"Failed to set local DCB config %d\n", err);
 			err = -EIO;
 			goto dcb_init_err;
 		}
 
+		/* If the FW DCBX engine is not running then Rx LLDP packets
+		 * need to be redirected up the stack.
+		 */
+		pf_vsi = ice_get_main_vsi(pf);
+		if (!pf_vsi) {
+			dev_err(dev, "Failed to set local DCB config\n");
+			err = -EIO;
+			goto dcb_init_err;
+		}
+
+		ice_cfg_sw_lldp(pf_vsi, false, true);
+
 		pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
 		return 0;
 	}
@@ -623,10 +730,12 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
 				    struct ice_rq_event_info *event)
 {
 	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_aqc_lldp_get_mib *mib;
 	struct ice_dcbx_cfg tmp_dcbx_cfg;
 	bool need_reconfig = false;
 	struct ice_port_info *pi;
+	struct ice_vsi *pf_vsi;
 	u8 type;
 	int ret;
 
@@ -635,8 +744,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
 		return;
 
 	if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) {
-		dev_dbg(&pf->pdev->dev,
-			"MIB Change Event in HOST mode\n");
+		dev_dbg(dev, "MIB Change Event in HOST mode\n");
 		return;
 	}
 
@@ -645,21 +753,20 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
 	/* Ignore if event is not for Nearest Bridge */
 	type = ((mib->type >> ICE_AQ_LLDP_BRID_TYPE_S) &
 		ICE_AQ_LLDP_BRID_TYPE_M);
-	dev_dbg(&pf->pdev->dev, "LLDP event MIB bridge type 0x%x\n", type);
+	dev_dbg(dev, "LLDP event MIB bridge type 0x%x\n", type);
 	if (type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
 		return;
 
 	/* Check MIB Type and return if event for Remote MIB update */
 	type = mib->type & ICE_AQ_LLDP_MIB_TYPE_M;
-	dev_dbg(&pf->pdev->dev,
-		"LLDP event mib type %s\n", type ? "remote" : "local");
+	dev_dbg(dev, "LLDP event mib type %s\n", type ? "remote" : "local");
 	if (type == ICE_AQ_LLDP_MIB_REMOTE) {
 		/* Update the remote cached instance and return */
 		ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
 					 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
 					 &pi->remote_dcbx_cfg);
 		if (ret) {
-			dev_err(&pf->pdev->dev, "Failed to get remote DCB config\n");
+			dev_err(dev, "Failed to get remote DCB config\n");
 			return;
 		}
 	}
@@ -673,37 +780,43 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
 	/* Get updated DCBX data from firmware */
 	ret = ice_get_dcb_cfg(pf->hw.port_info);
 	if (ret) {
-		dev_err(&pf->pdev->dev, "Failed to get DCB config\n");
+		dev_err(dev, "Failed to get DCB config\n");
 		return;
 	}
 
 	/* No change detected in DCBX configs */
 	if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) {
-		dev_dbg(&pf->pdev->dev,
-			"No change detected in DCBX configuration.\n");
+		dev_dbg(dev, "No change detected in DCBX configuration.\n");
 		return;
 	}
 
 	need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
 					   &pi->local_dcbx_cfg);
+	ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg);
 	if (!need_reconfig)
 		return;
 
 	/* Enable DCB tagging only when more than one TC */
 	if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) {
-		dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n");
+		dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
 		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
 	} else {
-		dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n");
+		dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n");
 		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
 	}
 
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi) {
+		dev_dbg(dev, "PF VSI doesn't exist\n");
+		return;
+	}
+
 	rtnl_lock();
-	ice_pf_dis_all_vsi(pf, true);
+	ice_dis_vsi(pf_vsi, true);
 
 	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
 	if (ret) {
-		dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+		dev_err(dev, "Query Port ETS failed\n");
 		rtnl_unlock();
 		return;
 	}
@@ -711,6 +824,6 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
 	/* changes in configuration update VSI */
 	ice_pf_dcb_recfg(pf);
 
-	ice_pf_ena_all_vsi(pf, true);
+	ice_ena_vsi(pf_vsi, true);
 	rtnl_unlock();
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
index 661a6f7bca64..f15e5776f287 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -5,14 +5,22 @@
 #define _ICE_DCB_LIB_H_
 
 #include "ice.h"
+#include "ice_base.h"
 #include "ice_lib.h"
 
 #ifdef CONFIG_DCB
-#define ICE_TC_MAX_BW 100 /* Default Max BW percentage */
+#define ICE_TC_MAX_BW		100 /* Default Max BW percentage */
+#define ICE_DCB_HW_CHG_RST	0 /* DCB configuration changed with reset */
+#define ICE_DCB_NO_HW_CHG	1 /* DCB configuration did not change */
+#define ICE_DCB_HW_CHG		2 /* DCB configuration changed, no reset */
 
 void ice_dcb_rebuild(struct ice_pf *pf);
 u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg);
 u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
+u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index);
+int
+ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked);
+void ice_pf_dcb_recfg(struct ice_pf *pf);
 void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
 int ice_init_pf_dcb(struct ice_pf *pf, bool locked);
 void ice_update_dcb_stats(struct ice_pf *pf);
@@ -41,10 +49,25 @@ static inline u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
 	return 1;
 }
 
+static inline u8
+ice_dcb_get_tc(struct ice_vsi __always_unused *vsi,
+	       int __always_unused queue_index)
+{
+	return 0;
+}
+
 static inline int
 ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked)
 {
-	dev_dbg(&pf->pdev->dev, "DCB not supported\n");
+	dev_dbg(ice_pf_to_dev(pf), "DCB not supported\n");
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_pf_dcb_cfg(struct ice_pf __always_unused *pf,
+	       struct ice_dcbx_cfg __always_unused *new_cfg,
+	       bool __always_unused locked)
+{
 	return -EOPNOTSUPP;
 }
 
@@ -56,6 +79,7 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_ring __always_unused *tx_ring,
 }
 
 #define ice_update_dcb_stats(pf) do {} while (0)
+#define ice_pf_dcb_recfg(pf) do {} while (0)
 #define ice_vsi_cfg_dcb_rings(vsi) do {} while (0)
 #define ice_dcb_process_lldp_set_mib_change(pf, event) do {} while (0)
 #define ice_set_cgd_num(tlan_ctx, ring) do {} while (0)
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
new file mode 100644
index 000000000000..d870c1aedc17
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
@@ -0,0 +1,933 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_dcb.h"
+#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+#include <net/dcbnl.h>
+
+#define ICE_APP_PROT_ID_ROCE	0x8915
+
+/**
+ * ice_dcbnl_devreset - perform enough of a ifdown/ifup to sync DCBNL info
+ * @netdev: device associated with interface that needs reset
+ */
+static void ice_dcbnl_devreset(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	while (ice_is_reset_in_progress(pf->state))
+		usleep_range(1000, 2000);
+
+	set_bit(__ICE_DCBNL_DEVRESET, pf->state);
+	dev_close(netdev);
+	netdev_state_change(netdev);
+	dev_open(netdev, NULL);
+	netdev_state_change(netdev);
+	clear_bit(__ICE_DCBNL_DEVRESET, pf->state);
+}
+
+/**
+ * ice_dcbnl_getets - retrieve local ETS configuration
+ * @netdev: the relevant netdev
+ * @ets: struct to hold ETS configuration
+ */
+static int ice_dcbnl_getets(struct net_device *netdev, struct ieee_ets *ets)
+{
+	struct ice_dcbx_cfg *dcbxcfg;
+	struct ice_port_info *pi;
+	struct ice_pf *pf;
+
+	pf = ice_netdev_to_pf(netdev);
+	pi = pf->hw.port_info;
+	dcbxcfg = &pi->local_dcbx_cfg;
+
+	ets->willing = dcbxcfg->etscfg.willing;
+	ets->ets_cap = dcbxcfg->etscfg.maxtcs;
+	ets->cbs = dcbxcfg->etscfg.cbs;
+	memcpy(ets->tc_tx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_tx_bw));
+	memcpy(ets->tc_rx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_rx_bw));
+	memcpy(ets->tc_tsa, dcbxcfg->etscfg.tsatable, sizeof(ets->tc_tsa));
+	memcpy(ets->prio_tc, dcbxcfg->etscfg.prio_table, sizeof(ets->prio_tc));
+	memcpy(ets->tc_reco_bw, dcbxcfg->etsrec.tcbwtable,
+	       sizeof(ets->tc_reco_bw));
+	memcpy(ets->tc_reco_tsa, dcbxcfg->etsrec.tsatable,
+	       sizeof(ets->tc_reco_tsa));
+	memcpy(ets->reco_prio_tc, dcbxcfg->etscfg.prio_table,
+	       sizeof(ets->reco_prio_tc));
+
+	return 0;
+}
+
+/**
+ * ice_dcbnl_setets - set IEEE ETS configuration
+ * @netdev: pointer to relevant netdev
+ * @ets: struct to hold ETS configuration
+ */
+static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int bwcfg = 0, bwrec = 0;
+	int err, i, max_tc = 0;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	mutex_lock(&pf->tc_mutex);
+
+	new_cfg->etscfg.willing = ets->willing;
+	new_cfg->etscfg.cbs = ets->cbs;
+	ice_for_each_traffic_class(i) {
+		new_cfg->etscfg.tcbwtable[i] = ets->tc_tx_bw[i];
+		bwcfg += ets->tc_tx_bw[i];
+		new_cfg->etscfg.tsatable[i] = ets->tc_tsa[i];
+		new_cfg->etscfg.prio_table[i] = ets->prio_tc[i];
+		if (ets->prio_tc[i] > max_tc)
+			max_tc = ets->prio_tc[i];
+		new_cfg->etsrec.tcbwtable[i] = ets->tc_reco_bw[i];
+		bwrec += ets->tc_reco_bw[i];
+		new_cfg->etsrec.tsatable[i] = ets->tc_reco_tsa[i];
+		new_cfg->etsrec.prio_table[i] = ets->reco_prio_tc[i];
+	}
+
+	/* max_tc is a 1-8 value count of number of TC's, not a 0-7 value
+	 * for the TC's index number.  Add one to value if not zero, and
+	 * for zero set it to the FW's default value
+	 */
+	if (max_tc)
+		max_tc++;
+	else
+		max_tc = IEEE_8021QAZ_MAX_TCS;
+
+	new_cfg->etscfg.maxtcs = max_tc;
+
+	if (!bwcfg)
+		new_cfg->etscfg.tcbwtable[0] = 100;
+
+	if (!bwrec)
+		new_cfg->etsrec.tcbwtable[0] = 100;
+
+	err = ice_pf_dcb_cfg(pf, new_cfg, true);
+	/* return of zero indicates new cfg applied */
+	if (err == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (err == ICE_DCB_NO_HW_CHG)
+		err = ICE_DCB_HW_CHG_RST;
+
+	mutex_unlock(&pf->tc_mutex);
+	return err;
+}
+
+/**
+ * ice_dcbnl_getnumtcs - Get max number of traffic classes supported
+ * @dev: pointer to netdev struct
+ * @tcid: TC ID
+ * @num: total number of TCs supported by the adapter
+ *
+ * Return the total number of TCs supported
+ */
+static int
+ice_dcbnl_getnumtcs(struct net_device *dev, int __always_unused tcid, u8 *num)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(dev);
+
+	if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+		return -EINVAL;
+
+	*num = IEEE_8021QAZ_MAX_TCS;
+	return 0;
+}
+
+/**
+ * ice_dcbnl_getdcbx - retrieve current DCBX capability
+ * @netdev: pointer to the netdev struct
+ */
+static u8 ice_dcbnl_getdcbx(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	return pf->dcbx_cap;
+}
+
+/**
+ * ice_dcbnl_setdcbx - set required DCBX capability
+ * @netdev: the corresponding netdev
+ * @mode: required mode
+ */
+static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	/* No support for LLD_MANAGED modes or CEE+IEEE */
+	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) ||
+	    !(mode & DCB_CAP_DCBX_HOST))
+		return ICE_DCB_NO_HW_CHG;
+
+	/* Already set to the given mode no change */
+	if (mode == pf->dcbx_cap)
+		return ICE_DCB_NO_HW_CHG;
+
+	pf->dcbx_cap = mode;
+	if (mode & DCB_CAP_DCBX_VER_CEE)
+		pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE;
+	else
+		pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE;
+
+	dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode);
+	return ICE_DCB_HW_CHG_RST;
+}
+
+/**
+ * ice_dcbnl_get_perm_hw_addr - MAC address used by DCBX
+ * @netdev: pointer to netdev struct
+ * @perm_addr: buffer to return permanent MAC address
+ */
+static void ice_dcbnl_get_perm_hw_addr(struct net_device *netdev, u8 *perm_addr)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+	int i, j;
+
+	memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
+	for (i = 0; i < netdev->addr_len; i++)
+		perm_addr[i] = pi->mac.perm_addr[i];
+
+	for (j = 0; j < netdev->addr_len; j++, i++)
+		perm_addr[i] = pi->mac.perm_addr[j];
+}
+
+/**
+ * ice_get_pfc_delay - Retrieve PFC Link Delay
+ * @hw: pointer to HW struct
+ * @delay: holds the PFC Link Delay value
+ */
+static void ice_get_pfc_delay(struct ice_hw *hw, u16 *delay)
+{
+	u32 val;
+
+	val = rd32(hw, PRTDCB_GENC);
+	*delay = (u16)((val & PRTDCB_GENC_PFCLDA_M) >> PRTDCB_GENC_PFCLDA_S);
+}
+
+/**
+ * ice_dcbnl_getpfc - retrieve local IEEE PFC config
+ * @netdev: pointer to netdev struct
+ * @pfc: struct to hold PFC info
+ */
+static int ice_dcbnl_getpfc(struct net_device *netdev, struct ieee_pfc *pfc)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+	struct ice_dcbx_cfg *dcbxcfg;
+	int i;
+
+	dcbxcfg = &pi->local_dcbx_cfg;
+	pfc->pfc_cap = dcbxcfg->pfc.pfccap;
+	pfc->pfc_en = dcbxcfg->pfc.pfcena;
+	pfc->mbc = dcbxcfg->pfc.mbc;
+	ice_get_pfc_delay(&pf->hw, &pfc->delay);
+
+	ice_for_each_traffic_class(i) {
+		pfc->requests[i] = pf->stats.priority_xoff_tx[i];
+		pfc->indications[i] = pf->stats.priority_xoff_rx[i];
+	}
+
+	return 0;
+}
+
+/**
+ * ice_dcbnl_setpfc - set local IEEE PFC config
+ * @netdev: pointer to relevant netdev
+ * @pfc: pointer to struct holding PFC config
+ */
+static int ice_dcbnl_setpfc(struct net_device *netdev, struct ieee_pfc *pfc)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int err;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	mutex_lock(&pf->tc_mutex);
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	if (pfc->pfc_cap)
+		new_cfg->pfc.pfccap = pfc->pfc_cap;
+	else
+		new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
+
+	new_cfg->pfc.pfcena = pfc->pfc_en;
+
+	err = ice_pf_dcb_cfg(pf, new_cfg, true);
+	if (err == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (err == ICE_DCB_NO_HW_CHG)
+		err = ICE_DCB_HW_CHG_RST;
+	mutex_unlock(&pf->tc_mutex);
+	return err;
+}
+
+/**
+ * ice_dcbnl_get_pfc_cfg - Get CEE PFC config
+ * @netdev: pointer to netdev struct
+ * @prio: corresponding user priority
+ * @setting: the PFC setting for given priority
+ */
+static void
+ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	*setting = (pi->local_dcbx_cfg.pfc.pfcena >> prio) & 0x1;
+	dev_dbg(ice_pf_to_dev(pf),
+		"Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n",
+		prio, *setting, pi->local_dcbx_cfg.pfc.pfcena);
+}
+
+/**
+ * ice_dcbnl_set_pfc_cfg - Set CEE PFC config
+ * @netdev: the corresponding netdev
+ * @prio: User Priority
+ * @set: PFC setting to apply
+ */
+static void ice_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio, u8 set)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
+	if (set)
+		new_cfg->pfc.pfcena |= BIT(prio);
+	else
+		new_cfg->pfc.pfcena &= ~BIT(prio);
+
+	dev_dbg(ice_pf_to_dev(pf), "Set PFC config UP:%d set:%d pfcena:0x%x\n",
+		prio, set, new_cfg->pfc.pfcena);
+}
+
+/**
+ * ice_dcbnl_getpfcstate - get CEE PFC mode
+ * @netdev: pointer to netdev struct
+ */
+static u8 ice_dcbnl_getpfcstate(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	/* Return enabled if any UP enabled for PFC */
+	if (pi->local_dcbx_cfg.pfc.pfcena)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * ice_dcbnl_getstate - get DCB enabled state
+ * @netdev: pointer to netdev struct
+ */
+static u8 ice_dcbnl_getstate(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	u8 state = 0;
+
+	state = test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+
+	dev_dbg(ice_pf_to_dev(pf), "DCB enabled state = %d\n", state);
+	return state;
+}
+
+/**
+ * ice_dcbnl_setstate - Set CEE DCB state
+ * @netdev: pointer to relevant netdev
+ * @state: state value to set
+ */
+static u8 ice_dcbnl_setstate(struct net_device *netdev, u8 state)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return ICE_DCB_NO_HW_CHG;
+
+	/* Nothing to do */
+	if (!!state == test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		return ICE_DCB_NO_HW_CHG;
+
+	if (state) {
+		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+		memcpy(&pf->hw.port_info->desired_dcbx_cfg,
+		       &pf->hw.port_info->local_dcbx_cfg,
+		       sizeof(struct ice_dcbx_cfg));
+	} else {
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	}
+
+	return ICE_DCB_HW_CHG;
+}
+
+/**
+ * ice_dcbnl_get_pg_tc_cfg_tx - get CEE PG Tx config
+ * @netdev: pointer to netdev struct
+ * @prio: the corresponding user priority
+ * @prio_type: traffic priority type
+ * @pgid: the BW group ID the traffic class belongs to
+ * @bw_pct: BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio,
+			   u8 __always_unused *prio_type, u8 *pgid,
+			   u8 __always_unused *bw_pct,
+			   u8 __always_unused *up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	*pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio];
+	dev_dbg(ice_pf_to_dev(pf),
+		"Get PG config prio=%d tc=%d\n", prio, *pgid);
+}
+
+/**
+ * ice_dcbnl_set_pg_tc_cfg_tx - set CEE PG Tx config
+ * @netdev: pointer to relevant netdev
+ * @tc: the corresponding traffic class
+ * @prio_type: the traffic priority type
+ * @bwg_id: the BW group ID the TC belongs to
+ * @bw_pct: the BW perventage for the BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+			   u8 __always_unused prio_type,
+			   u8 __always_unused bwg_id,
+			   u8 __always_unused bw_pct, u8 up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int i;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (tc >= ICE_MAX_TRAFFIC_CLASS)
+		return;
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	/* prio_type, bwg_id and bw_pct per UP are not supported */
+
+	ice_for_each_traffic_class(i) {
+		if (up_map & BIT(i))
+			new_cfg->etscfg.prio_table[i] = tc;
+	}
+	new_cfg->etscfg.tsatable[tc] = ICE_IEEE_TSA_ETS;
+}
+
+/**
+ * ice_dcbnl_get_pg_bwg_cfg_tx - Get CEE PGBW config
+ * @netdev: pointer to the netdev struct
+ * @pgid: corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 *bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (pgid >= ICE_MAX_TRAFFIC_CLASS)
+		return;
+
+	*bw_pct = pi->local_dcbx_cfg.etscfg.tcbwtable[pgid];
+	dev_dbg(ice_pf_to_dev(pf), "Get PG BW config tc=%d bw_pct=%d\n",
+		pgid, *bw_pct);
+}
+
+/**
+ * ice_dcbnl_set_pg_bwg_cfg_tx - set CEE PG Tx BW config
+ * @netdev: the corresponding netdev
+ * @pgid: Correspongind traffic class
+ * @bw_pct: the BW percentage for the specified TC
+ */
+static void
+ice_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (pgid >= ICE_MAX_TRAFFIC_CLASS)
+		return;
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	new_cfg->etscfg.tcbwtable[pgid] = bw_pct;
+}
+
+/**
+ * ice_dcbnl_get_pg_tc_cfg_rx - Get CEE PG Rx config
+ * @netdev: pointer to netdev struct
+ * @prio: the corresponding user priority
+ * @prio_type: the traffic priority type
+ * @pgid: the PG ID
+ * @bw_pct: the BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio,
+			   u8 __always_unused *prio_type, u8 *pgid,
+			   u8 __always_unused *bw_pct,
+			   u8 __always_unused *up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	*pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio];
+}
+
+/**
+ * ice_dcbnl_get_pg_bwg_cfg_rx - Get CEE PG BW Rx config
+ * @netdev: pointer to netdev struct
+ * @pgid: the corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int __always_unused pgid,
+			    u8 *bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	*bw_pct = 0;
+}
+
+/**
+ * ice_dcbnl_get_cap - Get DCBX capabilities of adapter
+ * @netdev: pointer to netdev struct
+ * @capid: the capability type
+ * @cap: the capability value
+ */
+static u8 ice_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)))
+		return ICE_DCB_NO_HW_CHG;
+
+	switch (capid) {
+	case DCB_CAP_ATTR_PG:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_PFC:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_UP2TC:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_PG_TCS:
+		*cap = 0x80;
+		break;
+	case DCB_CAP_ATTR_PFC_TCS:
+		*cap = 0x80;
+		break;
+	case DCB_CAP_ATTR_GSP:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_BCN:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_DCBX:
+		*cap = pf->dcbx_cap;
+		break;
+	default:
+		*cap = false;
+		break;
+	}
+
+	dev_dbg(ice_pf_to_dev(pf), "DCBX Get Capability cap=%d capval=0x%x\n",
+		capid, *cap);
+	return 0;
+}
+
+/**
+ * ice_dcbnl_getapp - get CEE APP
+ * @netdev: pointer to netdev struct
+ * @idtype: the App selector
+ * @id: the App ethtype or port number
+ */
+static int ice_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct dcb_app app = {
+				.selector = idtype,
+				.protocol = id,
+			     };
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return -EINVAL;
+
+	return dcb_getapp(netdev, &app);
+}
+
+/**
+ * ice_dcbnl_find_app - Search for APP in given DCB config
+ * @cfg: struct to hold DCBX config
+ * @app: struct to hold app data to look for
+ */
+static bool
+ice_dcbnl_find_app(struct ice_dcbx_cfg *cfg,
+		   struct ice_dcb_app_priority_table *app)
+{
+	int i;
+
+	for (i = 0; i < cfg->numapps; i++) {
+		if (app->selector == cfg->app[i].selector &&
+		    app->prot_id == cfg->app[i].prot_id &&
+		    app->priority == cfg->app[i].priority)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_dcbnl_setapp - set local IEEE App config
+ * @netdev: relevant netdev struct
+ * @app: struct to hold app config info
+ */
+static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcb_app_priority_table new_app;
+	struct ice_dcbx_cfg *old_cfg, *new_cfg;
+	int ret;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	mutex_lock(&pf->tc_mutex);
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	old_cfg = &pf->hw.port_info->local_dcbx_cfg;
+
+	if (old_cfg->numapps == ICE_DCBX_MAX_APPS) {
+		ret = -EINVAL;
+		goto setapp_out;
+	}
+
+	ret = dcb_ieee_setapp(netdev, app);
+	if (ret)
+		goto setapp_out;
+
+	new_app.selector = app->selector;
+	new_app.prot_id = app->protocol;
+	new_app.priority = app->priority;
+	if (ice_dcbnl_find_app(old_cfg, &new_app)) {
+		ret = 0;
+		goto setapp_out;
+	}
+
+	new_cfg->app[new_cfg->numapps++] = new_app;
+	ret = ice_pf_dcb_cfg(pf, new_cfg, true);
+	/* return of zero indicates new cfg applied */
+	if (ret == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (ret == ICE_DCB_NO_HW_CHG)
+		ret = ICE_DCB_HW_CHG_RST;
+
+setapp_out:
+	mutex_unlock(&pf->tc_mutex);
+	return ret;
+}
+
+/**
+ * ice_dcbnl_delapp - Delete local IEEE App config
+ * @netdev: relevant netdev
+ * @app: struct to hold app too delete
+ *
+ * Will not delete first application required by the FW
+ */
+static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *old_cfg, *new_cfg;
+	int i, j, ret = 0;
+
+	if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)
+		return -EINVAL;
+
+	mutex_lock(&pf->tc_mutex);
+	ret = dcb_ieee_delapp(netdev, app);
+	if (ret)
+		goto delapp_out;
+
+	old_cfg = &pf->hw.port_info->local_dcbx_cfg;
+
+	if (old_cfg->numapps == 1)
+		goto delapp_out;
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	for (i = 1; i < new_cfg->numapps; i++) {
+		if (app->selector == new_cfg->app[i].selector &&
+		    app->protocol == new_cfg->app[i].prot_id &&
+		    app->priority == new_cfg->app[i].priority) {
+			new_cfg->app[i].selector = 0;
+			new_cfg->app[i].prot_id = 0;
+			new_cfg->app[i].priority = 0;
+			break;
+		}
+	}
+
+	/* Did not find DCB App */
+	if (i == new_cfg->numapps) {
+		ret = -EINVAL;
+		goto delapp_out;
+	}
+
+	new_cfg->numapps--;
+
+	for (j = i; j < new_cfg->numapps; j++) {
+		new_cfg->app[i].selector = old_cfg->app[j + 1].selector;
+		new_cfg->app[i].prot_id = old_cfg->app[j + 1].prot_id;
+		new_cfg->app[i].priority = old_cfg->app[j + 1].priority;
+	}
+
+	ret = ice_pf_dcb_cfg(pf, new_cfg, true);
+	/* return of zero indicates new cfg applied */
+	if (ret == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (ret == ICE_DCB_NO_HW_CHG)
+		ret = ICE_DCB_HW_CHG_RST;
+
+delapp_out:
+	mutex_unlock(&pf->tc_mutex);
+	return ret;
+}
+
+/**
+ * ice_dcbnl_cee_set_all - Commit CEE DCB settings to HW
+ * @netdev: the corresponding netdev
+ */
+static u8 ice_dcbnl_cee_set_all(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int err;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return ICE_DCB_NO_HW_CHG;
+
+	new_cfg = &pf->hw.port_info->desired_dcbx_cfg;
+
+	mutex_lock(&pf->tc_mutex);
+
+	err = ice_pf_dcb_cfg(pf, new_cfg, true);
+
+	mutex_unlock(&pf->tc_mutex);
+	return (err != ICE_DCB_HW_CHG_RST) ? ICE_DCB_NO_HW_CHG : err;
+}
+
+static const struct dcbnl_rtnl_ops dcbnl_ops = {
+	/* IEEE 802.1Qaz std */
+	.ieee_getets = ice_dcbnl_getets,
+	.ieee_setets = ice_dcbnl_setets,
+	.ieee_getpfc = ice_dcbnl_getpfc,
+	.ieee_setpfc = ice_dcbnl_setpfc,
+	.ieee_setapp = ice_dcbnl_setapp,
+	.ieee_delapp = ice_dcbnl_delapp,
+
+	/* CEE std */
+	.getstate = ice_dcbnl_getstate,
+	.setstate = ice_dcbnl_setstate,
+	.getpermhwaddr = ice_dcbnl_get_perm_hw_addr,
+	.setpgtccfgtx = ice_dcbnl_set_pg_tc_cfg_tx,
+	.setpgbwgcfgtx = ice_dcbnl_set_pg_bwg_cfg_tx,
+	.getpgtccfgtx = ice_dcbnl_get_pg_tc_cfg_tx,
+	.getpgbwgcfgtx = ice_dcbnl_get_pg_bwg_cfg_tx,
+	.getpgtccfgrx = ice_dcbnl_get_pg_tc_cfg_rx,
+	.getpgbwgcfgrx = ice_dcbnl_get_pg_bwg_cfg_rx,
+	.setpfccfg = ice_dcbnl_set_pfc_cfg,
+	.getpfccfg = ice_dcbnl_get_pfc_cfg,
+	.setall = ice_dcbnl_cee_set_all,
+	.getcap = ice_dcbnl_get_cap,
+	.getnumtcs = ice_dcbnl_getnumtcs,
+	.getpfcstate = ice_dcbnl_getpfcstate,
+	.getapp = ice_dcbnl_getapp,
+
+	/* DCBX configuration */
+	.getdcbx = ice_dcbnl_getdcbx,
+	.setdcbx = ice_dcbnl_setdcbx,
+};
+
+/**
+ * ice_dcbnl_set_all - set all the apps and ieee data from DCBX config
+ * @vsi: pointer to VSI struct
+ */
+void ice_dcbnl_set_all(struct ice_vsi *vsi)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_dcbx_cfg *dcbxcfg;
+	struct ice_port_info *pi;
+	struct dcb_app sapp;
+	struct ice_pf *pf;
+	int i;
+
+	if (!netdev)
+		return;
+
+	pf = ice_netdev_to_pf(netdev);
+	pi = pf->hw.port_info;
+
+	/* SW DCB taken care of by SW Default Config */
+	if (pf->dcbx_cap & DCB_CAP_DCBX_HOST)
+		return;
+
+	/* DCB not enabled */
+	if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		return;
+
+	dcbxcfg = &pi->local_dcbx_cfg;
+
+	for (i = 0; i < dcbxcfg->numapps; i++) {
+		u8 prio, tc_map;
+
+		prio = dcbxcfg->app[i].priority;
+		tc_map = BIT(dcbxcfg->etscfg.prio_table[prio]);
+
+		/* Add APP only if the TC is enabled for this VSI */
+		if (tc_map & vsi->tc_cfg.ena_tc) {
+			sapp.selector = dcbxcfg->app[i].selector;
+			sapp.protocol = dcbxcfg->app[i].prot_id;
+			sapp.priority = prio;
+			dcb_ieee_setapp(netdev, &sapp);
+		}
+	}
+	/* Notify user-space of the changes */
+	dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, 0, 0);
+}
+
+/**
+ * ice_dcbnl_vsi_del_app - Delete APP on all VSIs
+ * @vsi: pointer to the main VSI
+ * @app: APP to delete
+ *
+ * Delete given APP from all the VSIs for given PF
+ */
+static void
+ice_dcbnl_vsi_del_app(struct ice_vsi *vsi,
+		      struct ice_dcb_app_priority_table *app)
+{
+	struct dcb_app sapp;
+	int err;
+
+	sapp.selector = app->selector;
+	sapp.protocol = app->prot_id;
+	sapp.priority = app->priority;
+	err = ice_dcbnl_delapp(vsi->netdev, &sapp);
+	dev_dbg(&vsi->back->pdev->dev,
+		"Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n",
+		vsi->idx, err, app->selector, app->prot_id, app->priority);
+}
+
+/**
+ * ice_dcbnl_flush_apps - Delete all removed APPs
+ * @pf: the corresponding PF
+ * @old_cfg: old DCBX configuration data
+ * @new_cfg: new DCBX configuration data
+ *
+ * Find and delete all APPS that are not present in the passed
+ * DCB configuration
+ */
+void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		     struct ice_dcbx_cfg *new_cfg)
+{
+	struct ice_vsi *main_vsi = ice_get_main_vsi(pf);
+	int i;
+
+	if (!main_vsi)
+		return;
+
+	for (i = 0; i < old_cfg->numapps; i++) {
+		struct ice_dcb_app_priority_table app = old_cfg->app[i];
+
+		/* The APP is not available anymore delete it */
+		if (!ice_dcbnl_find_app(new_cfg, &app))
+			ice_dcbnl_vsi_del_app(main_vsi, &app);
+	}
+}
+
+/**
+ * ice_dcbnl_setup - setup DCBNL
+ * @vsi: VSI to get associated netdev from
+ */
+void ice_dcbnl_setup(struct ice_vsi *vsi)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_pf *pf;
+
+	pf = ice_netdev_to_pf(netdev);
+	if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+		return;
+
+	netdev->dcbnl_ops = &dcbnl_ops;
+	ice_dcbnl_set_all(vsi);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.h b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
new file mode 100644
index 000000000000..6c630a362293
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_NL_H_
+#define _ICE_DCB_NL_H_
+
+#ifdef CONFIG_DCB
+void ice_dcbnl_setup(struct ice_vsi *vsi);
+void ice_dcbnl_set_all(struct ice_vsi *vsi);
+void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		     struct ice_dcbx_cfg *new_cfg);
+#else
+#define ice_dcbnl_setup(vsi) do {} while (0)
+#define ice_dcbnl_set_all(vsi) do {} while (0)
+#define ice_dcbnl_flush_apps(pf, old_cfg, new_cfg) do {} while (0)
+#endif /* CONFIG_DCB */
+
+#endif /* _ICE_DCB_NL_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 7e23034df955..aec3c6c379df 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -156,6 +156,7 @@ struct ice_priv_flag {
 static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
 	ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
 	ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
+	ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
 };
 
 #define ICE_PRIV_FLAG_ARRAY_SIZE	ARRAY_SIZE(ice_gstrings_priv_flags)
@@ -247,7 +248,7 @@ ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
 	int ret = 0;
 	u16 *buf;
 
-	dev = &pf->pdev->dev;
+	dev = ice_pf_to_dev(pf);
 
 	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
 
@@ -342,6 +343,7 @@ static u64 ice_eeprom_test(struct net_device *netdev)
 static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
 {
 	struct ice_pf *pf = (struct ice_pf *)hw->back;
+	struct device *dev = ice_pf_to_dev(pf);
 	static const u32 patterns[] = {
 		0x5A5A5A5A, 0xA5A5A5A5,
 		0x00000000, 0xFFFFFFFF
@@ -357,7 +359,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
 		val = rd32(hw, reg);
 		if (val == pattern)
 			continue;
-		dev_err(&pf->pdev->dev,
+		dev_err(dev,
 			"%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
 			, __func__, reg, pattern, val);
 		return 1;
@@ -366,7 +368,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
 	wr32(hw, reg, orig_val);
 	val = rd32(hw, reg);
 	if (val != orig_val) {
-		dev_err(&pf->pdev->dev,
+		dev_err(dev,
 			"%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
 			, __func__, reg, orig_val, val);
 		return 1;
@@ -506,7 +508,7 @@ static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size)
 	if (!pf)
 		return -EINVAL;
 
-	data = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL);
+	data = devm_kzalloc(ice_pf_to_dev(pf), size, GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
@@ -623,7 +625,7 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
 			continue;
 
 		rx_buf = &rx_ring->rx_buf[i];
-		received_buf = page_address(rx_buf->page);
+		received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
 
 		if (ice_lbtest_check_frame(received_buf))
 			valid_frames++;
@@ -648,9 +650,11 @@ static u64 ice_loopback_test(struct net_device *netdev)
 	u8 broadcast[ETH_ALEN], ret = 0;
 	int num_frames, valid_frames;
 	LIST_HEAD(tmp_list);
+	struct device *dev;
 	u8 *tx_frame;
 	int i;
 
+	dev = ice_pf_to_dev(pf);
 	netdev_info(netdev, "loopback test\n");
 
 	test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
@@ -711,12 +715,12 @@ static u64 ice_loopback_test(struct net_device *netdev)
 		ret = 10;
 
 lbtest_free_frame:
-	devm_kfree(&pf->pdev->dev, tx_frame);
+	devm_kfree(dev, tx_frame);
 remove_mac_filters:
 	if (ice_remove_mac(&pf->hw, &tmp_list))
 		netdev_err(netdev, "Could not remove MAC filter for the test VSI");
 free_mac_list:
-	ice_free_fltr_list(&pf->pdev->dev, &tmp_list);
+	ice_free_fltr_list(dev, &tmp_list);
 lbtest_mac_dis:
 	/* Disable MAC loopback after the test is completed. */
 	if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
@@ -773,6 +777,9 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	bool if_running = netif_running(netdev);
 	struct ice_pf *pf = np->vsi->back;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
 
 	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
 		netdev_info(netdev, "offline testing starting\n");
@@ -780,7 +787,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
 		set_bit(__ICE_TESTING, pf->state);
 
 		if (ice_active_vfs(pf)) {
-			dev_warn(&pf->pdev->dev,
+			dev_warn(dev,
 				 "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
 			data[ICE_ETH_TEST_REG] = 1;
 			data[ICE_ETH_TEST_EEPROM] = 1;
@@ -815,8 +822,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
 			int status = ice_open(netdev);
 
 			if (status) {
-				dev_err(&pf->pdev->dev,
-					"Could not open device %s, err %d",
+				dev_err(dev, "Could not open device %s, err %d",
 					pf->int_name, status);
 			}
 		}
@@ -961,7 +967,7 @@ static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
 	}
 
 	/* Get last SW configuration */
-	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
 	if (!caps)
 		return -ENOMEM;
 
@@ -1006,7 +1012,7 @@ static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
 	}
 
 done:
-	devm_kfree(&vsi->back->pdev->dev, caps);
+	kfree(caps);
 	return err;
 }
 
@@ -1082,7 +1088,7 @@ ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
 		break;
 	}
 
-	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
 	if (!caps)
 		return -ENOMEM;
 
@@ -1109,7 +1115,7 @@ ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
 		fecparam->fec |= ETHTOOL_FEC_OFF;
 
 done:
-	devm_kfree(&vsi->back->pdev->dev, caps);
+	kfree(caps);
 	return err;
 }
 
@@ -1154,12 +1160,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 	DECLARE_BITMAP(orig_flags, ICE_PF_FLAGS_NBITS);
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
+	struct device *dev;
 	int ret = 0;
 	u32 i;
 
 	if (flags > BIT(ICE_PRIV_FLAG_ARRAY_SIZE))
 		return -EINVAL;
 
+	dev = ice_pf_to_dev(pf);
 	set_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
 
 	bitmap_copy(orig_flags, pf->flags, ICE_PF_FLAGS_NBITS);
@@ -1188,7 +1196,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 			 * events to respond to.
 			 */
 			if (status)
-				dev_info(&pf->pdev->dev,
+				dev_info(dev,
 					 "Failed to unreg for LLDP events\n");
 
 			/* The AQ call to stop the FW LLDP agent will generate
@@ -1196,20 +1204,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 			 */
 			status = ice_aq_stop_lldp(&pf->hw, true, true, NULL);
 			if (status)
-				dev_warn(&pf->pdev->dev,
-					 "Fail to stop LLDP agent\n");
+				dev_warn(dev, "Fail to stop LLDP agent\n");
 			/* Use case for having the FW LLDP agent stopped
 			 * will likely not need DCB, so failure to init is
 			 * not a concern of ethtool
 			 */
 			status = ice_init_pf_dcb(pf, true);
 			if (status)
-				dev_warn(&pf->pdev->dev, "Fail to init DCB\n");
-
-			/* Forward LLDP packets to default VSI so that they
-			 * are passed up the stack
-			 */
-			ice_cfg_sw_lldp(vsi, false, true);
+				dev_warn(dev, "Fail to init DCB\n");
 		} else {
 			enum ice_status status;
 			bool dcbx_agent_status;
@@ -1219,8 +1221,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 			 */
 			status = ice_aq_start_lldp(&pf->hw, true, NULL);
 			if (status)
-				dev_warn(&pf->pdev->dev,
-					 "Fail to start LLDP Agent\n");
+				dev_warn(dev, "Fail to start LLDP Agent\n");
 
 			/* AQ command to start FW DCBX agent will fail if
 			 * the agent is already started
@@ -1229,10 +1230,9 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 							&dcbx_agent_status,
 							NULL);
 			if (status)
-				dev_dbg(&pf->pdev->dev,
-					"Failed to start FW DCBX\n");
+				dev_dbg(dev, "Failed to start FW DCBX\n");
 
-			dev_info(&pf->pdev->dev, "FW DCBX agent is %s\n",
+			dev_info(dev, "FW DCBX agent is %s\n",
 				 dcbx_agent_status ? "ACTIVE" : "DISABLED");
 
 			/* Failure to configure MIB change or init DCB is not
@@ -1242,7 +1242,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 			 */
 			status = ice_init_pf_dcb(pf, true);
 			if (status)
-				dev_dbg(&pf->pdev->dev, "Fail to init DCB\n");
+				dev_dbg(dev, "Fail to init DCB\n");
 
 			/* Remove rule to direct LLDP packets to default VSI.
 			 * The FW LLDP engine will now be consuming them.
@@ -1252,10 +1252,15 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 			/* Register for MIB change events */
 			status = ice_cfg_lldp_mib_change(&pf->hw, true);
 			if (status)
-				dev_dbg(&pf->pdev->dev,
+				dev_dbg(dev,
 					"Fail to enable MIB change events\n");
 		}
 	}
+	if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
+		/* down and up VSI so that changes of Rx cfg are reflected. */
+		ice_down(vsi);
+		ice_up(vsi);
+	}
 	clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
 	return ret;
 }
@@ -2140,7 +2145,7 @@ ice_get_link_ksettings(struct net_device *netdev,
 	/* flow control is symmetric and always supported */
 	ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
 
-	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
 	if (!caps)
 		return -ENOMEM;
 
@@ -2198,7 +2203,7 @@ ice_get_link_ksettings(struct net_device *netdev,
 		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
 
 done:
-	devm_kfree(&vsi->back->pdev->dev, caps);
+	kfree(caps);
 	return err;
 }
 
@@ -2427,8 +2432,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 		usleep_range(TEST_SET_BITS_SLEEP_MIN, TEST_SET_BITS_SLEEP_MAX);
 	}
 
-	abilities = devm_kzalloc(&pf->pdev->dev, sizeof(*abilities),
-				 GFP_KERNEL);
+	abilities = kzalloc(sizeof(*abilities), GFP_KERNEL);
 	if (!abilities)
 		return -ENOMEM;
 
@@ -2520,7 +2524,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 	}
 
 done:
-	devm_kfree(&pf->pdev->dev, abilities);
+	kfree(abilities);
 	clear_bit(__ICE_CFG_BUSY, pf->state);
 
 	return err;
@@ -2577,6 +2581,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 {
 	struct ice_ring *tx_rings = NULL, *rx_rings = NULL;
 	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_ring *xdp_rings = NULL;
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
 	int i, timeout = 50, err = 0;
@@ -2611,6 +2616,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 		return 0;
 	}
 
+	/* If there is a AF_XDP UMEM attached to any of Rx rings,
+	 * disallow changing the number of descriptors -- regardless
+	 * if the netdev is running or not.
+	 */
+	if (ice_xsk_any_rx_ring_ena(vsi))
+		return -EBUSY;
+
 	while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
 		timeout--;
 		if (!timeout)
@@ -2624,6 +2636,11 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 			vsi->tx_rings[i]->count = new_tx_cnt;
 		for (i = 0; i < vsi->alloc_rxq; i++)
 			vsi->rx_rings[i]->count = new_rx_cnt;
+		if (ice_is_xdp_ena_vsi(vsi))
+			for (i = 0; i < vsi->num_xdp_txq; i++)
+				vsi->xdp_rings[i]->count = new_tx_cnt;
+		vsi->num_tx_desc = new_tx_cnt;
+		vsi->num_rx_desc = new_rx_cnt;
 		netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
 		goto done;
 	}
@@ -2635,14 +2652,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 	netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n",
 		    vsi->tx_rings[0]->count, new_tx_cnt);
 
-	tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
-				sizeof(*tx_rings), GFP_KERNEL);
+	tx_rings = kcalloc(vsi->num_txq, sizeof(*tx_rings), GFP_KERNEL);
 	if (!tx_rings) {
 		err = -ENOMEM;
 		goto done;
 	}
 
-	for (i = 0; i < vsi->alloc_txq; i++) {
+	ice_for_each_txq(vsi, i) {
 		/* clone ring and setup updated count */
 		tx_rings[i] = *vsi->tx_rings[i];
 		tx_rings[i].count = new_tx_cnt;
@@ -2650,15 +2666,42 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 		tx_rings[i].tx_buf = NULL;
 		err = ice_setup_tx_ring(&tx_rings[i]);
 		if (err) {
-			while (i) {
-				i--;
+			while (i--)
 				ice_clean_tx_ring(&tx_rings[i]);
-			}
-			devm_kfree(&pf->pdev->dev, tx_rings);
+			kfree(tx_rings);
 			goto done;
 		}
 	}
 
+	if (!ice_is_xdp_ena_vsi(vsi))
+		goto process_rx;
+
+	/* alloc updated XDP resources */
+	netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n",
+		    vsi->xdp_rings[0]->count, new_tx_cnt);
+
+	xdp_rings = kcalloc(vsi->num_xdp_txq, sizeof(*xdp_rings), GFP_KERNEL);
+	if (!xdp_rings) {
+		err = -ENOMEM;
+		goto free_tx;
+	}
+
+	for (i = 0; i < vsi->num_xdp_txq; i++) {
+		/* clone ring and setup updated count */
+		xdp_rings[i] = *vsi->xdp_rings[i];
+		xdp_rings[i].count = new_tx_cnt;
+		xdp_rings[i].desc = NULL;
+		xdp_rings[i].tx_buf = NULL;
+		err = ice_setup_tx_ring(&xdp_rings[i]);
+		if (err) {
+			while (i--)
+				ice_clean_tx_ring(&xdp_rings[i]);
+			kfree(xdp_rings);
+			goto free_tx;
+		}
+		ice_set_ring_xdp(&xdp_rings[i]);
+	}
+
 process_rx:
 	if (new_rx_cnt == vsi->rx_rings[0]->count)
 		goto process_link;
@@ -2667,14 +2710,13 @@ process_rx:
 	netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n",
 		    vsi->rx_rings[0]->count, new_rx_cnt);
 
-	rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
-				sizeof(*rx_rings), GFP_KERNEL);
+	rx_rings = kcalloc(vsi->num_rxq, sizeof(*rx_rings), GFP_KERNEL);
 	if (!rx_rings) {
 		err = -ENOMEM;
 		goto done;
 	}
 
-	for (i = 0; i < vsi->alloc_rxq; i++) {
+	ice_for_each_rxq(vsi, i) {
 		/* clone ring and setup updated count */
 		rx_rings[i] = *vsi->rx_rings[i];
 		rx_rings[i].count = new_rx_cnt;
@@ -2698,7 +2740,7 @@ rx_unwind:
 				i--;
 				ice_free_rx_ring(&rx_rings[i]);
 			}
-			devm_kfree(&pf->pdev->dev, rx_rings);
+			kfree(rx_rings);
 			err = -ENOMEM;
 			goto free_tx;
 		}
@@ -2712,15 +2754,15 @@ process_link:
 		ice_down(vsi);
 
 		if (tx_rings) {
-			for (i = 0; i < vsi->alloc_txq; i++) {
+			ice_for_each_txq(vsi, i) {
 				ice_free_tx_ring(vsi->tx_rings[i]);
 				*vsi->tx_rings[i] = tx_rings[i];
 			}
-			devm_kfree(&pf->pdev->dev, tx_rings);
+			kfree(tx_rings);
 		}
 
 		if (rx_rings) {
-			for (i = 0; i < vsi->alloc_rxq; i++) {
+			ice_for_each_rxq(vsi, i) {
 				ice_free_rx_ring(vsi->rx_rings[i]);
 				/* copy the real tail offset */
 				rx_rings[i].tail = vsi->rx_rings[i]->tail;
@@ -2734,9 +2776,19 @@ process_link:
 				rx_rings[i].next_to_alloc = 0;
 				*vsi->rx_rings[i] = rx_rings[i];
 			}
-			devm_kfree(&pf->pdev->dev, rx_rings);
+			kfree(rx_rings);
+		}
+
+		if (xdp_rings) {
+			for (i = 0; i < vsi->num_xdp_txq; i++) {
+				ice_free_tx_ring(vsi->xdp_rings[i]);
+				*vsi->xdp_rings[i] = xdp_rings[i];
+			}
+			kfree(xdp_rings);
 		}
 
+		vsi->num_tx_desc = new_tx_cnt;
+		vsi->num_rx_desc = new_rx_cnt;
 		ice_up(vsi);
 	}
 	goto done;
@@ -2744,9 +2796,9 @@ process_link:
 free_tx:
 	/* error cleanup if the Rx allocations failed after getting Tx */
 	if (tx_rings) {
-		for (i = 0; i < vsi->alloc_txq; i++)
+		ice_for_each_txq(vsi, i)
 			ice_free_tx_ring(&tx_rings[i]);
-		devm_kfree(&pf->pdev->dev, tx_rings);
+		kfree(tx_rings);
 	}
 
 done:
@@ -2794,7 +2846,6 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_port_info *pi = np->vsi->port_info;
 	struct ice_aqc_get_phy_caps_data *pcaps;
-	struct ice_vsi *vsi = np->vsi;
 	struct ice_dcbx_cfg *dcbx_cfg;
 	enum ice_status status;
 
@@ -2804,8 +2855,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
 
 	dcbx_cfg = &pi->local_dcbx_cfg;
 
-	pcaps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*pcaps),
-			     GFP_KERNEL);
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
 	if (!pcaps)
 		return;
 
@@ -2828,7 +2878,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
 		pause->rx_pause = 1;
 
 out:
-	devm_kfree(&vsi->back->pdev->dev, pcaps);
+	kfree(pcaps);
 }
 
 /**
@@ -3009,7 +3059,7 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
 		return -EIO;
 	}
 
-	lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL);
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
 	if (!lut)
 		return -ENOMEM;
 
@@ -3022,7 +3072,7 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
 		indir[i] = (u32)(lut[i]);
 
 out:
-	devm_kfree(&pf->pdev->dev, lut);
+	kfree(lut);
 	return ret;
 }
 
@@ -3043,8 +3093,10 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
+	struct device *dev;
 	u8 *seed = NULL;
 
+	dev = ice_pf_to_dev(pf);
 	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
 		return -EOPNOTSUPP;
 
@@ -3057,8 +3109,7 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
 	if (key) {
 		if (!vsi->rss_hkey_user) {
 			vsi->rss_hkey_user =
-				devm_kzalloc(&pf->pdev->dev,
-					     ICE_VSIQF_HKEY_ARRAY_SIZE,
+				devm_kzalloc(dev, ICE_VSIQF_HKEY_ARRAY_SIZE,
 					     GFP_KERNEL);
 			if (!vsi->rss_hkey_user)
 				return -ENOMEM;
@@ -3068,8 +3119,7 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
 	}
 
 	if (!vsi->rss_lut_user) {
-		vsi->rss_lut_user = devm_kzalloc(&pf->pdev->dev,
-						 vsi->rss_table_size,
+		vsi->rss_lut_user = devm_kzalloc(dev, vsi->rss_table_size,
 						 GFP_KERNEL);
 		if (!vsi->rss_lut_user)
 			return -ENOMEM;
@@ -3092,6 +3142,188 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
 	return 0;
 }
 
+/**
+ * ice_get_max_txq - return the maximum number of Tx queues for in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_txq(struct ice_pf *pf)
+{
+	return min_t(int, num_online_cpus(),
+		     pf->hw.func_caps.common_cap.num_txq);
+}
+
+/**
+ * ice_get_max_rxq - return the maximum number of Rx queues for in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_rxq(struct ice_pf *pf)
+{
+	return min_t(int, num_online_cpus(),
+		     pf->hw.func_caps.common_cap.num_rxq);
+}
+
+/**
+ * ice_get_combined_cnt - return the current number of combined channels
+ * @vsi: PF VSI pointer
+ *
+ * Go through all queue vectors and count ones that have both Rx and Tx ring
+ * attached
+ */
+static u32 ice_get_combined_cnt(struct ice_vsi *vsi)
+{
+	u32 combined = 0;
+	int q_idx;
+
+	ice_for_each_q_vector(vsi, q_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+		if (q_vector->rx.ring && q_vector->tx.ring)
+			combined++;
+	}
+
+	return combined;
+}
+
+/**
+ * ice_get_channels - get the current and max supported channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static void
+ice_get_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+
+	/* check to see if VSI is active */
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return;
+
+	/* report maximum channels */
+	ch->max_rx = ice_get_max_rxq(pf);
+	ch->max_tx = ice_get_max_txq(pf);
+	ch->max_combined = min_t(int, ch->max_rx, ch->max_tx);
+
+	/* report current channels */
+	ch->combined_count = ice_get_combined_cnt(vsi);
+	ch->rx_count = vsi->num_rxq - ch->combined_count;
+	ch->tx_count = vsi->num_txq - ch->combined_count;
+}
+
+/**
+ * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size
+ * @vsi: VSI to reconfigure RSS LUT on
+ * @req_rss_size: requested range of queue numbers for hashing
+ *
+ * Set the VSI's RSS parameters, configure the RSS LUT based on these.
+ */
+static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
+{
+	struct ice_pf *pf = vsi->back;
+	enum ice_status status;
+	struct device *dev;
+	struct ice_hw *hw;
+	int err = 0;
+	u8 *lut;
+
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+
+	if (!req_rss_size)
+		return -EINVAL;
+
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	/* set RSS LUT parameters */
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+		vsi->rss_size = 1;
+	} else {
+		struct ice_hw_common_caps *caps = &hw->func_caps.common_cap;
+
+		vsi->rss_size = min_t(int, req_rss_size,
+				      BIT(caps->rss_table_entry_width));
+	}
+
+	/* create/set RSS LUT */
+	ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
+	status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut,
+				    vsi->rss_table_size);
+	if (status) {
+		dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n",
+			status, hw->adminq.rq_last_status);
+		err = -EIO;
+	}
+
+	kfree(lut);
+	return err;
+}
+
+/**
+ * ice_set_channels - set the number channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int new_rx = 0, new_tx = 0;
+	u32 curr_combined;
+
+	/* do not support changing channels in Safe Mode */
+	if (ice_is_safe_mode(pf)) {
+		netdev_err(dev, "Changing channel in Safe Mode is not supported\n");
+		return -EOPNOTSUPP;
+	}
+	/* do not support changing other_count */
+	if (ch->other_count)
+		return -EINVAL;
+
+	curr_combined = ice_get_combined_cnt(vsi);
+
+	/* these checks are for cases where user didn't specify a particular
+	 * value on cmd line but we get non-zero value anyway via
+	 * get_channels(); look at ethtool.c in ethtool repository (the user
+	 * space part), particularly, do_schannels() routine
+	 */
+	if (ch->rx_count == vsi->num_rxq - curr_combined)
+		ch->rx_count = 0;
+	if (ch->tx_count == vsi->num_txq - curr_combined)
+		ch->tx_count = 0;
+	if (ch->combined_count == curr_combined)
+		ch->combined_count = 0;
+
+	if (!(ch->combined_count || (ch->rx_count && ch->tx_count))) {
+		netdev_err(dev, "Please specify at least 1 Rx and 1 Tx channel\n");
+		return -EINVAL;
+	}
+
+	new_rx = ch->combined_count + ch->rx_count;
+	new_tx = ch->combined_count + ch->tx_count;
+
+	if (new_rx > ice_get_max_rxq(pf)) {
+		netdev_err(dev, "Maximum allowed Rx channels is %d\n",
+			   ice_get_max_rxq(pf));
+		return -EINVAL;
+	}
+	if (new_tx > ice_get_max_txq(pf)) {
+		netdev_err(dev, "Maximum allowed Tx channels is %d\n",
+			   ice_get_max_txq(pf));
+		return -EINVAL;
+	}
+
+	ice_vsi_recfg_qs(vsi, new_rx, new_tx);
+
+	if (new_rx && !netif_is_rxfh_configured(dev))
+		return ice_vsi_set_dflt_rss_lut(vsi, new_rx);
+
+	return 0;
+}
+
 enum ice_container_type {
 	ICE_RX_CONTAINER,
 	ICE_TX_CONTAINER,
@@ -3131,7 +3363,7 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type,
 		ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
 		break;
 	default:
-		dev_dbg(&pf->pdev->dev, "Invalid c_type %d\n", c_type);
+		dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type);
 		return -EINVAL;
 	}
 
@@ -3271,7 +3503,8 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
 
 		break;
 	default:
-		dev_dbg(&pf->pdev->dev, "Invalid container type %d\n", c_type);
+		dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n",
+			c_type);
 		return -EINVAL;
 	}
 
@@ -3368,10 +3601,17 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
 	struct ice_vsi *vsi = np->vsi;
 
 	if (q_num < 0) {
-		int i;
+		int v_idx;
+
+		ice_for_each_q_vector(vsi, v_idx) {
+			/* In some cases if DCB is configured the num_[rx|tx]q
+			 * can be less than vsi->num_q_vectors. This check
+			 * accounts for that so we don't report a false failure
+			 */
+			if (v_idx >= vsi->num_rxq && v_idx >= vsi->num_txq)
+				goto set_complete;
 
-		ice_for_each_q_vector(vsi, i) {
-			if (ice_set_q_coalesce(vsi, ec, i))
+			if (ice_set_q_coalesce(vsi, ec, v_idx))
 				return -EINVAL;
 		}
 		goto set_complete;
@@ -3398,6 +3638,151 @@ ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
 	return __ice_set_coalesce(netdev, ec, q_num);
 }
 
+#define ICE_I2C_EEPROM_DEV_ADDR		0xA0
+#define ICE_I2C_EEPROM_DEV_ADDR2	0xA2
+#define ICE_MODULE_TYPE_SFP		0x03
+#define ICE_MODULE_TYPE_QSFP_PLUS	0x0D
+#define ICE_MODULE_TYPE_QSFP28		0x11
+#define ICE_MODULE_SFF_ADDR_MODE	0x04
+#define ICE_MODULE_SFF_DIAG_CAPAB	0x40
+#define ICE_MODULE_REVISION_ADDR	0x01
+#define ICE_MODULE_SFF_8472_COMP	0x5E
+#define ICE_MODULE_SFF_8472_SWAP	0x5C
+#define ICE_MODULE_QSFP_MAX_LEN		640
+
+/**
+ * ice_get_module_info - get SFF module type and revision information
+ * @netdev: network interface device structure
+ * @modinfo: module EEPROM size and layout information structure
+ */
+static int
+ice_get_module_info(struct net_device *netdev,
+		    struct ethtool_modinfo *modinfo)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u8 sff8472_comp = 0;
+	u8 sff8472_swap = 0;
+	u8 sff8636_rev = 0;
+	u8 value = 0;
+
+	status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0x00, 0x00,
+				   0, &value, 1, 0, NULL);
+	if (status)
+		return -EIO;
+
+	switch (value) {
+	case ICE_MODULE_TYPE_SFP:
+		status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+					   ICE_MODULE_SFF_8472_COMP, 0x00, 0,
+					   &sff8472_comp, 1, 0, NULL);
+		if (status)
+			return -EIO;
+		status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+					   ICE_MODULE_SFF_8472_SWAP, 0x00, 0,
+					   &sff8472_swap, 1, 0, NULL);
+		if (status)
+			return -EIO;
+
+		if (sff8472_swap & ICE_MODULE_SFF_ADDR_MODE) {
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		} else if (sff8472_comp &&
+			   (sff8472_swap & ICE_MODULE_SFF_DIAG_CAPAB)) {
+			modinfo->type = ETH_MODULE_SFF_8472;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		}
+		break;
+	case ICE_MODULE_TYPE_QSFP_PLUS:
+	case ICE_MODULE_TYPE_QSFP28:
+		status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+					   ICE_MODULE_REVISION_ADDR, 0x00, 0,
+					   &sff8636_rev, 1, 0, NULL);
+		if (status)
+			return -EIO;
+		/* Check revision compliance */
+		if (sff8636_rev > 0x02) {
+			/* Module is SFF-8636 compliant */
+			modinfo->type = ETH_MODULE_SFF_8636;
+			modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8436;
+			modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+		}
+		break;
+	default:
+		netdev_warn(netdev,
+			    "SFF Module Type not recognized.\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * ice_get_module_eeprom - fill buffer with SFF EEPROM contents
+ * @netdev: network interface device structure
+ * @ee: EEPROM dump request structure
+ * @data: buffer to be filled with EEPROM contents
+ */
+static int
+ice_get_module_eeprom(struct net_device *netdev,
+		      struct ethtool_eeprom *ee, u8 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	u8 addr = ICE_I2C_EEPROM_DEV_ADDR;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	bool is_sfp = false;
+	u16 offset = 0;
+	u8 value = 0;
+	u8 page = 0;
+	int i;
+
+	status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0,
+				   &value, 1, 0, NULL);
+	if (status)
+		return -EIO;
+
+	if (!ee || !ee->len || !data)
+		return -EINVAL;
+
+	if (value == ICE_MODULE_TYPE_SFP)
+		is_sfp = true;
+
+	for (i = 0; i < ee->len; i++) {
+		offset = i + ee->offset;
+
+		/* Check if we need to access the other memory page */
+		if (is_sfp) {
+			if (offset >= ETH_MODULE_SFF_8079_LEN) {
+				offset -= ETH_MODULE_SFF_8079_LEN;
+				addr = ICE_I2C_EEPROM_DEV_ADDR2;
+			}
+		} else {
+			while (offset >= ETH_MODULE_SFF_8436_LEN) {
+				/* Compute memory page number and offset. */
+				offset -= ETH_MODULE_SFF_8436_LEN / 2;
+				page++;
+			}
+		}
+
+		status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp,
+					   &value, 1, 0, NULL);
+		if (status)
+			value = 0;
+		data[i] = value;
+	}
+	return 0;
+}
+
 static const struct ethtool_ops ice_ethtool_ops = {
 	.get_link_ksettings	= ice_get_link_ksettings,
 	.set_link_ksettings	= ice_set_link_ksettings,
@@ -3428,11 +3813,15 @@ static const struct ethtool_ops ice_ethtool_ops = {
 	.get_rxfh_indir_size	= ice_get_rxfh_indir_size,
 	.get_rxfh		= ice_get_rxfh,
 	.set_rxfh		= ice_set_rxfh,
+	.get_channels		= ice_get_channels,
+	.set_channels		= ice_set_channels,
 	.get_ts_info		= ethtool_op_get_ts_info,
 	.get_per_queue_coalesce = ice_get_per_q_coalesce,
 	.set_per_queue_coalesce = ice_set_per_q_coalesce,
 	.get_fecparam		= ice_get_fecparam,
 	.set_fecparam		= ice_set_fecparam,
+	.get_module_info	= ice_get_module_info,
+	.get_module_eeprom	= ice_get_module_eeprom,
 };
 
 static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
@@ -3451,6 +3840,7 @@ static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
 	.get_ringparam		= ice_get_ringparam,
 	.set_ringparam		= ice_set_ringparam,
 	.nway_reset		= ice_nway_reset,
+	.get_channels		= ice_get_channels,
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 152fbd556e9b..e8f32350fed2 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -52,6 +52,9 @@
 #define PF_MBX_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
 #define PF_MBX_ATQLEN_ATQENABLE_M		BIT(31)
 #define PF_MBX_ATQT				0x0022E300
+#define PRTDCB_GENC				0x00083000
+#define PRTDCB_GENC_PFCLDA_S			16
+#define PRTDCB_GENC_PFCLDA_M			ICE_M(0xFFFF, 16)
 #define PRTDCB_GENS				0x00083020
 #define PRTDCB_GENS_DCBX_STATUS_S		0
 #define PRTDCB_GENS_DCBX_STATUS_M		ICE_M(0x7, 0)
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 2aac8f13daeb..ad34f22d44ef 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -211,7 +211,7 @@ enum ice_flex_rx_mdid {
 /* Rx/Tx Flag64 packet flag bits */
 enum ice_flg64_bits {
 	ICE_FLG_PKT_DSI		= 0,
-	ICE_FLG_EVLAN_x8100	= 15,
+	ICE_FLG_EVLAN_x8100	= 14,
 	ICE_FLG_EVLAN_x9100,
 	ICE_FLG_VLAN_x8100,
 	ICE_FLG_TNL_MAC		= 22,
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index cc755382df25..e7449248fab4 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2,232 +2,26 @@
 /* Copyright (c) 2018, Intel Corporation. */
 
 #include "ice.h"
+#include "ice_base.h"
 #include "ice_lib.h"
 #include "ice_dcb_lib.h"
 
 /**
- * ice_setup_rx_ctx - Configure a receive ring context
- * @ring: The Rx ring to configure
- *
- * Configure the Rx descriptor ring in RLAN context.
+ * ice_vsi_type_str - maps VSI type enum to string equivalents
+ * @type: VSI type enum
  */
-static int ice_setup_rx_ctx(struct ice_ring *ring)
+const char *ice_vsi_type_str(enum ice_vsi_type type)
 {
-	struct ice_vsi *vsi = ring->vsi;
-	struct ice_hw *hw = &vsi->back->hw;
-	u32 rxdid = ICE_RXDID_FLEX_NIC;
-	struct ice_rlan_ctx rlan_ctx;
-	u32 regval;
-	u16 pf_q;
-	int err;
-
-	/* what is Rx queue number in global space of 2K Rx queues */
-	pf_q = vsi->rxq_map[ring->q_index];
-
-	/* clear the context structure first */
-	memset(&rlan_ctx, 0, sizeof(rlan_ctx));
-
-	rlan_ctx.base = ring->dma >> 7;
-
-	rlan_ctx.qlen = ring->count;
-
-	/* Receive Packet Data Buffer Size.
-	 * The Packet Data Buffer Size is defined in 128 byte units.
-	 */
-	rlan_ctx.dbuf = vsi->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
-
-	/* use 32 byte descriptors */
-	rlan_ctx.dsize = 1;
-
-	/* Strip the Ethernet CRC bytes before the packet is posted to host
-	 * memory.
-	 */
-	rlan_ctx.crcstrip = 1;
-
-	/* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
-	rlan_ctx.l2tsel = 1;
-
-	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
-	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
-	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
-
-	/* This controls whether VLAN is stripped from inner headers
-	 * The VLAN in the inner L2 header is stripped to the receive
-	 * descriptor if enabled by this flag.
-	 */
-	rlan_ctx.showiv = 0;
-
-	/* Max packet size for this queue - must not be set to a larger value
-	 * than 5 x DBUF
-	 */
-	rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
-			       ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len);
-
-	/* Rx queue threshold in units of 64 */
-	rlan_ctx.lrxqthresh = 1;
-
-	 /* Enable Flexible Descriptors in the queue context which
-	  * allows this driver to select a specific receive descriptor format
-	  */
-	if (vsi->type != ICE_VSI_VF) {
-		regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
-		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
-			QRXFLXP_CNTXT_RXDID_IDX_M;
-
-		/* increasing context priority to pick up profile ID;
-		 * default is 0x01; setting to 0x03 to ensure profile
-		 * is programming if prev context is of same priority
-		 */
-		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
-			QRXFLXP_CNTXT_RXDID_PRIO_M;
-
-		wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
-	}
-
-	/* Absolute queue number out of 2K needs to be passed */
-	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
-	if (err) {
-		dev_err(&vsi->back->pdev->dev,
-			"Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
-			pf_q, err);
-		return -EIO;
-	}
-
-	if (vsi->type == ICE_VSI_VF)
-		return 0;
-
-	/* init queue specific tail register */
-	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
-	writel(0, ring->tail);
-	ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
-
-	return 0;
-}
-
-/**
- * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
- * @ring: The Tx ring to configure
- * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
- * @pf_q: queue index in the PF space
- *
- * Configure the Tx descriptor ring in TLAN context.
- */
-static void
-ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
-{
-	struct ice_vsi *vsi = ring->vsi;
-	struct ice_hw *hw = &vsi->back->hw;
-
-	tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
-
-	tlan_ctx->port_num = vsi->port_info->lport;
-
-	/* Transmit Queue Length */
-	tlan_ctx->qlen = ring->count;
-
-	ice_set_cgd_num(tlan_ctx, ring);
-
-	/* PF number */
-	tlan_ctx->pf_num = hw->pf_id;
-
-	/* queue belongs to a specific VSI type
-	 * VF / VM index should be programmed per vmvf_type setting:
-	 * for vmvf_type = VF, it is VF number between 0-256
-	 * for vmvf_type = VM, it is VM number between 0-767
-	 * for PF or EMP this field should be set to zero
-	 */
-	switch (vsi->type) {
-	case ICE_VSI_LB:
-		/* fall through */
+	switch (type) {
 	case ICE_VSI_PF:
-		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
-		break;
+		return "ICE_VSI_PF";
 	case ICE_VSI_VF:
-		/* Firmware expects vmvf_num to be absolute VF ID */
-		tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
-		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
-		break;
+		return "ICE_VSI_VF";
+	case ICE_VSI_LB:
+		return "ICE_VSI_LB";
 	default:
-		return;
-	}
-
-	/* make sure the context is associated with the right VSI */
-	tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
-
-	tlan_ctx->tso_ena = ICE_TX_LEGACY;
-	tlan_ctx->tso_qnum = pf_q;
-
-	/* Legacy or Advanced Host Interface:
-	 * 0: Advanced Host Interface
-	 * 1: Legacy Host Interface
-	 */
-	tlan_ctx->legacy_int = ICE_TX_LEGACY;
-}
-
-/**
- * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
- * @pf: the PF being configured
- * @pf_q: the PF queue
- * @ena: enable or disable state of the queue
- *
- * This routine will wait for the given Rx queue of the PF to reach the
- * enabled or disabled state.
- * Returns -ETIMEDOUT in case of failing to reach the requested state after
- * multiple retries; else will return 0 in case of success.
- */
-static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
-{
-	int i;
-
-	for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
-		if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
-			      QRX_CTRL_QENA_STAT_M))
-			return 0;
-
-		usleep_range(20, 40);
+		return "unknown";
 	}
-
-	return -ETIMEDOUT;
-}
-
-/**
- * ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
- * @vsi: the VSI being configured
- * @ena: start or stop the Rx rings
- * @rxq_idx: Rx queue index
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
-{
-	int pf_q = vsi->rxq_map[rxq_idx];
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
-	int ret = 0;
-	u32 rx_reg;
-
-	rx_reg = rd32(hw, QRX_CTRL(pf_q));
-
-	/* Skip if the queue is already in the requested state */
-	if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
-		return 0;
-
-	/* turn on/off the queue */
-	if (ena)
-		rx_reg |= QRX_CTRL_QENA_REQ_M;
-	else
-		rx_reg &= ~QRX_CTRL_QENA_REQ_M;
-	wr32(hw, QRX_CTRL(pf_q), rx_reg);
-
-	/* wait for the change to finish */
-	ret = ice_pf_rxq_wait(pf, pf_q, ena);
-	if (ret)
-		dev_err(&pf->pdev->dev,
-			"VSI idx %d Rx ring %d %sable timeout\n",
-			vsi->idx, pf_q, (ena ? "en" : "dis"));
-
-	return ret;
 }
 
 /**
@@ -258,36 +52,39 @@ static int ice_vsi_ctrl_rx_rings(struct ice_vsi *vsi, bool ena)
 static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
 
 	/* allocate memory for both Tx and Rx ring pointers */
-	vsi->tx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+	vsi->tx_rings = devm_kcalloc(dev, vsi->alloc_txq,
 				     sizeof(*vsi->tx_rings), GFP_KERNEL);
 	if (!vsi->tx_rings)
 		return -ENOMEM;
 
-	vsi->rx_rings = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
+	vsi->rx_rings = devm_kcalloc(dev, vsi->alloc_rxq,
 				     sizeof(*vsi->rx_rings), GFP_KERNEL);
 	if (!vsi->rx_rings)
 		goto err_rings;
 
-	vsi->txq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_txq,
+	/* XDP will have vsi->alloc_txq Tx queues as well, so double the size */
+	vsi->txq_map = devm_kcalloc(dev, (2 * vsi->alloc_txq),
 				    sizeof(*vsi->txq_map), GFP_KERNEL);
 
 	if (!vsi->txq_map)
 		goto err_txq_map;
 
-	vsi->rxq_map = devm_kcalloc(&pf->pdev->dev, vsi->alloc_rxq,
+	vsi->rxq_map = devm_kcalloc(dev, vsi->alloc_rxq,
 				    sizeof(*vsi->rxq_map), GFP_KERNEL);
 	if (!vsi->rxq_map)
 		goto err_rxq_map;
 
-
 	/* There is no need to allocate q_vectors for a loopback VSI. */
 	if (vsi->type == ICE_VSI_LB)
 		return 0;
 
 	/* allocate memory for q_vector pointers */
-	vsi->q_vectors = devm_kcalloc(&pf->pdev->dev, vsi->num_q_vectors,
+	vsi->q_vectors = devm_kcalloc(dev, vsi->num_q_vectors,
 				      sizeof(*vsi->q_vectors), GFP_KERNEL);
 	if (!vsi->q_vectors)
 		goto err_vectors;
@@ -295,13 +92,13 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
 	return 0;
 
 err_vectors:
-	devm_kfree(&pf->pdev->dev, vsi->rxq_map);
+	devm_kfree(dev, vsi->rxq_map);
 err_rxq_map:
-	devm_kfree(&pf->pdev->dev, vsi->txq_map);
+	devm_kfree(dev, vsi->txq_map);
 err_txq_map:
-	devm_kfree(&pf->pdev->dev, vsi->rx_rings);
+	devm_kfree(dev, vsi->rx_rings);
 err_rings:
-	devm_kfree(&pf->pdev->dev, vsi->tx_rings);
+	devm_kfree(dev, vsi->tx_rings);
 	return -ENOMEM;
 }
 
@@ -345,15 +142,24 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
 	case ICE_VSI_PF:
 		vsi->alloc_txq = min_t(int, ice_get_avail_txq_count(pf),
 				       num_online_cpus());
+		if (vsi->req_txq) {
+			vsi->alloc_txq = vsi->req_txq;
+			vsi->num_txq = vsi->req_txq;
+		}
 
 		pf->num_lan_tx = vsi->alloc_txq;
 
 		/* only 1 Rx queue unless RSS is enabled */
-		if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+		if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
 			vsi->alloc_rxq = 1;
-		else
+		} else {
 			vsi->alloc_rxq = min_t(int, ice_get_avail_rxq_count(pf),
 					       num_online_cpus());
+			if (vsi->req_rxq) {
+				vsi->alloc_rxq = vsi->req_rxq;
+				vsi->num_rxq = vsi->req_rxq;
+			}
+		}
 
 		pf->num_lan_rx = vsi->alloc_rxq;
 
@@ -375,7 +181,7 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
 		vsi->alloc_rxq = 1;
 		break;
 	default:
-		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+		dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi->type);
 		break;
 	}
 
@@ -421,7 +227,7 @@ void ice_vsi_delete(struct ice_vsi *vsi)
 	struct ice_vsi_ctx *ctxt;
 	enum ice_status status;
 
-	ctxt = devm_kzalloc(&pf->pdev->dev, sizeof(*ctxt), GFP_KERNEL);
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 	if (!ctxt)
 		return;
 
@@ -433,10 +239,10 @@ void ice_vsi_delete(struct ice_vsi *vsi)
 
 	status = ice_free_vsi(&pf->hw, vsi->idx, ctxt, false, NULL);
 	if (status)
-		dev_err(&pf->pdev->dev, "Failed to delete VSI %i in FW\n",
-			vsi->vsi_num);
+		dev_err(ice_pf_to_dev(pf), "Failed to delete VSI %i in FW - error: %d\n",
+			vsi->vsi_num, status);
 
-	devm_kfree(&pf->pdev->dev, ctxt);
+	kfree(ctxt);
 }
 
 /**
@@ -446,26 +252,29 @@ void ice_vsi_delete(struct ice_vsi *vsi)
 static void ice_vsi_free_arrays(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
 
 	/* free the ring and vector containers */
 	if (vsi->q_vectors) {
-		devm_kfree(&pf->pdev->dev, vsi->q_vectors);
+		devm_kfree(dev, vsi->q_vectors);
 		vsi->q_vectors = NULL;
 	}
 	if (vsi->tx_rings) {
-		devm_kfree(&pf->pdev->dev, vsi->tx_rings);
+		devm_kfree(dev, vsi->tx_rings);
 		vsi->tx_rings = NULL;
 	}
 	if (vsi->rx_rings) {
-		devm_kfree(&pf->pdev->dev, vsi->rx_rings);
+		devm_kfree(dev, vsi->rx_rings);
 		vsi->rx_rings = NULL;
 	}
 	if (vsi->txq_map) {
-		devm_kfree(&pf->pdev->dev, vsi->txq_map);
+		devm_kfree(dev, vsi->txq_map);
 		vsi->txq_map = NULL;
 	}
 	if (vsi->rxq_map) {
-		devm_kfree(&pf->pdev->dev, vsi->rxq_map);
+		devm_kfree(dev, vsi->rxq_map);
 		vsi->rxq_map = NULL;
 	}
 }
@@ -482,6 +291,7 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
 int ice_vsi_clear(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = NULL;
+	struct device *dev;
 
 	if (!vsi)
 		return 0;
@@ -490,10 +300,10 @@ int ice_vsi_clear(struct ice_vsi *vsi)
 		return -EINVAL;
 
 	pf = vsi->back;
+	dev = ice_pf_to_dev(pf);
 
 	if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) {
-		dev_dbg(&pf->pdev->dev, "vsi does not exist at pf->vsi[%d]\n",
-			vsi->idx);
+		dev_dbg(dev, "vsi does not exist at pf->vsi[%d]\n", vsi->idx);
 		return -EINVAL;
 	}
 
@@ -506,7 +316,7 @@ int ice_vsi_clear(struct ice_vsi *vsi)
 
 	ice_vsi_free_arrays(vsi);
 	mutex_unlock(&pf->sw_mutex);
-	devm_kfree(&pf->pdev->dev, vsi);
+	devm_kfree(dev, vsi);
 
 	return 0;
 }
@@ -539,6 +349,7 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
 static struct ice_vsi *
 ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id)
 {
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_vsi *vsi = NULL;
 
 	/* Need to protect the allocation of the VSIs at the PF level */
@@ -549,11 +360,11 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id)
 	 * is available to be populated
 	 */
 	if (pf->next_vsi == ICE_NO_VSI) {
-		dev_dbg(&pf->pdev->dev, "out of VSI slots!\n");
+		dev_dbg(dev, "out of VSI slots!\n");
 		goto unlock_pf;
 	}
 
-	vsi = devm_kzalloc(&pf->pdev->dev, sizeof(*vsi), GFP_KERNEL);
+	vsi = devm_kzalloc(dev, sizeof(*vsi), GFP_KERNEL);
 	if (!vsi)
 		goto unlock_pf;
 
@@ -585,7 +396,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id)
 			goto err_rings;
 		break;
 	default:
-		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+		dev_warn(dev, "Unknown VSI type %d\n", vsi->type);
 		goto unlock_pf;
 	}
 
@@ -598,7 +409,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id)
 	goto unlock_pf;
 
 err_rings:
-	devm_kfree(&pf->pdev->dev, vsi);
+	devm_kfree(dev, vsi);
 	vsi = NULL;
 unlock_pf:
 	mutex_unlock(&pf->sw_mutex);
@@ -606,88 +417,6 @@ unlock_pf:
 }
 
 /**
- * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
- * @qs_cfg: gathered variables needed for PF->VSI queues assignment
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
-{
-	int offset, i;
-
-	mutex_lock(qs_cfg->qs_mutex);
-	offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
-					    0, qs_cfg->q_count, 0);
-	if (offset >= qs_cfg->pf_map_size) {
-		mutex_unlock(qs_cfg->qs_mutex);
-		return -ENOMEM;
-	}
-
-	bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
-	for (i = 0; i < qs_cfg->q_count; i++)
-		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
-	mutex_unlock(qs_cfg->qs_mutex);
-
-	return 0;
-}
-
-/**
- * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
- * @qs_cfg: gathered variables needed for pf->vsi queues assignment
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
-{
-	int i, index = 0;
-
-	mutex_lock(qs_cfg->qs_mutex);
-	for (i = 0; i < qs_cfg->q_count; i++) {
-		index = find_next_zero_bit(qs_cfg->pf_map,
-					   qs_cfg->pf_map_size, index);
-		if (index >= qs_cfg->pf_map_size)
-			goto err_scatter;
-		set_bit(index, qs_cfg->pf_map);
-		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
-	}
-	mutex_unlock(qs_cfg->qs_mutex);
-
-	return 0;
-err_scatter:
-	for (index = 0; index < i; index++) {
-		clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
-		qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
-	}
-	mutex_unlock(qs_cfg->qs_mutex);
-
-	return -ENOMEM;
-}
-
-/**
- * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
- * @qs_cfg: gathered variables needed for pf->vsi queues assignment
- *
- * This function first tries to find contiguous space. If it is not successful,
- * it tries with the scatter approach.
- *
- * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
- */
-static int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
-{
-	int ret = 0;
-
-	ret = __ice_vsi_get_qs_contig(qs_cfg);
-	if (ret) {
-		/* contig failed, so try with scatter approach */
-		qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
-		qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
-					qs_cfg->scatter_count);
-		ret = __ice_vsi_get_qs_sc(qs_cfg);
-	}
-	return ret;
-}
-
-/**
  * ice_vsi_get_qs - Assign queues from PF to VSI
  * @vsi: the VSI to assign queues to
  *
@@ -769,14 +498,15 @@ bool ice_is_safe_mode(struct ice_pf *pf)
  */
 static void ice_rss_clean(struct ice_vsi *vsi)
 {
-	struct ice_pf *pf;
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
 
-	pf = vsi->back;
+	dev = ice_pf_to_dev(pf);
 
 	if (vsi->rss_hkey_user)
-		devm_kfree(&pf->pdev->dev, vsi->rss_hkey_user);
+		devm_kfree(dev, vsi->rss_hkey_user);
 	if (vsi->rss_lut_user)
-		devm_kfree(&pf->pdev->dev, vsi->rss_lut_user);
+		devm_kfree(dev, vsi->rss_lut_user);
 }
 
 /**
@@ -814,7 +544,7 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
 	case ICE_VSI_LB:
 		break;
 	default:
-		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n",
+		dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n",
 			 vsi->type);
 		break;
 	}
@@ -918,7 +648,9 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 			else
 				max_rss = ICE_MAX_SMALL_RSS_QS;
 			qcount_rx = min_t(int, rx_numq_tc, max_rss);
-			qcount_rx = min_t(int, qcount_rx, vsi->rss_size);
+			if (!vsi->req_rxq)
+				qcount_rx = min_t(int, qcount_rx,
+						  vsi->rss_size);
 		}
 	}
 
@@ -990,9 +722,11 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
 {
 	u8 lut_type, hash_type;
+	struct device *dev;
 	struct ice_pf *pf;
 
 	pf = vsi->back;
+	dev = ice_pf_to_dev(pf);
 
 	switch (vsi->type) {
 	case ICE_VSI_PF:
@@ -1006,10 +740,11 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
 		break;
 	case ICE_VSI_LB:
-		dev_dbg(&pf->pdev->dev, "Unsupported VSI type %d\n", vsi->type);
+		dev_dbg(dev, "Unsupported VSI type %s\n",
+			ice_vsi_type_str(vsi->type));
 		return;
 	default:
-		dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
+		dev_warn(dev, "Unknown VSI type %d\n", vsi->type);
 		return;
 	}
 
@@ -1022,18 +757,21 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
 /**
  * ice_vsi_init - Create and initialize a VSI
  * @vsi: the VSI being configured
+ * @init_vsi: is this call creating a VSI
  *
  * This initializes a VSI context depending on the VSI type to be added and
  * passes it down to the add_vsi aq command to create a new VSI.
  */
-static int ice_vsi_init(struct ice_vsi *vsi)
+static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
 {
 	struct ice_pf *pf = vsi->back;
 	struct ice_hw *hw = &pf->hw;
 	struct ice_vsi_ctx *ctxt;
+	struct device *dev;
 	int ret = 0;
 
-	ctxt = devm_kzalloc(&pf->pdev->dev, sizeof(*ctxt), GFP_KERNEL);
+	dev = ice_pf_to_dev(pf);
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 	if (!ctxt)
 		return -ENOMEM;
 
@@ -1050,7 +788,8 @@ static int ice_vsi_init(struct ice_vsi *vsi)
 		ctxt->vf_num = vsi->vf_id + hw->func_caps.vf_base_id;
 		break;
 	default:
-		return -ENODEV;
+		ret = -ENODEV;
+		goto out;
 	}
 
 	ice_set_dflt_vsi_ctx(ctxt);
@@ -1059,11 +798,24 @@ static int ice_vsi_init(struct ice_vsi *vsi)
 		ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
 
 	/* Set LUT type and HASH type if RSS is enabled */
-	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
 		ice_set_rss_vsi_ctx(ctxt, vsi);
+		/* if updating VSI context, make sure to set valid_section:
+		 * to indicate which section of VSI context being updated
+		 */
+		if (!init_vsi)
+			ctxt->info.valid_sections |=
+				cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+	}
 
 	ctxt->info.sw_id = vsi->port_info->sw_id;
 	ice_vsi_setup_q_map(vsi, ctxt);
+	if (!init_vsi) /* means VSI being updated */
+		/* must to indicate which section of VSI context are
+		 * being modified
+		 */
+		ctxt->info.valid_sections |=
+			cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
 
 	/* Enable MAC Antispoof with new VSI being initialized or updated */
 	if (vsi->type == ICE_VSI_VF && pf->vf[vsi->vf_id].spoofchk) {
@@ -1080,11 +832,20 @@ static int ice_vsi_init(struct ice_vsi *vsi)
 			cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
 	}
 
-	ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL);
-	if (ret) {
-		dev_err(&pf->pdev->dev,
-			"Add VSI failed, err %d\n", ret);
-		return -EIO;
+	if (init_vsi) {
+		ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL);
+		if (ret) {
+			dev_err(dev, "Add VSI failed, err %d\n", ret);
+			ret = -EIO;
+			goto out;
+		}
+	} else {
+		ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+		if (ret) {
+			dev_err(dev, "Update VSI failed, err %d\n", ret);
+			ret = -EIO;
+			goto out;
+		}
 	}
 
 	/* keep context for update VSI operations */
@@ -1093,131 +854,9 @@ static int ice_vsi_init(struct ice_vsi *vsi)
 	/* record VSI number returned */
 	vsi->vsi_num = ctxt->vsi_num;
 
-	devm_kfree(&pf->pdev->dev, ctxt);
-	return ret;
-}
-
-/**
- * ice_free_q_vector - Free memory allocated for a specific interrupt vector
- * @vsi: VSI having the memory freed
- * @v_idx: index of the vector to be freed
- */
-static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
-{
-	struct ice_q_vector *q_vector;
-	struct ice_pf *pf = vsi->back;
-	struct ice_ring *ring;
-
-	if (!vsi->q_vectors[v_idx]) {
-		dev_dbg(&pf->pdev->dev, "Queue vector at index %d not found\n",
-			v_idx);
-		return;
-	}
-	q_vector = vsi->q_vectors[v_idx];
-
-	ice_for_each_ring(ring, q_vector->tx)
-		ring->q_vector = NULL;
-	ice_for_each_ring(ring, q_vector->rx)
-		ring->q_vector = NULL;
-
-	/* only VSI with an associated netdev is set up with NAPI */
-	if (vsi->netdev)
-		netif_napi_del(&q_vector->napi);
-
-	devm_kfree(&pf->pdev->dev, q_vector);
-	vsi->q_vectors[v_idx] = NULL;
-}
-
-/**
- * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
- * @vsi: the VSI having memory freed
- */
-void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
-{
-	int v_idx;
-
-	ice_for_each_q_vector(vsi, v_idx)
-		ice_free_q_vector(vsi, v_idx);
-}
-
-/**
- * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
- * @vsi: the VSI being configured
- * @v_idx: index of the vector in the VSI struct
- *
- * We allocate one q_vector. If allocation fails we return -ENOMEM.
- */
-static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
-{
-	struct ice_pf *pf = vsi->back;
-	struct ice_q_vector *q_vector;
-
-	/* allocate q_vector */
-	q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
-	if (!q_vector)
-		return -ENOMEM;
-
-	q_vector->vsi = vsi;
-	q_vector->v_idx = v_idx;
-	if (vsi->type == ICE_VSI_VF)
-		goto out;
-	/* only set affinity_mask if the CPU is online */
-	if (cpu_online(v_idx))
-		cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
-
-	/* This will not be called in the driver load path because the netdev
-	 * will not be created yet. All other cases with register the NAPI
-	 * handler here (i.e. resume, reset/rebuild, etc.)
-	 */
-	if (vsi->netdev)
-		netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
-			       NAPI_POLL_WEIGHT);
-
 out:
-	/* tie q_vector and VSI together */
-	vsi->q_vectors[v_idx] = q_vector;
-
-	return 0;
-}
-
-/**
- * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
- * @vsi: the VSI being configured
- *
- * We allocate one q_vector per queue interrupt. If allocation fails we
- * return -ENOMEM.
- */
-static int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
-{
-	struct ice_pf *pf = vsi->back;
-	int v_idx = 0, num_q_vectors;
-	int err;
-
-	if (vsi->q_vectors[0]) {
-		dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
-			vsi->vsi_num);
-		return -EEXIST;
-	}
-
-	num_q_vectors = vsi->num_q_vectors;
-
-	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
-		err = ice_vsi_alloc_q_vector(vsi, v_idx);
-		if (err)
-			goto err_out;
-	}
-
-	return 0;
-
-err_out:
-	while (v_idx--)
-		ice_free_q_vector(vsi, v_idx);
-
-	dev_err(&pf->pdev->dev,
-		"Failed to allocate %d q_vector for VSI %d, ret=%d\n",
-		vsi->num_q_vectors, vsi->vsi_num, err);
-	vsi->num_q_vectors = 0;
-	return err;
+	kfree(ctxt);
+	return ret;
 }
 
 /**
@@ -1233,14 +872,16 @@ err_out:
 static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = vsi->back;
+	struct device *dev;
 	u16 num_q_vectors;
 
+	dev = ice_pf_to_dev(pf);
 	/* SRIOV doesn't grab irq_tracker entries for each VSI */
 	if (vsi->type == ICE_VSI_VF)
 		return 0;
 
 	if (vsi->base_vector) {
-		dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n",
+		dev_dbg(dev, "VSI %d has non-zero base vector %d\n",
 			vsi->vsi_num, vsi->base_vector);
 		return -EEXIST;
 	}
@@ -1250,7 +891,7 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
 	vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
 				       vsi->idx);
 	if (vsi->base_vector < 0) {
-		dev_err(&pf->pdev->dev,
+		dev_err(dev,
 			"Failed to get tracking for %d vectors for VSI %d, err=%d\n",
 			num_q_vectors, vsi->vsi_num, vsi->base_vector);
 		return -ENOENT;
@@ -1293,8 +934,10 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi)
 static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = vsi->back;
+	struct device *dev;
 	int i;
 
+	dev = ice_pf_to_dev(pf);
 	/* Allocate Tx rings */
 	for (i = 0; i < vsi->alloc_txq; i++) {
 		struct ice_ring *ring;
@@ -1309,7 +952,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
 		ring->reg_idx = vsi->txq_map[i];
 		ring->ring_active = false;
 		ring->vsi = vsi;
-		ring->dev = &pf->pdev->dev;
+		ring->dev = dev;
 		ring->count = vsi->num_tx_desc;
 		vsi->tx_rings[i] = ring;
 	}
@@ -1328,7 +971,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
 		ring->ring_active = false;
 		ring->vsi = vsi;
 		ring->netdev = vsi->netdev;
-		ring->dev = &pf->pdev->dev;
+		ring->dev = dev;
 		ring->count = vsi->num_rx_desc;
 		vsi->rx_rings[i] = ring;
 	}
@@ -1341,66 +984,6 @@ err_out:
 }
 
 /**
- * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
- * @vsi: the VSI being configured
- *
- * This function maps descriptor rings to the queue-specific vectors allotted
- * through the MSI-X enabling code. On a constrained vector budget, we map Tx
- * and Rx rings to the vector as "efficiently" as possible.
- */
-#ifdef CONFIG_DCB
-void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
-#else
-static void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
-#endif /* CONFIG_DCB */
-{
-	int q_vectors = vsi->num_q_vectors;
-	int tx_rings_rem, rx_rings_rem;
-	int v_id;
-
-	/* initially assigning remaining rings count to VSIs num queue value */
-	tx_rings_rem = vsi->num_txq;
-	rx_rings_rem = vsi->num_rxq;
-
-	for (v_id = 0; v_id < q_vectors; v_id++) {
-		struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
-		int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
-
-		/* Tx rings mapping to vector */
-		tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
-		q_vector->num_ring_tx = tx_rings_per_v;
-		q_vector->tx.ring = NULL;
-		q_vector->tx.itr_idx = ICE_TX_ITR;
-		q_base = vsi->num_txq - tx_rings_rem;
-
-		for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
-			struct ice_ring *tx_ring = vsi->tx_rings[q_id];
-
-			tx_ring->q_vector = q_vector;
-			tx_ring->next = q_vector->tx.ring;
-			q_vector->tx.ring = tx_ring;
-		}
-		tx_rings_rem -= tx_rings_per_v;
-
-		/* Rx rings mapping to vector */
-		rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
-		q_vector->num_ring_rx = rx_rings_per_v;
-		q_vector->rx.ring = NULL;
-		q_vector->rx.itr_idx = ICE_RX_ITR;
-		q_base = vsi->num_rxq - rx_rings_rem;
-
-		for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
-			struct ice_ring *rx_ring = vsi->rx_rings[q_id];
-
-			rx_ring->q_vector = q_vector;
-			rx_ring->next = q_vector->rx.ring;
-			q_vector->rx.ring = rx_ring;
-		}
-		rx_rings_rem -= rx_rings_per_v;
-	}
-}
-
-/**
  * ice_vsi_manage_rss_lut - disable/enable RSS
  * @vsi: the VSI being changed
  * @ena: boolean value indicating if this is an enable or disable request
@@ -1414,8 +997,7 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
 	int err = 0;
 	u8 *lut;
 
-	lut = devm_kzalloc(&vsi->back->pdev->dev, vsi->rss_table_size,
-			   GFP_KERNEL);
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
 	if (!lut)
 		return -ENOMEM;
 
@@ -1428,7 +1010,7 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
 	}
 
 	err = ice_set_rss(vsi, NULL, lut, vsi->rss_table_size);
-	devm_kfree(&vsi->back->pdev->dev, lut);
+	kfree(lut);
 	return err;
 }
 
@@ -1441,12 +1023,14 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
 	struct ice_aqc_get_set_rss_keys *key;
 	struct ice_pf *pf = vsi->back;
 	enum ice_status status;
+	struct device *dev;
 	int err = 0;
 	u8 *lut;
 
+	dev = ice_pf_to_dev(pf);
 	vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq);
 
-	lut = devm_kzalloc(&pf->pdev->dev, vsi->rss_table_size, GFP_KERNEL);
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
 	if (!lut)
 		return -ENOMEM;
 
@@ -1459,13 +1043,12 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
 				    vsi->rss_table_size);
 
 	if (status) {
-		dev_err(&pf->pdev->dev,
-			"set_rss_lut failed, error %d\n", status);
+		dev_err(dev, "set_rss_lut failed, error %d\n", status);
 		err = -EIO;
 		goto ice_vsi_cfg_rss_exit;
 	}
 
-	key = devm_kzalloc(&pf->pdev->dev, sizeof(*key), GFP_KERNEL);
+	key = kzalloc(sizeof(*key), GFP_KERNEL);
 	if (!key) {
 		err = -ENOMEM;
 		goto ice_vsi_cfg_rss_exit;
@@ -1482,14 +1065,13 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
 	status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key);
 
 	if (status) {
-		dev_err(&pf->pdev->dev, "set_rss_key failed, error %d\n",
-			status);
+		dev_err(dev, "set_rss_key failed, error %d\n", status);
 		err = -EIO;
 	}
 
-	devm_kfree(&pf->pdev->dev, key);
+	kfree(key);
 ice_vsi_cfg_rss_exit:
-	devm_kfree(&pf->pdev->dev, lut);
+	kfree(lut);
 	return err;
 }
 
@@ -1509,7 +1091,7 @@ int ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
 	struct ice_fltr_list_entry *tmp;
 	struct ice_pf *pf = vsi->back;
 
-	tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_ATOMIC);
+	tmp = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*tmp), GFP_ATOMIC);
 	if (!tmp)
 		return -ENOMEM;
 
@@ -1601,9 +1183,11 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid)
 	struct ice_pf *pf = vsi->back;
 	LIST_HEAD(tmp_add_list);
 	enum ice_status status;
+	struct device *dev;
 	int err = 0;
 
-	tmp = devm_kzalloc(&pf->pdev->dev, sizeof(*tmp), GFP_KERNEL);
+	dev = ice_pf_to_dev(pf);
+	tmp = devm_kzalloc(dev, sizeof(*tmp), GFP_KERNEL);
 	if (!tmp)
 		return -ENOMEM;
 
@@ -1620,11 +1204,11 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid)
 	status = ice_add_vlan(&pf->hw, &tmp_add_list);
 	if (status) {
 		err = -ENODEV;
-		dev_err(&pf->pdev->dev, "Failure Adding VLAN %d on VSI %i\n",
-			vid, vsi->vsi_num);
+		dev_err(dev, "Failure Adding VLAN %d on VSI %i\n", vid,
+			vsi->vsi_num);
 	}
 
-	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+	ice_free_fltr_list(dev, &tmp_add_list);
 	return err;
 }
 
@@ -1641,9 +1225,11 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
 	struct ice_pf *pf = vsi->back;
 	LIST_HEAD(tmp_add_list);
 	enum ice_status status;
+	struct device *dev;
 	int err = 0;
 
-	list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+	dev = ice_pf_to_dev(pf);
+	list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL);
 	if (!list)
 		return -ENOMEM;
 
@@ -1659,21 +1245,46 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
 
 	status = ice_remove_vlan(&pf->hw, &tmp_add_list);
 	if (status == ICE_ERR_DOES_NOT_EXIST) {
-		dev_dbg(&pf->pdev->dev,
+		dev_dbg(dev,
 			"Failed to remove VLAN %d on VSI %i, it does not exist, status: %d\n",
 			vid, vsi->vsi_num, status);
 	} else if (status) {
-		dev_err(&pf->pdev->dev,
+		dev_err(dev,
 			"Error removing VLAN %d on vsi %i error: %d\n",
 			vid, vsi->vsi_num, status);
 		err = -EIO;
 	}
 
-	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+	ice_free_fltr_list(dev, &tmp_add_list);
 	return err;
 }
 
 /**
+ * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
+ * @vsi: VSI
+ */
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
+{
+	if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
+		vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+		vsi->rx_buf_len = ICE_RXBUF_2048;
+#if (PAGE_SIZE < 8192)
+	} else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
+		   (vsi->netdev->mtu <= ETH_DATA_LEN)) {
+		vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
+		vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
+#endif
+	} else {
+		vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+#if (PAGE_SIZE < 8192)
+		vsi->rx_buf_len = ICE_RXBUF_3072;
+#else
+		vsi->rx_buf_len = ICE_RXBUF_2048;
+#endif
+	}
+}
+
+/**
  * ice_vsi_cfg_rxqs - Configure the VSI for Rx
  * @vsi: the VSI being configured
  *
@@ -1687,13 +1298,7 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
 	if (vsi->type == ICE_VSI_VF)
 		goto setup_rings;
 
-	if (vsi->netdev && vsi->netdev->mtu > ETH_DATA_LEN)
-		vsi->max_frame = vsi->netdev->mtu +
-			ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
-	else
-		vsi->max_frame = ICE_RXBUF_2048;
-
-	vsi->rx_buf_len = ICE_RXBUF_2048;
+	ice_vsi_cfg_frame_size(vsi);
 setup_rings:
 	/* set up individual rings */
 	for (i = 0; i < vsi->num_rxq; i++) {
@@ -1712,101 +1317,34 @@ setup_rings:
 }
 
 /**
- * ice_vsi_cfg_txq - Configure single Tx queue
- * @vsi: the VSI that queue belongs to
- * @ring: Tx ring to be configured
- * @tc_q_idx: queue index within given TC
- * @qg_buf: queue group buffer
- * @tc: TC that Tx ring belongs to
- */
-static int
-ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, u16 tc_q_idx,
-		struct ice_aqc_add_tx_qgrp *qg_buf, u8 tc)
-{
-	struct ice_tlan_ctx tlan_ctx = { 0 };
-	struct ice_aqc_add_txqs_perq *txq;
-	struct ice_pf *pf = vsi->back;
-	u8 buf_len = sizeof(*qg_buf);
-	enum ice_status status;
-	u16 pf_q;
-
-	pf_q = ring->reg_idx;
-	ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
-	/* copy context contents into the qg_buf */
-	qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
-	ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
-		    ice_tlan_ctx_info);
-
-	/* init queue specific tail reg. It is referred as
-	 * transmit comm scheduler queue doorbell.
-	 */
-	ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
-
-	/* Add unique software queue handle of the Tx queue per
-	 * TC into the VSI Tx ring
-	 */
-	ring->q_handle = tc_q_idx;
-
-	status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
-				 1, qg_buf, buf_len, NULL);
-	if (status) {
-		dev_err(&pf->pdev->dev,
-			"Failed to set LAN Tx queue context, error: %d\n",
-			status);
-		return -ENODEV;
-	}
-
-	/* Add Tx Queue TEID into the VSI Tx ring from the
-	 * response. This will complete configuring and
-	 * enabling the queue.
-	 */
-	txq = &qg_buf->txqs[0];
-	if (pf_q == le16_to_cpu(txq->txq_id))
-		ring->txq_teid = le32_to_cpu(txq->q_teid);
-
-	return 0;
-}
-
-/**
  * ice_vsi_cfg_txqs - Configure the VSI for Tx
  * @vsi: the VSI being configured
  * @rings: Tx ring array to be configured
- * @offset: offset within vsi->txq_map
  *
  * Return 0 on success and a negative value on error
  * Configure the Tx VSI for operation.
  */
 static int
-ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset)
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
 {
 	struct ice_aqc_add_tx_qgrp *qg_buf;
-	struct ice_pf *pf = vsi->back;
-	u16 q_idx = 0, i;
+	u16 q_idx = 0;
 	int err = 0;
-	u8 tc;
 
-	qg_buf = devm_kzalloc(&pf->pdev->dev, sizeof(*qg_buf), GFP_KERNEL);
+	qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL);
 	if (!qg_buf)
 		return -ENOMEM;
 
 	qg_buf->num_txqs = 1;
 
-	/* set up and configure the Tx queues for each enabled TC */
-	ice_for_each_traffic_class(tc) {
-		if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
-			break;
-
-		for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
-			err = ice_vsi_cfg_txq(vsi, rings[q_idx], i + offset,
-					      qg_buf, tc);
-			if (err)
-				goto err_cfg_txqs;
-
-			q_idx++;
-		}
+	for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+		err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
+		if (err)
+			goto err_cfg_txqs;
 	}
+
 err_cfg_txqs:
-	devm_kfree(&pf->pdev->dev, qg_buf);
+	kfree(qg_buf);
 	return err;
 }
 
@@ -1819,159 +1357,46 @@ err_cfg_txqs:
  */
 int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
 {
-	return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, 0);
-}
-
-/**
- * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
- * @intrl: interrupt rate limit in usecs
- * @gran: interrupt rate limit granularity in usecs
- *
- * This function converts a decimal interrupt rate limit in usecs to the format
- * expected by firmware.
- */
-u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
-{
-	u32 val = intrl / gran;
-
-	if (val)
-		return val | GLINT_RATE_INTRL_ENA_M;
-	return 0;
-}
-
-/**
- * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
- * @hw: board specific structure
- */
-static void ice_cfg_itr_gran(struct ice_hw *hw)
-{
-	u32 regval = rd32(hw, GLINT_CTL);
-
-	/* no need to update global register if ITR gran is already set */
-	if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
-	    (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
-	     GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
-	    (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
-	     GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
-	    (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
-	     GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
-	    (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
-	      GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
-		return;
-
-	regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
-		  GLINT_CTL_ITR_GRAN_200_M) |
-		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
-		  GLINT_CTL_ITR_GRAN_100_M) |
-		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
-		  GLINT_CTL_ITR_GRAN_50_M) |
-		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
-		  GLINT_CTL_ITR_GRAN_25_M);
-	wr32(hw, GLINT_CTL, regval);
-}
-
-/**
- * ice_cfg_itr - configure the initial interrupt throttle values
- * @hw: pointer to the HW structure
- * @q_vector: interrupt vector that's being configured
- *
- * Configure interrupt throttling values for the ring containers that are
- * associated with the interrupt vector passed in.
- */
-static void
-ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
-{
-	ice_cfg_itr_gran(hw);
-
-	if (q_vector->num_ring_rx) {
-		struct ice_ring_container *rc = &q_vector->rx;
-
-		/* if this value is set then don't overwrite with default */
-		if (!rc->itr_setting)
-			rc->itr_setting = ICE_DFLT_RX_ITR;
-
-		rc->target_itr = ITR_TO_REG(rc->itr_setting);
-		rc->next_update = jiffies + 1;
-		rc->current_itr = rc->target_itr;
-		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
-		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
-	}
-
-	if (q_vector->num_ring_tx) {
-		struct ice_ring_container *rc = &q_vector->tx;
-
-		/* if this value is set then don't overwrite with default */
-		if (!rc->itr_setting)
-			rc->itr_setting = ICE_DFLT_TX_ITR;
-
-		rc->target_itr = ITR_TO_REG(rc->itr_setting);
-		rc->next_update = jiffies + 1;
-		rc->current_itr = rc->target_itr;
-		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
-		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
-	}
+	return ice_vsi_cfg_txqs(vsi, vsi->tx_rings);
 }
 
 /**
- * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI
  * @vsi: the VSI being configured
- * @txq: Tx queue being mapped to MSI-X vector
- * @msix_idx: MSI-X vector index within the function
- * @itr_idx: ITR index of the interrupt cause
  *
- * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
- * within the function space.
+ * Return 0 on success and a negative value on error
+ * Configure the Tx queues dedicated for XDP in given VSI for operation.
  */
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
-#else
-static void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
-#endif /* CONFIG_PCI_IOV */
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
 {
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
-	u32 val;
+	int ret;
+	int i;
 
-	itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+	ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
+	if (ret)
+		return ret;
 
-	val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
-	      ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+	for (i = 0; i < vsi->num_xdp_txq; i++)
+		vsi->xdp_rings[i]->xsk_umem = ice_xsk_umem(vsi->xdp_rings[i]);
 
-	wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+	return ret;
 }
 
 /**
- * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
- * @vsi: the VSI being configured
- * @rxq: Rx queue being mapped to MSI-X vector
- * @msix_idx: MSI-X vector index within the function
- * @itr_idx: ITR index of the interrupt cause
+ * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
+ * @intrl: interrupt rate limit in usecs
+ * @gran: interrupt rate limit granularity in usecs
  *
- * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
- * within the function space.
+ * This function converts a decimal interrupt rate limit in usecs to the format
+ * expected by firmware.
  */
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
-#else
-static void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
-#endif /* CONFIG_PCI_IOV */
+u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
 {
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
-	u32 val;
-
-	itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
-
-	val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
-	      ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
-
-	wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+	u32 val = intrl / gran;
 
-	ice_flush(hw);
+	if (val)
+		return val | GLINT_RATE_INTRL_ENA_M;
+	return 0;
 }
 
 /**
@@ -2028,13 +1453,12 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi)
  */
 int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
 {
-	struct device *dev = &vsi->back->pdev->dev;
 	struct ice_hw *hw = &vsi->back->hw;
 	struct ice_vsi_ctx *ctxt;
 	enum ice_status status;
 	int ret = 0;
 
-	ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 	if (!ctxt)
 		return -ENOMEM;
 
@@ -2052,7 +1476,7 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
 
 	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
 	if (status) {
-		dev_err(dev, "update VSI for VLAN insert failed, err %d aq_err %d\n",
+		dev_err(&vsi->back->pdev->dev, "update VSI for VLAN insert failed, err %d aq_err %d\n",
 			status, hw->adminq.sq_last_status);
 		ret = -EIO;
 		goto out;
@@ -2060,7 +1484,7 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
 
 	vsi->info.vlan_flags = ctxt->info.vlan_flags;
 out:
-	devm_kfree(dev, ctxt);
+	kfree(ctxt);
 	return ret;
 }
 
@@ -2071,13 +1495,12 @@ out:
  */
 int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
 {
-	struct device *dev = &vsi->back->pdev->dev;
 	struct ice_hw *hw = &vsi->back->hw;
 	struct ice_vsi_ctx *ctxt;
 	enum ice_status status;
 	int ret = 0;
 
-	ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 	if (!ctxt)
 		return -ENOMEM;
 
@@ -2099,7 +1522,7 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
 
 	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
 	if (status) {
-		dev_err(dev, "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n",
+		dev_err(&vsi->back->pdev->dev, "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n",
 			ena, status, hw->adminq.sq_last_status);
 		ret = -EIO;
 		goto out;
@@ -2107,7 +1530,7 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
 
 	vsi->info.vlan_flags = ctxt->info.vlan_flags;
 out:
-	devm_kfree(dev, ctxt);
+	kfree(ctxt);
 	return ret;
 }
 
@@ -2134,109 +1557,6 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi)
 }
 
 /**
- * ice_trigger_sw_intr - trigger a software interrupt
- * @hw: pointer to the HW structure
- * @q_vector: interrupt vector to trigger the software interrupt for
- */
-void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
-{
-	wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
-	     (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
-	     GLINT_DYN_CTL_SWINT_TRIG_M |
-	     GLINT_DYN_CTL_INTENA_M);
-}
-
-/**
- * ice_vsi_stop_tx_ring - Disable single Tx ring
- * @vsi: the VSI being configured
- * @rst_src: reset source
- * @rel_vmvf_num: Relative ID of VF/VM
- * @ring: Tx ring to be stopped
- * @txq_meta: Meta data of Tx ring to be stopped
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-int
-ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
-		     u16 rel_vmvf_num, struct ice_ring *ring,
-		     struct ice_txq_meta *txq_meta)
-{
-	struct ice_pf *pf = vsi->back;
-	struct ice_q_vector *q_vector;
-	struct ice_hw *hw = &pf->hw;
-	enum ice_status status;
-	u32 val;
-
-	/* clear cause_ena bit for disabled queues */
-	val = rd32(hw, QINT_TQCTL(ring->reg_idx));
-	val &= ~QINT_TQCTL_CAUSE_ENA_M;
-	wr32(hw, QINT_TQCTL(ring->reg_idx), val);
-
-	/* software is expected to wait for 100 ns */
-	ndelay(100);
-
-	/* trigger a software interrupt for the vector
-	 * associated to the queue to schedule NAPI handler
-	 */
-	q_vector = ring->q_vector;
-	if (q_vector)
-		ice_trigger_sw_intr(hw, q_vector);
-
-	status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
-				 txq_meta->tc, 1, &txq_meta->q_handle,
-				 &txq_meta->q_id, &txq_meta->q_teid, rst_src,
-				 rel_vmvf_num, NULL);
-
-	/* if the disable queue command was exercised during an
-	 * active reset flow, ICE_ERR_RESET_ONGOING is returned.
-	 * This is not an error as the reset operation disables
-	 * queues at the hardware level anyway.
-	 */
-	if (status == ICE_ERR_RESET_ONGOING) {
-		dev_dbg(&vsi->back->pdev->dev,
-			"Reset in progress. LAN Tx queues already disabled\n");
-	} else if (status == ICE_ERR_DOES_NOT_EXIST) {
-		dev_dbg(&vsi->back->pdev->dev,
-			"LAN Tx queues do not exist, nothing to disable\n");
-	} else if (status) {
-		dev_err(&vsi->back->pdev->dev,
-			"Failed to disable LAN Tx queues, error: %d\n", status);
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
-/**
- * ice_fill_txq_meta - Prepare the Tx queue's meta data
- * @vsi: VSI that ring belongs to
- * @ring: ring that txq_meta will be based on
- * @txq_meta: a helper struct that wraps Tx queue's information
- *
- * Set up a helper struct that will contain all the necessary fields that
- * are needed for stopping Tx queue
- */
-#ifndef CONFIG_PCI_IOV
-static
-#endif /* !CONFIG_PCI_IOV */
-void
-ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
-		  struct ice_txq_meta *txq_meta)
-{
-	u8 tc = 0;
-
-#ifdef CONFIG_DCB
-	tc = ring->dcb_tc;
-#endif /* CONFIG_DCB */
-	txq_meta->q_id = ring->reg_idx;
-	txq_meta->q_teid = ring->txq_teid;
-	txq_meta->q_handle = ring->q_handle;
-	txq_meta->vsi_idx = vsi->idx;
-	txq_meta->tc = tc;
-}
-
-/**
  * ice_vsi_stop_tx_rings - Disable Tx rings
  * @vsi: the VSI being configured
  * @rst_src: reset source
@@ -2247,34 +1567,24 @@ static int
 ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 		      u16 rel_vmvf_num, struct ice_ring **rings)
 {
-	u16 i, q_idx = 0;
-	int status;
-	u8 tc;
+	u16 q_idx;
 
 	if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
 		return -EINVAL;
 
-	/* set up the Tx queue list to be disabled for each enabled TC */
-	ice_for_each_traffic_class(tc) {
-		if (!(vsi->tc_cfg.ena_tc & BIT(tc)))
-			break;
-
-		for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
-			struct ice_txq_meta txq_meta = { };
-
-			if (!rings || !rings[q_idx])
-				return -EINVAL;
+	for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+		struct ice_txq_meta txq_meta = { };
+		int status;
 
-			ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
-			status = ice_vsi_stop_tx_ring(vsi, rst_src,
-						      rel_vmvf_num,
-						      rings[q_idx], &txq_meta);
+		if (!rings || !rings[q_idx])
+			return -EINVAL;
 
-			if (status)
-				return status;
+		ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
+		status = ice_vsi_stop_tx_ring(vsi, rst_src, rel_vmvf_num,
+					      rings[q_idx], &txq_meta);
 
-			q_idx++;
-		}
+		if (status)
+			return status;
 	}
 
 	return 0;
@@ -2294,6 +1604,15 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 }
 
 /**
+ * ice_vsi_stop_xdp_tx_rings - Disable XDP Tx rings
+ * @vsi: the VSI being configured
+ */
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
+{
+	return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings);
+}
+
+/**
  * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
  * @vsi: VSI to enable or disable VLAN pruning on
  * @ena: set to true to enable VLAN pruning and false to disable it
@@ -2304,7 +1623,6 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc)
 {
 	struct ice_vsi_ctx *ctxt;
-	struct device *dev;
 	struct ice_pf *pf;
 	int status;
 
@@ -2312,8 +1630,7 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc)
 		return -EINVAL;
 
 	pf = vsi->back;
-	dev = &pf->pdev->dev;
-	ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 	if (!ctxt)
 		return -ENOMEM;
 
@@ -2347,11 +1664,11 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc)
 	vsi->info.sec_flags = ctxt->info.sec_flags;
 	vsi->info.sw_flags2 = ctxt->info.sw_flags2;
 
-	devm_kfree(dev, ctxt);
+	kfree(ctxt);
 	return 0;
 
 err_out:
-	devm_kfree(dev, ctxt);
+	kfree(ctxt);
 	return -EIO;
 }
 
@@ -2420,8 +1737,10 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule)
 	struct ice_pf *pf = vsi->back;
 	LIST_HEAD(tmp_add_list);
 	enum ice_status status;
+	struct device *dev;
 
-	list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+	dev = ice_pf_to_dev(pf);
+	list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL);
 	if (!list)
 		return;
 
@@ -2441,11 +1760,11 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule)
 		status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
 
 	if (status)
-		dev_err(&pf->pdev->dev,
+		dev_err(dev,
 			"Failure Adding or Removing Ethertype on VSI %i error: %d\n",
 			vsi->vsi_num, status);
 
-	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+	ice_free_fltr_list(dev, &tmp_add_list);
 }
 
 /**
@@ -2460,8 +1779,10 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
 	struct ice_pf *pf = vsi->back;
 	LIST_HEAD(tmp_add_list);
 	enum ice_status status;
+	struct device *dev;
 
-	list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+	dev = ice_pf_to_dev(pf);
+	list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL);
 	if (!list)
 		return;
 
@@ -2488,12 +1809,11 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
 		status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
 
 	if (status)
-		dev_err(&pf->pdev->dev,
-			"Fail %s %s LLDP rule on VSI %i error: %d\n",
+		dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %d\n",
 			create ? "adding" : "removing", tx ? "TX" : "RX",
 			vsi->vsi_num, status);
 
-	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+	ice_free_fltr_list(dev, &tmp_add_list);
 }
 
 /**
@@ -2515,7 +1835,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	      enum ice_vsi_type type, u16 vf_id)
 {
 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
-	struct device *dev = &pf->pdev->dev;
+	struct device *dev = ice_pf_to_dev(pf);
 	enum ice_status status;
 	struct ice_vsi *vsi;
 	int ret, i;
@@ -2551,7 +1871,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	ice_vsi_set_tc_cfg(vsi);
 
 	/* create the VSI */
-	ret = ice_vsi_init(vsi);
+	ret = ice_vsi_init(vsi, true);
 	if (ret)
 		goto unroll_get_qs;
 
@@ -2624,8 +1944,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
 				 max_txqs);
 	if (status) {
-		dev_err(&pf->pdev->dev,
-			"VSI %d failed lan queue config, error %d\n",
+		dev_err(dev, "VSI %d failed lan queue config, error %d\n",
 			vsi->vsi_num, status);
 		goto unroll_vector_base;
 	}
@@ -2635,23 +1954,17 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	 * out PAUSE or PFC frames. If enabled, FW can still send FC frames.
 	 * The rule is added once for PF VSI in order to create appropriate
 	 * recipe, since VSI/VSI list is ignored with drop action...
-	 * Also add rules to handle LLDP Tx and Rx packets.  Tx LLDP packets
-	 * need to be dropped so that VFs cannot send LLDP packets to reconfig
-	 * DCB settings in the HW.  Also, if the FW DCBX engine is not running
-	 * then Rx LLDP packets need to be redirected up the stack.
+	 * Also add rules to handle LLDP Tx packets.  Tx LLDP packets need to
+	 * be dropped so that VFs cannot send LLDP packets to reconfig DCB
+	 * settings in the HW.
 	 */
-	if (!ice_is_safe_mode(pf)) {
+	if (!ice_is_safe_mode(pf))
 		if (vsi->type == ICE_VSI_PF) {
 			ice_vsi_add_rem_eth_mac(vsi, true);
 
 			/* Tx LLDP packets */
 			ice_cfg_sw_lldp(vsi, true, true);
-
-			/* Rx LLDP packets */
-			if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
-				ice_cfg_sw_lldp(vsi, false, true);
 		}
-	}
 
 	return vsi;
 
@@ -2690,6 +2003,11 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi)
 		wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0);
 		for (q = 0; q < q_vector->num_ring_tx; q++) {
 			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
+			if (ice_is_xdp_ena_vsi(vsi)) {
+				u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+				wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0);
+			}
 			txq++;
 		}
 
@@ -2738,8 +2056,7 @@ void ice_vsi_free_irq(struct ice_vsi *vsi)
 		/* clear the affinity_mask in the IRQ descriptor */
 		irq_set_affinity_hint(irq_num, NULL);
 		synchronize_irq(irq_num);
-		devm_free_irq(&pf->pdev->dev, irq_num,
-			      vsi->q_vectors[i]);
+		devm_free_irq(ice_pf_to_dev(pf), irq_num, vsi->q_vectors[i]);
 	}
 }
 
@@ -2790,6 +2107,62 @@ void ice_vsi_close(struct ice_vsi *vsi)
 }
 
 /**
+ * ice_ena_vsi - resume a VSI
+ * @vsi: the VSI being resume
+ * @locked: is the rtnl_lock already held
+ */
+int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
+{
+	int err = 0;
+
+	if (!test_bit(__ICE_NEEDS_RESTART, vsi->state))
+		return 0;
+
+	clear_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+		if (netif_running(vsi->netdev)) {
+			if (!locked)
+				rtnl_lock();
+
+			err = ice_open(vsi->netdev);
+
+			if (!locked)
+				rtnl_unlock();
+		}
+	}
+
+	return err;
+}
+
+/**
+ * ice_dis_vsi - pause a VSI
+ * @vsi: the VSI being paused
+ * @locked: is the rtnl_lock already held
+ */
+void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
+{
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return;
+
+	set_bit(__ICE_NEEDS_RESTART, vsi->state);
+
+	if (vsi->type == ICE_VSI_PF && vsi->netdev) {
+		if (netif_running(vsi->netdev)) {
+			if (!locked)
+				rtnl_lock();
+
+			ice_stop(vsi->netdev);
+
+			if (!locked)
+				rtnl_unlock();
+		} else {
+			ice_vsi_close(vsi);
+		}
+	}
+}
+
+/**
  * ice_free_res - free a block of resources
  * @res: pointer to the resource
  * @index: starting index previously returned by ice_get_res
@@ -2869,7 +2242,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
 		return -EINVAL;
 
 	if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) {
-		dev_err(&pf->pdev->dev,
+		dev_err(ice_pf_to_dev(pf),
 			"param err: needed=%d, num_entries = %d id=0x%04x\n",
 			needed, res->num_entries, id);
 		return -EINVAL;
@@ -3031,10 +2404,11 @@ int ice_vsi_release(struct ice_vsi *vsi)
 /**
  * ice_vsi_rebuild - Rebuild VSI after reset
  * @vsi: VSI to be rebuild
+ * @init_vsi: is this an initialization or a reconfigure of the VSI
  *
  * Returns 0 on success and negative value on failure
  */
-int ice_vsi_rebuild(struct ice_vsi *vsi)
+int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 {
 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
 	struct ice_vf *vf = NULL;
@@ -3064,6 +2438,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 		vsi->base_vector = 0;
 	}
 
+	if (ice_is_xdp_ena_vsi(vsi))
+		/* return value check can be skipped here, it always returns
+		 * 0 if reset is in progress
+		 */
+		ice_destroy_xdp_rings(vsi);
 	ice_vsi_put_qs(vsi);
 	ice_vsi_clear_rings(vsi);
 	ice_vsi_free_arrays(vsi);
@@ -3081,11 +2460,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 	ice_vsi_set_tc_cfg(vsi);
 
 	/* Initialize VSI struct elements and create VSI in FW */
-	ret = ice_vsi_init(vsi);
+	ret = ice_vsi_init(vsi, init_vsi);
 	if (ret < 0)
 		goto err_vsi;
 
-
 	switch (vsi->type) {
 	case ICE_VSI_PF:
 		ret = ice_vsi_alloc_q_vectors(vsi);
@@ -3105,6 +2483,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 			goto err_vectors;
 
 		ice_vsi_map_rings_to_vectors(vsi);
+		if (ice_is_xdp_ena_vsi(vsi)) {
+			vsi->num_xdp_txq = vsi->alloc_txq;
+			ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
+			if (ret)
+				goto err_vectors;
+		}
 		/* Do not exit if configuring RSS had an issue, at least
 		 * receive traffic on first queue. Hence no need to capture
 		 * return value
@@ -3131,16 +2515,25 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
 	}
 
 	/* configure VSI nodes based on number of queues and TC's */
-	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+	for (i = 0; i < vsi->tc_cfg.numtc; i++) {
 		max_txqs[i] = vsi->alloc_txq;
 
+		if (ice_is_xdp_ena_vsi(vsi))
+			max_txqs[i] += vsi->num_xdp_txq;
+	}
+
 	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
 				 max_txqs);
 	if (status) {
-		dev_err(&pf->pdev->dev,
+		dev_err(ice_pf_to_dev(pf),
 			"VSI %d failed lan queue config, error %d\n",
 			vsi->vsi_num, status);
-		goto err_vectors;
+		if (init_vsi) {
+			ret = -EIO;
+			goto err_vectors;
+		} else {
+			return ice_schedule_reset(pf, ICE_RESET_PFR);
+		}
 	}
 	return 0;
 
@@ -3166,6 +2559,7 @@ err_vsi:
 bool ice_is_reset_in_progress(unsigned long *state)
 {
 	return test_bit(__ICE_RESET_OICR_RECV, state) ||
+	       test_bit(__ICE_DCBNL_DEVRESET, state) ||
 	       test_bit(__ICE_PFR_REQ, state) ||
 	       test_bit(__ICE_CORER_REQ, state) ||
 	       test_bit(__ICE_GLOBR_REQ, state);
@@ -3199,9 +2593,12 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
 	struct ice_vsi_ctx *ctx;
 	struct ice_pf *pf = vsi->back;
 	enum ice_status status;
+	struct device *dev;
 	int i, ret = 0;
 	u8 num_tc = 0;
 
+	dev = ice_pf_to_dev(pf);
+
 	ice_for_each_traffic_class(i) {
 		/* build bitmap of enabled TCs */
 		if (ena_tc & BIT(i))
@@ -3213,7 +2610,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
 	vsi->tc_cfg.ena_tc = ena_tc;
 	vsi->tc_cfg.numtc = num_tc;
 
-	ctx = devm_kzalloc(&pf->pdev->dev, sizeof(*ctx), GFP_KERNEL);
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
@@ -3226,7 +2623,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
 	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
 	status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL);
 	if (status) {
-		dev_info(&pf->pdev->dev, "Failed VSI Update\n");
+		dev_info(dev, "Failed VSI Update\n");
 		ret = -EIO;
 		goto out;
 	}
@@ -3235,8 +2632,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
 				 max_txqs);
 
 	if (status) {
-		dev_err(&pf->pdev->dev,
-			"VSI %d failed TC config, error %d\n",
+		dev_err(dev, "VSI %d failed TC config, error %d\n",
 			vsi->vsi_num, status);
 		ret = -EIO;
 		goto out;
@@ -3246,7 +2642,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
 
 	ice_vsi_cfg_netdev_tc(vsi, ena_tc);
 out:
-	devm_kfree(&pf->pdev->dev, ctx);
+	kfree(ctx);
 	return ret;
 }
 #endif /* CONFIG_DCB */
@@ -3271,6 +2667,51 @@ char *ice_nvm_version_str(struct ice_hw *hw)
 }
 
 /**
+ * ice_update_ring_stats - Update ring statistics
+ * @ring: ring to update
+ * @cont: used to increment per-vector counters
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ *
+ * This function assumes that caller has acquired a u64_stats_sync lock.
+ */
+static void
+ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont,
+		      u64 pkts, u64 bytes)
+{
+	ring->stats.bytes += bytes;
+	ring->stats.pkts += pkts;
+	cont->total_bytes += bytes;
+	cont->total_pkts += pkts;
+}
+
+/**
+ * ice_update_tx_ring_stats - Update Tx ring specific counters
+ * @tx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes)
+{
+	u64_stats_update_begin(&tx_ring->syncp);
+	ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes);
+	u64_stats_update_end(&tx_ring->syncp);
+}
+
+/**
+ * ice_update_rx_ring_stats - Update Rx ring specific counters
+ * @rx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes)
+{
+	u64_stats_update_begin(&rx_ring->syncp);
+	ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes);
+	u64_stats_update_end(&rx_ring->syncp);
+}
+
+/**
  * ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI
  * @vsi: the VSI being configured MAC filter
  * @macaddr: the MAC address to be added.
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 47bc033fff20..6e31e30aba39 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -6,18 +6,7 @@
 
 #include "ice.h"
 
-struct ice_txq_meta {
-	/* Tx-scheduler element identifier */
-	u32 q_teid;
-	/* Entry in VSI's txq_map bitmap */
-	u16 q_id;
-	/* Relative index of Tx queue within TC */
-	u16 q_handle;
-	/* VSI index that Tx queue belongs to */
-	u16 vsi_idx;
-	/* TC number that Tx queue belongs to */
-	u8 tc;
-};
+const char *ice_vsi_type_str(enum ice_vsi_type type);
 
 int
 ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
@@ -33,24 +22,6 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
 
 void ice_vsi_cfg_msix(struct ice_vsi *vsi);
 
-#ifdef CONFIG_PCI_IOV
-void
-ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
-
-void
-ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
-
-int
-ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
-		     u16 rel_vmvf_num, struct ice_ring *ring,
-		     struct ice_txq_meta *txq_meta);
-
-void ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
-		       struct ice_txq_meta *txq_meta);
-
-int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
-#endif /* CONFIG_PCI_IOV */
-
 int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid);
 
 int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid);
@@ -67,6 +38,10 @@ int
 ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 			  u16 rel_vmvf_num);
 
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi);
+
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
+
 int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
 
 void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
@@ -89,25 +64,21 @@ int ice_vsi_release(struct ice_vsi *vsi);
 
 void ice_vsi_close(struct ice_vsi *vsi);
 
+int ice_ena_vsi(struct ice_vsi *vsi, bool locked);
+
+void ice_dis_vsi(struct ice_vsi *vsi, bool locked);
+
 int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id);
 
 int
 ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id);
 
-int ice_vsi_rebuild(struct ice_vsi *vsi);
+int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi);
 
 bool ice_is_reset_in_progress(unsigned long *state);
 
-void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
-
-void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
-
 void ice_vsi_put_qs(struct ice_vsi *vsi);
 
-#ifdef CONFIG_DCB
-void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
-#endif /* CONFIG_DCB */
-
 void ice_vsi_dis_irq(struct ice_vsi *vsi);
 
 void ice_vsi_free_irq(struct ice_vsi *vsi);
@@ -118,6 +89,12 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
 
 int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
 
+void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
+
+void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
+
 u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
 
 char *ice_nvm_version_str(struct ice_hw *hw);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 214cd6eca405..69bff085acf7 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -6,8 +6,10 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include "ice.h"
+#include "ice_base.h"
 #include "ice_lib.h"
 #include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
 
 #define DRV_VERSION_MAJOR 0
 #define DRV_VERSION_MINOR 8
@@ -42,6 +44,7 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
 static struct workqueue_struct *ice_wq;
 static const struct net_device_ops ice_netdev_safe_mode_ops;
 static const struct net_device_ops ice_netdev_ops;
+static int ice_vsi_open(struct ice_vsi *vsi);
 
 static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
 
@@ -159,7 +162,7 @@ unregister:
 	 * had an error
 	 */
 	if (status && vsi->netdev->reg_state == NETREG_REGISTERED) {
-		dev_err(&pf->pdev->dev,
+		dev_err(ice_pf_to_dev(pf),
 			"Could not add MAC filters error %d. Unregistering device\n",
 			status);
 		unregister_netdev(vsi->netdev);
@@ -435,42 +438,11 @@ static void ice_sync_fltr_subtask(struct ice_pf *pf)
 }
 
 /**
- * ice_dis_vsi - pause a VSI
- * @vsi: the VSI being paused
- * @locked: is the rtnl_lock already held
- */
-static void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
-{
-	if (test_bit(__ICE_DOWN, vsi->state))
-		return;
-
-	set_bit(__ICE_NEEDS_RESTART, vsi->state);
-
-	if (vsi->type == ICE_VSI_PF && vsi->netdev) {
-		if (netif_running(vsi->netdev)) {
-			if (!locked)
-				rtnl_lock();
-
-			ice_stop(vsi->netdev);
-
-			if (!locked)
-				rtnl_unlock();
-		} else {
-			ice_vsi_close(vsi);
-		}
-	}
-}
-
-/**
  * ice_pf_dis_all_vsi - Pause all VSIs on a PF
  * @pf: the PF
  * @locked: is the rtnl_lock already held
  */
-#ifdef CONFIG_DCB
-void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
-#else
 static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
-#endif /* CONFIG_DCB */
 {
 	int v;
 
@@ -524,7 +496,7 @@ ice_prepare_for_reset(struct ice_pf *pf)
  */
 static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
 {
-	struct device *dev = &pf->pdev->dev;
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_hw *hw = &pf->hw;
 
 	dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
@@ -636,8 +608,14 @@ static void ice_print_topo_conflict(struct ice_vsi *vsi)
 	switch (vsi->port_info->phy.link_info.topo_media_conflict) {
 	case ICE_AQ_LINK_TOPO_CONFLICT:
 	case ICE_AQ_LINK_MEDIA_CONFLICT:
+	case ICE_AQ_LINK_TOPO_UNREACH_PRT:
+	case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
+	case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
 		netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n");
 		break;
+	case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
+		netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+		break;
 	default:
 		break;
 	}
@@ -747,7 +725,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
 		an = "False";
 
 	/* Get FEC mode requested based on PHY caps last SW configuration */
-	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
 	if (!caps) {
 		fec_req = "Unknown";
 		goto done;
@@ -767,7 +745,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
 	else
 		fec_req = "NONE";
 
-	devm_kfree(&vsi->back->pdev->dev, caps);
+	kfree(caps);
 
 done:
 	netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Autoneg: %s, Flow Control: %s\n",
@@ -815,6 +793,7 @@ static int
 ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
 	       u16 link_speed)
 {
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_phy_info *phy_info;
 	struct ice_vsi *vsi;
 	u16 old_link_speed;
@@ -832,7 +811,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
 	 */
 	result = ice_update_link_info(pi);
 	if (result)
-		dev_dbg(&pf->pdev->dev,
+		dev_dbg(dev,
 			"Failed to update link status and re-enable link events for port %d\n",
 			pi->lport);
 
@@ -851,7 +830,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
 
 		result = ice_aq_set_link_restart_an(pi, false, NULL);
 		if (result) {
-			dev_dbg(&pf->pdev->dev,
+			dev_dbg(dev,
 				"Failed to set link down, VSI %d error %d\n",
 				vsi->vsi_num, result);
 			return result;
@@ -947,7 +926,7 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
 				!!(link_data->link_info & ICE_AQ_LINK_UP),
 				le16_to_cpu(link_data->link_speed));
 	if (status)
-		dev_dbg(&pf->pdev->dev,
+		dev_dbg(ice_pf_to_dev(pf),
 			"Could not process link event, error %d\n", status);
 
 	return status;
@@ -960,6 +939,7 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
  */
 static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 {
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_rq_event_info event;
 	struct ice_hw *hw = &pf->hw;
 	struct ice_ctl_q_info *cq;
@@ -981,8 +961,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 		qtype = "Mailbox";
 		break;
 	default:
-		dev_warn(&pf->pdev->dev, "Unknown control queue type 0x%x\n",
-			 q_type);
+		dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
 		return 0;
 	}
 
@@ -994,15 +973,15 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 		   PF_FW_ARQLEN_ARQCRIT_M)) {
 		oldval = val;
 		if (val & PF_FW_ARQLEN_ARQVFE_M)
-			dev_dbg(&pf->pdev->dev,
-				"%s Receive Queue VF Error detected\n", qtype);
+			dev_dbg(dev, "%s Receive Queue VF Error detected\n",
+				qtype);
 		if (val & PF_FW_ARQLEN_ARQOVFL_M) {
-			dev_dbg(&pf->pdev->dev,
+			dev_dbg(dev,
 				"%s Receive Queue Overflow Error detected\n",
 				qtype);
 		}
 		if (val & PF_FW_ARQLEN_ARQCRIT_M)
-			dev_dbg(&pf->pdev->dev,
+			dev_dbg(dev,
 				"%s Receive Queue Critical Error detected\n",
 				qtype);
 		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
@@ -1016,16 +995,14 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 		   PF_FW_ATQLEN_ATQCRIT_M)) {
 		oldval = val;
 		if (val & PF_FW_ATQLEN_ATQVFE_M)
-			dev_dbg(&pf->pdev->dev,
+			dev_dbg(dev,
 				"%s Send Queue VF Error detected\n", qtype);
 		if (val & PF_FW_ATQLEN_ATQOVFL_M) {
-			dev_dbg(&pf->pdev->dev,
-				"%s Send Queue Overflow Error detected\n",
+			dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
 				qtype);
 		}
 		if (val & PF_FW_ATQLEN_ATQCRIT_M)
-			dev_dbg(&pf->pdev->dev,
-				"%s Send Queue Critical Error detected\n",
+			dev_dbg(dev, "%s Send Queue Critical Error detected\n",
 				qtype);
 		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
 			 PF_FW_ATQLEN_ATQCRIT_M);
@@ -1034,8 +1011,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 	}
 
 	event.buf_len = cq->rq_buf_size;
-	event.msg_buf = devm_kzalloc(&pf->pdev->dev, event.buf_len,
-				     GFP_KERNEL);
+	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
 	if (!event.msg_buf)
 		return 0;
 
@@ -1047,8 +1023,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 		if (ret == ICE_ERR_AQ_NO_WORK)
 			break;
 		if (ret) {
-			dev_err(&pf->pdev->dev,
-				"%s Receive Queue event error %d\n", qtype,
+			dev_err(dev, "%s Receive Queue event error %d\n", qtype,
 				ret);
 			break;
 		}
@@ -1058,8 +1033,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 		switch (opcode) {
 		case ice_aqc_opc_get_link_status:
 			if (ice_handle_link_event(pf, &event))
-				dev_err(&pf->pdev->dev,
-					"Could not handle link event\n");
+				dev_err(dev, "Could not handle link event\n");
 			break;
 		case ice_mbx_opc_send_msg_to_pf:
 			ice_vc_process_vf_msg(pf, &event);
@@ -1071,14 +1045,14 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 			ice_dcb_process_lldp_set_mib_change(pf, &event);
 			break;
 		default:
-			dev_dbg(&pf->pdev->dev,
+			dev_dbg(dev,
 				"%s Receive Queue unknown event 0x%04x ignored\n",
 				qtype, opcode);
 			break;
 		}
 	} while (pending && (i++ < ICE_DFLT_IRQ_WORK));
 
-	devm_kfree(&pf->pdev->dev, event.msg_buf);
+	kfree(event.msg_buf);
 
 	return pending && (i == ICE_DFLT_IRQ_WORK);
 }
@@ -1222,6 +1196,7 @@ static void ice_service_timer(struct timer_list *t)
  */
 static void ice_handle_mdd_event(struct ice_pf *pf)
 {
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_hw *hw = &pf->hw;
 	bool mdd_detected = false;
 	u32 reg;
@@ -1243,7 +1218,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 				GL_MDET_TX_PQM_QNUM_S);
 
 		if (netif_msg_tx_err(pf))
-			dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
+			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
 				 event, queue, pf_num, vf_num);
 		wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
 		mdd_detected = true;
@@ -1261,7 +1236,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 				GL_MDET_TX_TCLAN_QNUM_S);
 
 		if (netif_msg_rx_err(pf))
-			dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
+			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
 				 event, queue, pf_num, vf_num);
 		wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
 		mdd_detected = true;
@@ -1279,7 +1254,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 				GL_MDET_RX_QNUM_S);
 
 		if (netif_msg_rx_err(pf))
-			dev_info(&pf->pdev->dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
+			dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
 				 event, queue, pf_num, vf_num);
 		wr32(hw, GL_MDET_RX, 0xffffffff);
 		mdd_detected = true;
@@ -1291,21 +1266,21 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		reg = rd32(hw, PF_MDET_TX_PQM);
 		if (reg & PF_MDET_TX_PQM_VALID_M) {
 			wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
-			dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n");
+			dev_info(dev, "TX driver issue detected, PF reset issued\n");
 			pf_mdd_detected = true;
 		}
 
 		reg = rd32(hw, PF_MDET_TX_TCLAN);
 		if (reg & PF_MDET_TX_TCLAN_VALID_M) {
 			wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF);
-			dev_info(&pf->pdev->dev, "TX driver issue detected, PF reset issued\n");
+			dev_info(dev, "TX driver issue detected, PF reset issued\n");
 			pf_mdd_detected = true;
 		}
 
 		reg = rd32(hw, PF_MDET_RX);
 		if (reg & PF_MDET_RX_VALID_M) {
 			wr32(hw, PF_MDET_RX, 0xFFFF);
-			dev_info(&pf->pdev->dev, "RX driver issue detected, PF reset issued\n");
+			dev_info(dev, "RX driver issue detected, PF reset issued\n");
 			pf_mdd_detected = true;
 		}
 		/* Queue belongs to the PF initiate a reset */
@@ -1325,7 +1300,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		if (reg & VP_MDET_TX_PQM_VALID_M) {
 			wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF);
 			vf_mdd_detected = true;
-			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
+			dev_info(dev, "TX driver issue detected on VF %d\n",
 				 i);
 		}
 
@@ -1333,7 +1308,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		if (reg & VP_MDET_TX_TCLAN_VALID_M) {
 			wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF);
 			vf_mdd_detected = true;
-			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
+			dev_info(dev, "TX driver issue detected on VF %d\n",
 				 i);
 		}
 
@@ -1341,7 +1316,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		if (reg & VP_MDET_TX_TDPU_VALID_M) {
 			wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF);
 			vf_mdd_detected = true;
-			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
+			dev_info(dev, "TX driver issue detected on VF %d\n",
 				 i);
 		}
 
@@ -1349,7 +1324,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		if (reg & VP_MDET_RX_VALID_M) {
 			wr32(hw, VP_MDET_RX(i), 0xFFFF);
 			vf_mdd_detected = true;
-			dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
+			dev_info(dev, "RX driver issue detected on VF %d\n",
 				 i);
 		}
 
@@ -1357,7 +1332,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 			vf->num_mdd_events++;
 			if (vf->num_mdd_events &&
 			    vf->num_mdd_events <= ICE_MDD_EVENTS_THRESHOLD)
-				dev_info(&pf->pdev->dev,
+				dev_info(dev,
 					 "VF %d has had %llu MDD events since last boot, Admin might need to reload AVF driver with this number of events\n",
 					 i, vf->num_mdd_events);
 		}
@@ -1393,7 +1368,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
 
 	pi = vsi->port_info;
 
-	pcaps = devm_kzalloc(dev, sizeof(*pcaps), GFP_KERNEL);
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
 	if (!pcaps)
 		return -ENOMEM;
 
@@ -1412,7 +1387,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
 	    link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
 		goto out;
 
-	cfg = devm_kzalloc(dev, sizeof(*cfg), GFP_KERNEL);
+	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
 	if (!cfg) {
 		retcode = -ENOMEM;
 		goto out;
@@ -1437,9 +1412,9 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
 		retcode = -EIO;
 	}
 
-	devm_kfree(dev, cfg);
+	kfree(cfg);
 out:
-	devm_kfree(dev, pcaps);
+	kfree(pcaps);
 	return retcode;
 }
 
@@ -1551,6 +1526,44 @@ static void ice_set_ctrlq_len(struct ice_hw *hw)
 }
 
 /**
+ * ice_schedule_reset - schedule a reset
+ * @pf: board private structure
+ * @reset: reset being requested
+ */
+int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+
+	/* bail out if earlier reset has failed */
+	if (test_bit(__ICE_RESET_FAILED, pf->state)) {
+		dev_dbg(dev, "earlier reset has failed\n");
+		return -EIO;
+	}
+	/* bail if reset/recovery already in progress */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_dbg(dev, "Reset already in progress\n");
+		return -EBUSY;
+	}
+
+	switch (reset) {
+	case ICE_RESET_PFR:
+		set_bit(__ICE_PFR_REQ, pf->state);
+		break;
+	case ICE_RESET_CORER:
+		set_bit(__ICE_CORER_REQ, pf->state);
+		break;
+	case ICE_RESET_GLOBR:
+		set_bit(__ICE_GLOBR_REQ, pf->state);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ice_service_task_schedule(pf);
+	return 0;
+}
+
+/**
  * ice_irq_affinity_notify - Callback for affinity changes
  * @notify: context as to what irq was changed
  * @mask: the new affinity mask
@@ -1604,11 +1617,13 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
 	int q_vectors = vsi->num_q_vectors;
 	struct ice_pf *pf = vsi->back;
 	int base = vsi->base_vector;
+	struct device *dev;
 	int rx_int_idx = 0;
 	int tx_int_idx = 0;
 	int vector, err;
 	int irq_num;
 
+	dev = ice_pf_to_dev(pf);
 	for (vector = 0; vector < q_vectors; vector++) {
 		struct ice_q_vector *q_vector = vsi->q_vectors[vector];
 
@@ -1628,8 +1643,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
 			/* skip this unused q_vector */
 			continue;
 		}
-		err = devm_request_irq(&pf->pdev->dev, irq_num,
-				       vsi->irq_handler, 0,
+		err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0,
 				       q_vector->name, q_vector);
 		if (err) {
 			netdev_err(vsi->netdev,
@@ -1655,12 +1669,331 @@ free_q_irqs:
 		irq_num = pf->msix_entries[base + vector].vector,
 		irq_set_affinity_notifier(irq_num, NULL);
 		irq_set_affinity_hint(irq_num, NULL);
-		devm_free_irq(&pf->pdev->dev, irq_num, &vsi->q_vectors[vector]);
+		devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]);
 	}
 	return err;
 }
 
 /**
+ * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
+ * @vsi: VSI to setup Tx rings used by XDP
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
+{
+	struct device *dev = &vsi->back->pdev->dev;
+	int i;
+
+	for (i = 0; i < vsi->num_xdp_txq; i++) {
+		u16 xdp_q_idx = vsi->alloc_txq + i;
+		struct ice_ring *xdp_ring;
+
+		xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
+
+		if (!xdp_ring)
+			goto free_xdp_rings;
+
+		xdp_ring->q_index = xdp_q_idx;
+		xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
+		xdp_ring->ring_active = false;
+		xdp_ring->vsi = vsi;
+		xdp_ring->netdev = NULL;
+		xdp_ring->dev = dev;
+		xdp_ring->count = vsi->num_tx_desc;
+		vsi->xdp_rings[i] = xdp_ring;
+		if (ice_setup_tx_ring(xdp_ring))
+			goto free_xdp_rings;
+		ice_set_ring_xdp(xdp_ring);
+		xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+	}
+
+	return 0;
+
+free_xdp_rings:
+	for (; i >= 0; i--)
+		if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
+			ice_free_tx_ring(vsi->xdp_rings[i]);
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
+ * @vsi: VSI to set the bpf prog on
+ * @prog: the bpf prog pointer
+ */
+static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+	struct bpf_prog *old_prog;
+	int i;
+
+	old_prog = xchg(&vsi->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	ice_for_each_rxq(vsi, i)
+		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+}
+
+/**
+ * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
+ * @vsi: VSI to bring up Tx rings used by XDP
+ * @prog: bpf program that will be assigned to VSI
+ *
+ * Return 0 on success and negative value on error
+ */
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	int xdp_rings_rem = vsi->num_xdp_txq;
+	struct ice_pf *pf = vsi->back;
+	struct ice_qs_cfg xdp_qs_cfg = {
+		.qs_mutex = &pf->avail_q_mutex,
+		.pf_map = pf->avail_txqs,
+		.pf_map_size = pf->max_pf_txqs,
+		.q_count = vsi->num_xdp_txq,
+		.scatter_count = ICE_MAX_SCATTER_TXQS,
+		.vsi_map = vsi->txq_map,
+		.vsi_map_offset = vsi->alloc_txq,
+		.mapping_mode = ICE_VSI_MAP_CONTIG
+	};
+	enum ice_status status;
+	struct device *dev;
+	int i, v_idx;
+
+	dev = ice_pf_to_dev(pf);
+	vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq,
+				      sizeof(*vsi->xdp_rings), GFP_KERNEL);
+	if (!vsi->xdp_rings)
+		return -ENOMEM;
+
+	vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
+	if (__ice_vsi_get_qs(&xdp_qs_cfg))
+		goto err_map_xdp;
+
+	if (ice_xdp_alloc_setup_rings(vsi))
+		goto clear_xdp_rings;
+
+	/* follow the logic from ice_vsi_map_rings_to_vectors */
+	ice_for_each_q_vector(vsi, v_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+		int xdp_rings_per_v, q_id, q_base;
+
+		xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
+					       vsi->num_q_vectors - v_idx);
+		q_base = vsi->num_xdp_txq - xdp_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
+			struct ice_ring *xdp_ring = vsi->xdp_rings[q_id];
+
+			xdp_ring->q_vector = q_vector;
+			xdp_ring->next = q_vector->tx.ring;
+			q_vector->tx.ring = xdp_ring;
+		}
+		xdp_rings_rem -= xdp_rings_per_v;
+	}
+
+	/* omit the scheduler update if in reset path; XDP queues will be
+	 * taken into account at the end of ice_vsi_rebuild, where
+	 * ice_cfg_vsi_lan is being called
+	 */
+	if (ice_is_reset_in_progress(pf->state))
+		return 0;
+
+	/* tell the Tx scheduler that right now we have
+	 * additional queues
+	 */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
+
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+	if (status) {
+		dev_err(dev, "Failed VSI LAN queue config for XDP, error:%d\n",
+			status);
+		goto clear_xdp_rings;
+	}
+	ice_vsi_assign_bpf_prog(vsi, prog);
+
+	return 0;
+clear_xdp_rings:
+	for (i = 0; i < vsi->num_xdp_txq; i++)
+		if (vsi->xdp_rings[i]) {
+			kfree_rcu(vsi->xdp_rings[i], rcu);
+			vsi->xdp_rings[i] = NULL;
+		}
+
+err_map_xdp:
+	mutex_lock(&pf->avail_q_mutex);
+	for (i = 0; i < vsi->num_xdp_txq; i++) {
+		clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+		vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+	}
+	mutex_unlock(&pf->avail_q_mutex);
+
+	devm_kfree(dev, vsi->xdp_rings);
+	return -ENOMEM;
+}
+
+/**
+ * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
+ * @vsi: VSI to remove XDP rings
+ *
+ * Detach XDP rings from irq vectors, clean up the PF bitmap and free
+ * resources
+ */
+int ice_destroy_xdp_rings(struct ice_vsi *vsi)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct ice_pf *pf = vsi->back;
+	int i, v_idx;
+
+	/* q_vectors are freed in reset path so there's no point in detaching
+	 * rings; in case of rebuild being triggered not from reset reset bits
+	 * in pf->state won't be set, so additionally check first q_vector
+	 * against NULL
+	 */
+	if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+		goto free_qmap;
+
+	ice_for_each_q_vector(vsi, v_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+		struct ice_ring *ring;
+
+		ice_for_each_ring(ring, q_vector->tx)
+			if (!ring->tx_buf || !ice_ring_is_xdp(ring))
+				break;
+
+		/* restore the value of last node prior to XDP setup */
+		q_vector->tx.ring = ring;
+	}
+
+free_qmap:
+	mutex_lock(&pf->avail_q_mutex);
+	for (i = 0; i < vsi->num_xdp_txq; i++) {
+		clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+		vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+	}
+	mutex_unlock(&pf->avail_q_mutex);
+
+	for (i = 0; i < vsi->num_xdp_txq; i++)
+		if (vsi->xdp_rings[i]) {
+			if (vsi->xdp_rings[i]->desc)
+				ice_free_tx_ring(vsi->xdp_rings[i]);
+			kfree_rcu(vsi->xdp_rings[i], rcu);
+			vsi->xdp_rings[i] = NULL;
+		}
+
+	devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings);
+	vsi->xdp_rings = NULL;
+
+	if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+		return 0;
+
+	ice_vsi_assign_bpf_prog(vsi, NULL);
+
+	/* notify Tx scheduler that we destroyed XDP queues and bring
+	 * back the old number of child nodes
+	 */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->num_txq;
+
+	return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+			       max_txqs);
+}
+
+/**
+ * ice_xdp_setup_prog - Add or remove XDP eBPF program
+ * @vsi: VSI to setup XDP for
+ * @prog: XDP program
+ * @extack: netlink extended ack
+ */
+static int
+ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+		   struct netlink_ext_ack *extack)
+{
+	int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+	bool if_running = netif_running(vsi->netdev);
+	int ret = 0, xdp_ring_err = 0;
+
+	if (frame_size > vsi->rx_buf_len) {
+		NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
+		return -EOPNOTSUPP;
+	}
+
+	/* need to stop netdev while setting up the program for Rx rings */
+	if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) {
+		ret = ice_down(vsi);
+		if (ret) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Preparing device for XDP attach failed");
+			return ret;
+		}
+	}
+
+	if (!ice_is_xdp_ena_vsi(vsi) && prog) {
+		vsi->num_xdp_txq = vsi->alloc_txq;
+		xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
+		if (xdp_ring_err)
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Setting up XDP Tx resources failed");
+	} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
+		xdp_ring_err = ice_destroy_xdp_rings(vsi);
+		if (xdp_ring_err)
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Freeing XDP Tx resources failed");
+	} else {
+		ice_vsi_assign_bpf_prog(vsi, prog);
+	}
+
+	if (if_running)
+		ret = ice_up(vsi);
+
+	if (!ret && prog && vsi->xsk_umems) {
+		int i;
+
+		ice_for_each_rxq(vsi, i) {
+			struct ice_ring *rx_ring = vsi->rx_rings[i];
+
+			if (rx_ring->xsk_umem)
+				napi_schedule(&rx_ring->q_vector->napi);
+		}
+	}
+
+	return (ret || xdp_ring_err) ? -ENOMEM : 0;
+}
+
+/**
+ * ice_xdp - implements XDP handler
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (vsi->type != ICE_VSI_PF) {
+		NL_SET_ERR_MSG_MOD(xdp->extack,
+				   "XDP can be loaded only on PF VSI");
+		return -EINVAL;
+	}
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
+	case XDP_QUERY_PROG:
+		xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
+		return 0;
+	case XDP_SETUP_XSK_UMEM:
+		return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
+					  xdp->xsk.queue_id);
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
  * ice_ena_misc_vector - enable the non-queue interrupts
  * @pf: board private structure
  */
@@ -1698,8 +2031,10 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 	struct ice_pf *pf = (struct ice_pf *)data;
 	struct ice_hw *hw = &pf->hw;
 	irqreturn_t ret = IRQ_NONE;
+	struct device *dev;
 	u32 oicr, ena_mask;
 
+	dev = ice_pf_to_dev(pf);
 	set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
 	set_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state);
 
@@ -1735,8 +2070,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 		else if (reset == ICE_RESET_EMPR)
 			pf->empr_count++;
 		else
-			dev_dbg(&pf->pdev->dev, "Invalid reset type %d\n",
-				reset);
+			dev_dbg(dev, "Invalid reset type %d\n", reset);
 
 		/* If a reset cycle isn't already in progress, we set a bit in
 		 * pf->state so that the service task can start a reset/rebuild.
@@ -1770,8 +2104,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 
 	if (oicr & PFINT_OICR_HMC_ERR_M) {
 		ena_mask &= ~PFINT_OICR_HMC_ERR_M;
-		dev_dbg(&pf->pdev->dev,
-			"HMC Error interrupt - info 0x%x, data 0x%x\n",
+		dev_dbg(dev, "HMC Error interrupt - info 0x%x, data 0x%x\n",
 			rd32(hw, PFHMC_ERRORINFO),
 			rd32(hw, PFHMC_ERRORDATA));
 	}
@@ -1779,8 +2112,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 	/* Report any remaining unexpected interrupts */
 	oicr &= ena_mask;
 	if (oicr) {
-		dev_dbg(&pf->pdev->dev, "unhandled interrupt oicr=0x%08x\n",
-			oicr);
+		dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr);
 		/* If a critical error is pending there is no choice but to
 		 * reset the device.
 		 */
@@ -1838,7 +2170,7 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf)
 
 	if (pf->msix_entries) {
 		synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
-		devm_free_irq(&pf->pdev->dev,
+		devm_free_irq(ice_pf_to_dev(pf),
 			      pf->msix_entries[pf->oicr_idx].vector, pf);
 	}
 
@@ -1882,13 +2214,13 @@ static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
  */
 static int ice_req_irq_msix_misc(struct ice_pf *pf)
 {
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_hw *hw = &pf->hw;
 	int oicr_idx, err = 0;
 
 	if (!pf->int_name[0])
 		snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
-			 dev_driver_string(&pf->pdev->dev),
-			 dev_name(&pf->pdev->dev));
+			 dev_driver_string(dev), dev_name(dev));
 
 	/* Do not request IRQ but do enable OICR interrupt since settings are
 	 * lost during reset. Note that this function is called only during
@@ -1905,12 +2237,10 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
 	pf->num_avail_sw_msix -= 1;
 	pf->oicr_idx = oicr_idx;
 
-	err = devm_request_irq(&pf->pdev->dev,
-			       pf->msix_entries[pf->oicr_idx].vector,
+	err = devm_request_irq(dev, pf->msix_entries[pf->oicr_idx].vector,
 			       ice_misc_intr, 0, pf->int_name, pf);
 	if (err) {
-		dev_err(&pf->pdev->dev,
-			"devm_request_irq for %s failed: %d\n",
+		dev_err(dev, "devm_request_irq for %s failed: %d\n",
 			pf->int_name, err);
 		ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
 		pf->num_avail_sw_msix += 1;
@@ -2043,7 +2373,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
 	ice_set_ops(netdev);
 
 	if (vsi->type == ICE_VSI_PF) {
-		SET_NETDEV_DEV(netdev, &pf->pdev->dev);
+		SET_NETDEV_DEV(netdev, ice_pf_to_dev(pf));
 		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
 		ether_addr_copy(netdev->dev_addr, mac_addr);
 		ether_addr_copy(netdev->perm_addr, mac_addr);
@@ -2219,6 +2549,11 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
 		status = -ENODEV;
 		goto unroll_vsi_setup;
 	}
+	/* netdev has to be configured before setting frame size */
+	ice_vsi_cfg_frame_size(vsi);
+
+	/* Setup DCB netlink interface */
+	ice_dcbnl_setup(vsi);
 
 	/* registering the NAPI handler requires both the queues and
 	 * netdev to be created, which are done in ice_pf_vsi_setup()
@@ -2300,6 +2635,7 @@ static void ice_deinit_pf(struct ice_pf *pf)
 {
 	ice_service_task_stop(pf);
 	mutex_destroy(&pf->sw_mutex);
+	mutex_destroy(&pf->tc_mutex);
 	mutex_destroy(&pf->avail_q_mutex);
 
 	if (pf->avail_txqs) {
@@ -2349,6 +2685,7 @@ static int ice_init_pf(struct ice_pf *pf)
 	ice_set_pf_caps(pf);
 
 	mutex_init(&pf->sw_mutex);
+	mutex_init(&pf->tc_mutex);
 
 	/* setup service timer and periodic service task */
 	timer_setup(&pf->serv_tmr, ice_service_timer, 0);
@@ -2363,7 +2700,7 @@ static int ice_init_pf(struct ice_pf *pf)
 
 	pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL);
 	if (!pf->avail_rxqs) {
-		devm_kfree(&pf->pdev->dev, pf->avail_txqs);
+		devm_kfree(ice_pf_to_dev(pf), pf->avail_txqs);
 		pf->avail_txqs = NULL;
 		return -ENOMEM;
 	}
@@ -2380,6 +2717,7 @@ static int ice_init_pf(struct ice_pf *pf)
  */
 static int ice_ena_msix_range(struct ice_pf *pf)
 {
+	struct device *dev = ice_pf_to_dev(pf);
 	int v_left, v_actual, v_budget = 0;
 	int needed, err, i;
 
@@ -2400,7 +2738,7 @@ static int ice_ena_msix_range(struct ice_pf *pf)
 	v_budget += needed;
 	v_left -= needed;
 
-	pf->msix_entries = devm_kcalloc(&pf->pdev->dev, v_budget,
+	pf->msix_entries = devm_kcalloc(dev, v_budget,
 					sizeof(*pf->msix_entries), GFP_KERNEL);
 
 	if (!pf->msix_entries) {
@@ -2416,13 +2754,13 @@ static int ice_ena_msix_range(struct ice_pf *pf)
 					 ICE_MIN_MSIX, v_budget);
 
 	if (v_actual < 0) {
-		dev_err(&pf->pdev->dev, "unable to reserve MSI-X vectors\n");
+		dev_err(dev, "unable to reserve MSI-X vectors\n");
 		err = v_actual;
 		goto msix_err;
 	}
 
 	if (v_actual < v_budget) {
-		dev_warn(&pf->pdev->dev,
+		dev_warn(dev,
 			 "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
 			 v_budget, v_actual);
 /* 2 vectors for LAN (traffic + OICR) */
@@ -2441,11 +2779,11 @@ static int ice_ena_msix_range(struct ice_pf *pf)
 	return v_actual;
 
 msix_err:
-	devm_kfree(&pf->pdev->dev, pf->msix_entries);
+	devm_kfree(dev, pf->msix_entries);
 	goto exit_err;
 
 no_hw_vecs_left_err:
-	dev_err(&pf->pdev->dev,
+	dev_err(dev,
 		"not enough device MSI-X vectors. requested = %d, available = %d\n",
 		needed, v_left);
 	err = -ERANGE;
@@ -2461,7 +2799,7 @@ exit_err:
 static void ice_dis_msix(struct ice_pf *pf)
 {
 	pci_disable_msix(pf->pdev);
-	devm_kfree(&pf->pdev->dev, pf->msix_entries);
+	devm_kfree(ice_pf_to_dev(pf), pf->msix_entries);
 	pf->msix_entries = NULL;
 }
 
@@ -2474,7 +2812,7 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf)
 	ice_dis_msix(pf);
 
 	if (pf->irq_tracker) {
-		devm_kfree(&pf->pdev->dev, pf->irq_tracker);
+		devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker);
 		pf->irq_tracker = NULL;
 	}
 }
@@ -2494,7 +2832,7 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf)
 
 	/* set up vector assignment tracking */
 	pf->irq_tracker =
-		devm_kzalloc(&pf->pdev->dev, sizeof(*pf->irq_tracker) +
+		devm_kzalloc(ice_pf_to_dev(pf), sizeof(*pf->irq_tracker) +
 			     (sizeof(u16) * vectors), GFP_KERNEL);
 	if (!pf->irq_tracker) {
 		ice_dis_msix(pf);
@@ -2510,6 +2848,52 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf)
 }
 
 /**
+ * ice_vsi_recfg_qs - Change the number of queues on a VSI
+ * @vsi: VSI being changed
+ * @new_rx: new number of Rx queues
+ * @new_tx: new number of Tx queues
+ *
+ * Only change the number of queues if new_tx, or new_rx is non-0.
+ *
+ * Returns 0 on success.
+ */
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
+{
+	struct ice_pf *pf = vsi->back;
+	int err = 0, timeout = 50;
+
+	if (!new_rx && !new_tx)
+		return -EINVAL;
+
+	while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+
+	if (new_tx)
+		vsi->req_txq = new_tx;
+	if (new_rx)
+		vsi->req_rxq = new_rx;
+
+	/* set for the next time the netdev is started */
+	if (!netif_running(vsi->netdev)) {
+		ice_vsi_rebuild(vsi, false);
+		dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
+		goto done;
+	}
+
+	ice_vsi_close(vsi);
+	ice_vsi_rebuild(vsi, false);
+	ice_pf_dcb_recfg(pf);
+	ice_vsi_open(vsi);
+done:
+	clear_bit(__ICE_CFG_BUSY, pf->state);
+	return err;
+}
+
+/**
  * ice_log_pkg_init - log result of DDP package load
  * @hw: pointer to hardware info
  * @status: status of package load
@@ -2518,7 +2902,7 @@ static void
 ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
 {
 	struct ice_pf *pf = (struct ice_pf *)hw->back;
-	struct device *dev = &pf->pdev->dev;
+	struct device *dev = ice_pf_to_dev(pf);
 
 	switch (*status) {
 	case ICE_SUCCESS:
@@ -2598,7 +2982,7 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
 				ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
 		break;
 	case ICE_ERR_AQ_ERROR:
-		switch (hw->adminq.sq_last_status) {
+		switch (hw->pkg_dwnld_status) {
 		case ICE_AQ_RC_ENOSEC:
 		case ICE_AQ_RC_EBADSIG:
 			dev_err(dev,
@@ -2637,7 +3021,7 @@ static void
 ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
 {
 	enum ice_status status = ICE_ERR_PARAM;
-	struct device *dev = &pf->pdev->dev;
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_hw *hw = &pf->hw;
 
 	/* Load DDP Package */
@@ -2677,7 +3061,7 @@ ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
 static void ice_verify_cacheline_size(struct ice_pf *pf)
 {
 	if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
-		dev_warn(&pf->pdev->dev,
+		dev_warn(ice_pf_to_dev(pf),
 			 "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
 			 ICE_CACHE_LINE_BYTES);
 }
@@ -2747,7 +3131,7 @@ static void ice_request_fw(struct ice_pf *pf)
 {
 	char *opt_fw_filename = ice_get_opt_fw_name(pf);
 	const struct firmware *firmware = NULL;
-	struct device *dev = &pf->pdev->dev;
+	struct device *dev = ice_pf_to_dev(pf);
 	int err = 0;
 
 	/* optional device-specific DDP (if present) overrides the default DDP
@@ -2831,6 +3215,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
 	hw = &pf->hw;
 	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
+	pci_save_state(pdev);
+
 	hw->back = pf;
 	hw->vendor_id = pdev->vendor;
 	hw->device_id = pdev->device;
@@ -2936,7 +3322,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
 	err = ice_setup_pf_sw(pf);
 	if (err) {
-		dev_err(dev, "probe failed due to setup PF switch:%d\n", err);
+		dev_err(dev, "probe failed due to setup PF switch: %d\n", err);
 		goto err_alloc_sw_unroll;
 	}
 
@@ -2976,12 +3362,15 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 		ice_cfg_lldp_mib_change(&pf->hw, true);
 	}
 
+	/* print PCI link speed and width */
+	pcie_print_link_status(pf->pdev);
+
 	return 0;
 
 err_alloc_sw_unroll:
 	set_bit(__ICE_SERVICE_DIS, pf->state);
 	set_bit(__ICE_DOWN, pf->state);
-	devm_kfree(&pf->pdev->dev, pf->first_sw);
+	devm_kfree(dev, pf->first_sw);
 err_msix_misc_unroll:
 	ice_free_irq_msix_misc(pf);
 err_init_interrupt_unroll:
@@ -3027,12 +3416,13 @@ static void ice_remove(struct pci_dev *pdev)
 	}
 	ice_deinit_pf(pf);
 	ice_deinit_hw(&pf->hw);
-	ice_clear_interrupt_scheme(pf);
 	/* Issue a PFR as part of the prescribed driver unload flow.  Do not
 	 * do it via ice_schedule_reset() since there is no need to rebuild
 	 * and the service task is already stopped.
 	 */
 	ice_reset(&pf->hw, ICE_RESET_PFR);
+	pci_wait_for_pending_transaction(pdev);
+	ice_clear_interrupt_scheme(pf);
 	pci_disable_pcie_error_reporting(pdev);
 }
 
@@ -3347,6 +3737,48 @@ static void ice_set_rx_mode(struct net_device *netdev)
 }
 
 /**
+ * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate
+ * @netdev: network interface device structure
+ * @queue_index: Queue ID
+ * @maxrate: maximum bandwidth in Mbps
+ */
+static int
+ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	enum ice_status status;
+	u16 q_handle;
+	u8 tc;
+
+	/* Validate maxrate requested is within permitted range */
+	if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) {
+		netdev_err(netdev,
+			   "Invalid max rate %d specified for the queue %d\n",
+			   maxrate, queue_index);
+		return -EINVAL;
+	}
+
+	q_handle = vsi->tx_rings[queue_index]->q_handle;
+	tc = ice_dcb_get_tc(vsi, queue_index);
+
+	/* Set BW back to default, when user set maxrate to 0 */
+	if (!maxrate)
+		status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc,
+					       q_handle, ICE_MAX_BW);
+	else
+		status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc,
+					  q_handle, ICE_MAX_BW, maxrate * 1000);
+	if (status) {
+		netdev_err(netdev,
+			   "Unable to set Tx max rate, error %d\n", status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
  * ice_fdb_add - add an entry to the hardware database
  * @ndm: the input from the stack
  * @tb: pointer to array of nladdr (unused)
@@ -3426,6 +3858,7 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
 	int ret = 0;
 
 	/* Don't set any netdev advanced features with device in Safe Mode */
@@ -3435,6 +3868,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
 		return ret;
 	}
 
+	/* Do not change setting during reset */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_err(&vsi->back->pdev->dev,
+			"Device is resetting, changing advanced netdev features temporarily unavailable.\n");
+		return -EBUSY;
+	}
+
 	/* Multiple features can be changed in one call so keep features in
 	 * separate if/else statements to guarantee each feature is checked
 	 */
@@ -3505,6 +3945,8 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
 	ice_vsi_cfg_dcb_rings(vsi);
 
 	err = ice_vsi_cfg_lan_txqs(vsi);
+	if (!err && ice_is_xdp_ena_vsi(vsi))
+		err = ice_vsi_cfg_xdp_txqs(vsi);
 	if (!err)
 		err = ice_vsi_cfg_rxqs(vsi);
 
@@ -3920,6 +4362,13 @@ int ice_down(struct ice_vsi *vsi)
 		netdev_err(vsi->netdev,
 			   "Failed stop Tx rings, VSI %d error %d\n",
 			   vsi->vsi_num, tx_err);
+	if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
+		tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
+		if (tx_err)
+			netdev_err(vsi->netdev,
+				   "Failed stop XDP rings, VSI %d error %d\n",
+				   vsi->vsi_num, tx_err);
+	}
 
 	rx_err = ice_vsi_stop_rx_rings(vsi);
 	if (rx_err)
@@ -3970,8 +4419,13 @@ int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
 	}
 
 	ice_for_each_txq(vsi, i) {
-		vsi->tx_rings[i]->netdev = vsi->netdev;
-		err = ice_setup_tx_ring(vsi->tx_rings[i]);
+		struct ice_ring *ring = vsi->tx_rings[i];
+
+		if (!ring)
+			return -EINVAL;
+
+		ring->netdev = vsi->netdev;
+		err = ice_setup_tx_ring(ring);
 		if (err)
 			break;
 	}
@@ -3996,8 +4450,13 @@ int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
 	}
 
 	ice_for_each_rxq(vsi, i) {
-		vsi->rx_rings[i]->netdev = vsi->netdev;
-		err = ice_setup_rx_ring(vsi->rx_rings[i]);
+		struct ice_ring *ring = vsi->rx_rings[i];
+
+		if (!ring)
+			return -EINVAL;
+
+		ring->netdev = vsi->netdev;
+		err = ice_setup_rx_ring(ring);
 		if (err)
 			break;
 	}
@@ -4033,7 +4492,7 @@ static int ice_vsi_open(struct ice_vsi *vsi)
 		goto err_setup_rx;
 
 	snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
-		 dev_driver_string(&pf->pdev->dev), vsi->netdev->name);
+		 dev_driver_string(ice_pf_to_dev(pf)), vsi->netdev->name);
 	err = ice_vsi_req_irq_msix(vsi, int_name);
 	if (err)
 		goto err_setup_rx;
@@ -4082,61 +4541,13 @@ static void ice_vsi_release_all(struct ice_pf *pf)
 
 		err = ice_vsi_release(pf->vsi[i]);
 		if (err)
-			dev_dbg(&pf->pdev->dev,
+			dev_dbg(ice_pf_to_dev(pf),
 				"Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
 				i, err, pf->vsi[i]->vsi_num);
 	}
 }
 
 /**
- * ice_ena_vsi - resume a VSI
- * @vsi: the VSI being resume
- * @locked: is the rtnl_lock already held
- */
-static int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
-{
-	int err = 0;
-
-	if (!test_bit(__ICE_NEEDS_RESTART, vsi->state))
-		return 0;
-
-	clear_bit(__ICE_NEEDS_RESTART, vsi->state);
-
-	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
-		if (netif_running(vsi->netdev)) {
-			if (!locked)
-				rtnl_lock();
-
-			err = ice_open(vsi->netdev);
-
-			if (!locked)
-				rtnl_unlock();
-		}
-	}
-
-	return err;
-}
-
-/**
- * ice_pf_ena_all_vsi - Resume all VSIs on a PF
- * @pf: the PF
- * @locked: is the rtnl_lock already held
- */
-#ifdef CONFIG_DCB
-int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked)
-{
-	int v;
-
-	ice_for_each_vsi(pf, v)
-		if (pf->vsi[v])
-			if (ice_ena_vsi(pf->vsi[v], locked))
-				return -EIO;
-
-	return 0;
-}
-#endif /* CONFIG_DCB */
-
-/**
  * ice_vsi_rebuild_by_type - Rebuild VSI of a given type
  * @pf: pointer to the PF instance
  * @type: VSI type to rebuild
@@ -4145,6 +4556,7 @@ int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked)
  */
 static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
 {
+	struct device *dev = ice_pf_to_dev(pf);
 	enum ice_status status;
 	int i, err;
 
@@ -4155,20 +4567,20 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
 			continue;
 
 		/* rebuild the VSI */
-		err = ice_vsi_rebuild(vsi);
+		err = ice_vsi_rebuild(vsi, true);
 		if (err) {
-			dev_err(&pf->pdev->dev,
-				"rebuild VSI failed, err %d, VSI index %d, type %d\n",
-				err, vsi->idx, type);
+			dev_err(dev,
+				"rebuild VSI failed, err %d, VSI index %d, type %s\n",
+				err, vsi->idx, ice_vsi_type_str(type));
 			return err;
 		}
 
 		/* replay filters for the VSI */
 		status = ice_replay_vsi(&pf->hw, vsi->idx);
 		if (status) {
-			dev_err(&pf->pdev->dev,
-				"replay VSI failed, status %d, VSI index %d, type %d\n",
-				status, vsi->idx, type);
+			dev_err(dev,
+				"replay VSI failed, status %d, VSI index %d, type %s\n",
+				status, vsi->idx, ice_vsi_type_str(type));
 			return -EIO;
 		}
 
@@ -4180,14 +4592,14 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
 		/* enable the VSI */
 		err = ice_ena_vsi(vsi, false);
 		if (err) {
-			dev_err(&pf->pdev->dev,
-				"enable VSI failed, err %d, VSI index %d, type %d\n",
-				err, vsi->idx, type);
+			dev_err(dev,
+				"enable VSI failed, err %d, VSI index %d, type %s\n",
+				err, vsi->idx, ice_vsi_type_str(type));
 			return err;
 		}
 
-		dev_info(&pf->pdev->dev, "VSI rebuilt. VSI index %d, type %d\n",
-			 vsi->idx, type);
+		dev_info(dev, "VSI rebuilt. VSI index %d, type %s\n", vsi->idx,
+			 ice_vsi_type_str(type));
 	}
 
 	return 0;
@@ -4226,7 +4638,7 @@ static void ice_update_pf_netdev_link(struct ice_pf *pf)
  */
 static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 {
-	struct device *dev = &pf->pdev->dev;
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_hw *hw = &pf->hw;
 	enum ice_status ret;
 	int err;
@@ -4272,7 +4684,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 
 	err = ice_update_link_info(hw->port_info);
 	if (err)
-		dev_err(&pf->pdev->dev, "Get link status error %d\n", err);
+		dev_err(dev, "Get link status error %d\n", err);
 
 	/* start misc vector */
 	err = ice_req_irq_msix_misc(pf);
@@ -4329,6 +4741,18 @@ clear_recovery:
 }
 
 /**
+ * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @vsi: Pointer to VSI structure
+ */
+static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
+{
+	if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+		return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
+	else
+		return ICE_RXBUF_3072;
+}
+
+/**
  * ice_change_mtu - NDO callback to change the MTU
  * @netdev: network interface device structure
  * @new_mtu: new value for maximum frame size
@@ -4347,6 +4771,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 		return 0;
 	}
 
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		int frame_size = ice_max_xdp_frame_size(vsi);
+
+		if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
+			netdev_err(netdev, "max MTU for XDP usage is %d\n",
+				   frame_size - ICE_ETH_PKT_HDR_PAD);
+			return -EINVAL;
+		}
+	}
+
 	if (new_mtu < netdev->min_mtu) {
 		netdev_err(netdev, "new MTU invalid. min_mtu is %d\n",
 			   netdev->min_mtu);
@@ -4409,7 +4843,9 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 	struct ice_pf *pf = vsi->back;
 	struct ice_hw *hw = &pf->hw;
 	enum ice_status status;
+	struct device *dev;
 
+	dev = ice_pf_to_dev(pf);
 	if (seed) {
 		struct ice_aqc_get_set_rss_keys *buf =
 				  (struct ice_aqc_get_set_rss_keys *)seed;
@@ -4417,8 +4853,7 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 		status = ice_aq_set_rss_key(hw, vsi->idx, buf);
 
 		if (status) {
-			dev_err(&pf->pdev->dev,
-				"Cannot set RSS key, err %d aq_err %d\n",
+			dev_err(dev, "Cannot set RSS key, err %d aq_err %d\n",
 				status, hw->adminq.rq_last_status);
 			return -EIO;
 		}
@@ -4428,8 +4863,7 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 		status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
 					    lut, lut_size);
 		if (status) {
-			dev_err(&pf->pdev->dev,
-				"Cannot set RSS lut, err %d aq_err %d\n",
+			dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n",
 				status, hw->adminq.rq_last_status);
 			return -EIO;
 		}
@@ -4452,15 +4886,16 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 	struct ice_pf *pf = vsi->back;
 	struct ice_hw *hw = &pf->hw;
 	enum ice_status status;
+	struct device *dev;
 
+	dev = ice_pf_to_dev(pf);
 	if (seed) {
 		struct ice_aqc_get_set_rss_keys *buf =
 				  (struct ice_aqc_get_set_rss_keys *)seed;
 
 		status = ice_aq_get_rss_key(hw, vsi->idx, buf);
 		if (status) {
-			dev_err(&pf->pdev->dev,
-				"Cannot get RSS key, err %d aq_err %d\n",
+			dev_err(dev, "Cannot get RSS key, err %d aq_err %d\n",
 				status, hw->adminq.rq_last_status);
 			return -EIO;
 		}
@@ -4470,8 +4905,7 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 		status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
 					    lut, lut_size);
 		if (status) {
-			dev_err(&pf->pdev->dev,
-				"Cannot get RSS lut, err %d aq_err %d\n",
+			dev_err(dev, "Cannot get RSS lut, err %d aq_err %d\n",
 				status, hw->adminq.rq_last_status);
 			return -EIO;
 		}
@@ -4515,7 +4949,6 @@ ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
  */
 static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
 {
-	struct device *dev = &vsi->back->pdev->dev;
 	struct ice_aqc_vsi_props *vsi_props;
 	struct ice_hw *hw = &vsi->back->hw;
 	struct ice_vsi_ctx *ctxt;
@@ -4524,7 +4957,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
 
 	vsi_props = &vsi->info;
 
-	ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 	if (!ctxt)
 		return -ENOMEM;
 
@@ -4540,7 +4973,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
 
 	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
 	if (status) {
-		dev_err(dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n",
+		dev_err(&vsi->back->pdev->dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n",
 			bmode, status, hw->adminq.sq_last_status);
 		ret = -EIO;
 		goto out;
@@ -4549,7 +4982,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
 	vsi_props->sw_flags = ctxt->info.sw_flags;
 
 out:
-	devm_kfree(dev, ctxt);
+	kfree(ctxt);
 	return ret;
 }
 
@@ -4864,12 +5297,14 @@ static const struct net_device_ops ice_netdev_ops = {
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_change_mtu = ice_change_mtu,
 	.ndo_get_stats64 = ice_get_stats64,
+	.ndo_set_tx_maxrate = ice_set_tx_maxrate,
 	.ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
 	.ndo_set_vf_mac = ice_set_vf_mac,
 	.ndo_get_vf_config = ice_get_vf_cfg,
 	.ndo_set_vf_trust = ice_set_vf_trust,
 	.ndo_set_vf_vlan = ice_set_vf_port_vlan,
 	.ndo_set_vf_link_state = ice_set_vf_link_state,
+	.ndo_get_vf_stats = ice_get_vf_stats,
 	.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
 	.ndo_set_features = ice_set_features,
@@ -4878,4 +5313,7 @@ static const struct net_device_ops ice_netdev_ops = {
 	.ndo_fdb_add = ice_fdb_add,
 	.ndo_fdb_del = ice_fdb_del,
 	.ndo_tx_timeout = ice_tx_timeout,
+	.ndo_bpf = ice_xdp,
+	.ndo_xdp_xmit = ice_xdp_xmit,
+	.ndo_xsk_wakeup = ice_xsk_wakeup,
 };
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index bcb431f1bd92..57c73f613f32 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -219,8 +219,7 @@ static void ice_release_nvm(struct ice_hw *hw)
  *
  * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq.
  */
-static enum ice_status
-ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
+enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
 {
 	enum ice_status status;
 
@@ -242,9 +241,10 @@ ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
  */
 enum ice_status ice_init_nvm(struct ice_hw *hw)
 {
+	u16 oem_hi, oem_lo, boot_cfg_tlv, boot_cfg_tlv_len;
 	struct ice_nvm_info *nvm = &hw->nvm;
 	u16 eetrack_lo, eetrack_hi;
-	enum ice_status status = 0;
+	enum ice_status status;
 	u32 fla, gens_stat;
 	u8 sr_size;
 
@@ -261,15 +261,15 @@ enum ice_status ice_init_nvm(struct ice_hw *hw)
 	fla = rd32(hw, GLNVM_FLA);
 	if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */
 		nvm->blank_nvm_mode = false;
-	} else { /* Blank programming mode */
+	} else {
+		/* Blank programming mode */
 		nvm->blank_nvm_mode = true;
-		status = ICE_ERR_NVM_BLANK_MODE;
 		ice_debug(hw, ICE_DBG_NVM,
 			  "NVM init error: unsupported blank mode.\n");
-		return status;
+		return ICE_ERR_NVM_BLANK_MODE;
 	}
 
-	status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &hw->nvm.ver);
+	status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &nvm->ver);
 	if (status) {
 		ice_debug(hw, ICE_DBG_INIT,
 			  "Failed to read DEV starter version.\n");
@@ -287,9 +287,42 @@ enum ice_status ice_init_nvm(struct ice_hw *hw)
 		return status;
 	}
 
-	hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo;
+	nvm->eetrack = (eetrack_hi << 16) | eetrack_lo;
 
-	return status;
+	status = ice_get_pfa_module_tlv(hw, &boot_cfg_tlv, &boot_cfg_tlv_len,
+					ICE_SR_BOOT_CFG_PTR);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Failed to read Boot Configuration Block TLV.\n");
+		return status;
+	}
+
+	/* Boot Configuration Block must have length at least 2 words
+	 * (Combo Image Version High and Combo Image Version Low)
+	 */
+	if (boot_cfg_tlv_len < 2) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Invalid Boot Configuration Block TLV size.\n");
+		return ICE_ERR_INVAL_SIZE;
+	}
+
+	status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OEM_VER_OFF),
+				  &oem_hi);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read OEM_VER hi.\n");
+		return status;
+	}
+
+	status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OEM_VER_OFF + 1),
+				  &oem_lo);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read OEM_VER lo.\n");
+		return status;
+	}
+
+	nvm->oem_ver = ((u32)oem_hi << 16) | oem_lo;
+
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h
new file mode 100644
index 000000000000..a9fa011c22c6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_NVM_H_
+#define _ICE_NVM_H_
+
+enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data);
+#endif /* _ICE_NVM_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 2fde9653a608..eae707ddf8e8 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -411,6 +411,27 @@ ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
 }
 
 /**
+ * ice_aq_cfg_sched_elems - configures scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to configure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_cfgd: returns total number of elements configured
+ * @cd: pointer to command details structure or NULL
+ *
+ * Configure scheduling elements (0x0403)
+ */
+static enum ice_status
+ice_aq_cfg_sched_elems(struct ice_hw *hw, u16 elems_req,
+		       struct ice_aqc_conf_elem *buf, u16 buf_size,
+		       u16 *elems_cfgd, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_cfg_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_cfgd, cd);
+}
+
+/**
  * ice_aq_suspend_sched_elems - suspend scheduler elements
  * @hw: pointer to the HW struct
  * @elems_req: number of elements to suspend
@@ -557,6 +578,149 @@ ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
 }
 
 /**
+ * ice_aq_rl_profile - performs a rate limiting task
+ * @hw: pointer to the HW struct
+ * @opcode:opcode for add, query, or remove profile(s)
+ * @num_profiles: the number of profiles
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_processed: number of processed add or remove profile(s) to return
+ * @cd: pointer to command details structure
+ *
+ * RL profile function to add, query, or remove profile(s)
+ */
+static enum ice_status
+ice_aq_rl_profile(struct ice_hw *hw, enum ice_adminq_opc opcode,
+		  u16 num_profiles, struct ice_aqc_rl_profile_generic_elem *buf,
+		  u16 buf_size, u16 *num_processed, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_rl_profile *cmd;
+	struct ice_aq_desc desc;
+	enum ice_status status;
+
+	cmd = &desc.params.rl_profile;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opcode);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	cmd->num_profiles = cpu_to_le16(num_profiles);
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && num_processed)
+		*num_processed = le16_to_cpu(cmd->num_processed);
+	return status;
+}
+
+/**
+ * ice_aq_add_rl_profile - adds rate limiting profile(s)
+ * @hw: pointer to the HW struct
+ * @num_profiles: the number of profile(s) to be add
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_profiles_added: total number of profiles added to return
+ * @cd: pointer to command details structure
+ *
+ * Add RL profile (0x0410)
+ */
+static enum ice_status
+ice_aq_add_rl_profile(struct ice_hw *hw, u16 num_profiles,
+		      struct ice_aqc_rl_profile_generic_elem *buf,
+		      u16 buf_size, u16 *num_profiles_added,
+		      struct ice_sq_cd *cd)
+{
+	return ice_aq_rl_profile(hw, ice_aqc_opc_add_rl_profiles,
+				 num_profiles, buf,
+				 buf_size, num_profiles_added, cd);
+}
+
+/**
+ * ice_aq_remove_rl_profile - removes RL profile(s)
+ * @hw: pointer to the HW struct
+ * @num_profiles: the number of profile(s) to remove
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_profiles_removed: total number of profiles removed to return
+ * @cd: pointer to command details structure or NULL
+ *
+ * Remove RL profile (0x0415)
+ */
+static enum ice_status
+ice_aq_remove_rl_profile(struct ice_hw *hw, u16 num_profiles,
+			 struct ice_aqc_rl_profile_generic_elem *buf,
+			 u16 buf_size, u16 *num_profiles_removed,
+			 struct ice_sq_cd *cd)
+{
+	return ice_aq_rl_profile(hw, ice_aqc_opc_remove_rl_profiles,
+				 num_profiles, buf,
+				 buf_size, num_profiles_removed, cd);
+}
+
+/**
+ * ice_sched_del_rl_profile - remove RL profile
+ * @hw: pointer to the HW struct
+ * @rl_info: rate limit profile information
+ *
+ * If the profile ID is not referenced anymore, it removes profile ID with
+ * its associated parameters from HW DB,and locally. The caller needs to
+ * hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_del_rl_profile(struct ice_hw *hw,
+			 struct ice_aqc_rl_profile_info *rl_info)
+{
+	struct ice_aqc_rl_profile_generic_elem *buf;
+	u16 num_profiles_removed;
+	enum ice_status status;
+	u16 num_profiles = 1;
+
+	if (rl_info->prof_id_ref != 0)
+		return ICE_ERR_IN_USE;
+
+	/* Safe to remove profile ID */
+	buf = (struct ice_aqc_rl_profile_generic_elem *)
+		&rl_info->profile;
+	status = ice_aq_remove_rl_profile(hw, num_profiles, buf, sizeof(*buf),
+					  &num_profiles_removed, NULL);
+	if (status || num_profiles_removed != num_profiles)
+		return ICE_ERR_CFG;
+
+	/* Delete stale entry now */
+	list_del(&rl_info->list_entry);
+	devm_kfree(ice_hw_to_dev(hw), rl_info);
+	return status;
+}
+
+/**
+ * ice_sched_clear_rl_prof - clears RL prof entries
+ * @pi: port information structure
+ *
+ * This function removes all RL profile from HW as well as from SW DB.
+ */
+static void ice_sched_clear_rl_prof(struct ice_port_info *pi)
+{
+	u16 ln;
+
+	for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) {
+		struct ice_aqc_rl_profile_info *rl_prof_elem;
+		struct ice_aqc_rl_profile_info *rl_prof_tmp;
+
+		list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp,
+					 &pi->rl_prof_list[ln], list_entry) {
+			struct ice_hw *hw = pi->hw;
+			enum ice_status status;
+
+			rl_prof_elem->prof_id_ref = 0;
+			status = ice_sched_del_rl_profile(hw, rl_prof_elem);
+			if (status) {
+				ice_debug(hw, ICE_DBG_SCHED,
+					  "Remove rl profile failed\n");
+				/* On error, free mem required */
+				list_del(&rl_prof_elem->list_entry);
+				devm_kfree(ice_hw_to_dev(hw), rl_prof_elem);
+			}
+		}
+	}
+}
+
+/**
  * ice_sched_clear_agg - clears the aggregator related information
  * @hw: pointer to the hardware structure
  *
@@ -592,6 +756,8 @@ static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
 {
 	if (!pi)
 		return;
+	/* remove RL profiles related lists */
+	ice_sched_clear_rl_prof(pi);
 	if (pi->root) {
 		ice_free_sched_node(pi, pi->root);
 		pi->root = NULL;
@@ -632,8 +798,7 @@ void ice_sched_cleanup_all(struct ice_hw *hw)
 		hw->layer_info = NULL;
 	}
 
-	if (hw->port_info)
-		ice_sched_clear_port(hw->port_info);
+	ice_sched_clear_port(hw->port_info);
 
 	hw->num_tx_sched_layers = 0;
 	hw->num_tx_sched_phys_layers = 0;
@@ -1014,6 +1179,8 @@ enum ice_status ice_sched_init_port(struct ice_port_info *pi)
 	/* initialize the port for handling the scheduler tree */
 	pi->port_state = ICE_SCHED_PORT_STATE_READY;
 	mutex_init(&pi->sched_lock);
+	for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
+		INIT_LIST_HEAD(&pi->rl_prof_list[i]);
 
 err_init_port:
 	if (status && pi->root) {
@@ -1062,8 +1229,8 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
 	 * and so on. This array will be populated from root (index 0) to
 	 * qgroup layer 7. Leaf node has no children.
 	 */
-	for (i = 0; i < hw->num_tx_sched_layers; i++) {
-		max_sibl = buf->layer_props[i].max_sibl_grp_sz;
+	for (i = 0; i < hw->num_tx_sched_layers - 1; i++) {
+		max_sibl = buf->layer_props[i + 1].max_sibl_grp_sz;
 		hw->max_children[i] = le16_to_cpu(max_sibl);
 	}
 
@@ -1670,3 +1837,1095 @@ enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
 {
 	return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
 }
+
+/**
+ * ice_sched_rm_unused_rl_prof - remove unused RL profile
+ * @pi: port information structure
+ *
+ * This function removes unused rate limit profiles from the HW and
+ * SW DB. The caller needs to hold scheduler lock.
+ */
+static void ice_sched_rm_unused_rl_prof(struct ice_port_info *pi)
+{
+	u16 ln;
+
+	for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) {
+		struct ice_aqc_rl_profile_info *rl_prof_elem;
+		struct ice_aqc_rl_profile_info *rl_prof_tmp;
+
+		list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp,
+					 &pi->rl_prof_list[ln], list_entry) {
+			if (!ice_sched_del_rl_profile(pi->hw, rl_prof_elem))
+				ice_debug(pi->hw, ICE_DBG_SCHED,
+					  "Removed rl profile\n");
+		}
+	}
+}
+
+/**
+ * ice_sched_update_elem - update element
+ * @hw: pointer to the HW struct
+ * @node: pointer to node
+ * @info: node info to update
+ *
+ * It updates the HW DB, and local SW DB of node. It updates the scheduling
+ * parameters of node from argument info data buffer (Info->data buf) and
+ * returns success or error on config sched element failure. The caller
+ * needs to hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_update_elem(struct ice_hw *hw, struct ice_sched_node *node,
+		      struct ice_aqc_txsched_elem_data *info)
+{
+	struct ice_aqc_conf_elem buf;
+	enum ice_status status;
+	u16 elem_cfgd = 0;
+	u16 num_elems = 1;
+
+	buf.generic[0] = *info;
+	/* Parent TEID is reserved field in this aq call */
+	buf.generic[0].parent_teid = 0;
+	/* Element type is reserved field in this aq call */
+	buf.generic[0].data.elem_type = 0;
+	/* Flags is reserved field in this aq call */
+	buf.generic[0].data.flags = 0;
+
+	/* Update HW DB */
+	/* Configure element node */
+	status = ice_aq_cfg_sched_elems(hw, num_elems, &buf, sizeof(buf),
+					&elem_cfgd, NULL);
+	if (status || elem_cfgd != num_elems) {
+		ice_debug(hw, ICE_DBG_SCHED, "Config sched elem error\n");
+		return ICE_ERR_CFG;
+	}
+
+	/* Config success case */
+	/* Now update local SW DB */
+	/* Only copy the data portion of info buffer */
+	node->info.data = info->data;
+	return status;
+}
+
+/**
+ * ice_sched_cfg_node_bw_alloc - configure node BW weight/alloc params
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @rl_type: rate limit type CIR, EIR, or shared
+ * @bw_alloc: BW weight/allocation
+ *
+ * This function configures node element's BW allocation.
+ */
+static enum ice_status
+ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
+			    enum ice_rl_type rl_type, u8 bw_alloc)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+	enum ice_status status;
+
+	buf = node->info;
+	data = &buf.data;
+	if (rl_type == ICE_MIN_BW) {
+		data->valid_sections |= ICE_AQC_ELEM_VALID_CIR;
+		data->cir_bw.bw_alloc = cpu_to_le16(bw_alloc);
+	} else if (rl_type == ICE_MAX_BW) {
+		data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+		data->eir_bw.bw_alloc = cpu_to_le16(bw_alloc);
+	} else {
+		return ICE_ERR_PARAM;
+	}
+
+	/* Configure element */
+	status = ice_sched_update_elem(hw, node, &buf);
+	return status;
+}
+
+/**
+ * ice_set_clear_cir_bw - set or clear CIR BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear CIR bandwidth (BW) in the passed param bw_t_info.
+ */
+static void
+ice_set_clear_cir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+	if (bw == ICE_SCHED_DFLT_BW) {
+		clear_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->cir_bw.bw = 0;
+	} else {
+		/* Save type of BW information */
+		set_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->cir_bw.bw = bw;
+	}
+}
+
+/**
+ * ice_set_clear_eir_bw - set or clear EIR BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear EIR bandwidth (BW) in the passed param bw_t_info.
+ */
+static void
+ice_set_clear_eir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+	if (bw == ICE_SCHED_DFLT_BW) {
+		clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->eir_bw.bw = 0;
+	} else {
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element.
+		 * First clear earlier saved shared BW information.
+		 */
+		clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+		bw_t_info->shared_bw = 0;
+		/* save EIR BW information */
+		set_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->eir_bw.bw = bw;
+	}
+}
+
+/**
+ * ice_set_clear_shared_bw - set or clear shared BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear shared bandwidth (BW) in the passed param bw_t_info.
+ */
+static void
+ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+	if (bw == ICE_SCHED_DFLT_BW) {
+		clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+		bw_t_info->shared_bw = 0;
+	} else {
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element.
+		 * First clear earlier saved EIR BW information.
+		 */
+		clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->eir_bw.bw = 0;
+		/* save shared BW information */
+		set_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+		bw_t_info->shared_bw = bw;
+	}
+}
+
+/**
+ * ice_sched_calc_wakeup - calculate RL profile wakeup parameter
+ * @bw: bandwidth in Kbps
+ *
+ * This function calculates the wakeup parameter of RL profile.
+ */
+static u16 ice_sched_calc_wakeup(s32 bw)
+{
+	s64 bytes_per_sec, wakeup_int, wakeup_a, wakeup_b, wakeup_f;
+	s32 wakeup_f_int;
+	u16 wakeup = 0;
+
+	/* Get the wakeup integer value */
+	bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
+	wakeup_int = div64_long(ICE_RL_PROF_FREQUENCY, bytes_per_sec);
+	if (wakeup_int > 63) {
+		wakeup = (u16)((1 << 15) | wakeup_int);
+	} else {
+		/* Calculate fraction value up to 4 decimals
+		 * Convert Integer value to a constant multiplier
+		 */
+		wakeup_b = (s64)ICE_RL_PROF_MULTIPLIER * wakeup_int;
+		wakeup_a = div64_long((s64)ICE_RL_PROF_MULTIPLIER *
+					   ICE_RL_PROF_FREQUENCY,
+				      bytes_per_sec);
+
+		/* Get Fraction value */
+		wakeup_f = wakeup_a - wakeup_b;
+
+		/* Round up the Fractional value via Ceil(Fractional value) */
+		if (wakeup_f > div64_long(ICE_RL_PROF_MULTIPLIER, 2))
+			wakeup_f += 1;
+
+		wakeup_f_int = (s32)div64_long(wakeup_f * ICE_RL_PROF_FRACTION,
+					       ICE_RL_PROF_MULTIPLIER);
+		wakeup |= (u16)(wakeup_int << 9);
+		wakeup |= (u16)(0x1ff & wakeup_f_int);
+	}
+
+	return wakeup;
+}
+
+/**
+ * ice_sched_bw_to_rl_profile - convert BW to profile parameters
+ * @bw: bandwidth in Kbps
+ * @profile: profile parameters to return
+ *
+ * This function converts the BW to profile structure format.
+ */
+static enum ice_status
+ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile)
+{
+	enum ice_status status = ICE_ERR_PARAM;
+	s64 bytes_per_sec, ts_rate, mv_tmp;
+	bool found = false;
+	s32 encode = 0;
+	s64 mv = 0;
+	s32 i;
+
+	/* Bw settings range is from 0.5Mb/sec to 100Gb/sec */
+	if (bw < ICE_SCHED_MIN_BW || bw > ICE_SCHED_MAX_BW)
+		return status;
+
+	/* Bytes per second from Kbps */
+	bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
+
+	/* encode is 6 bits but really useful are 5 bits */
+	for (i = 0; i < 64; i++) {
+		u64 pow_result = BIT_ULL(i);
+
+		ts_rate = div64_long((s64)ICE_RL_PROF_FREQUENCY,
+				     pow_result * ICE_RL_PROF_TS_MULTIPLIER);
+		if (ts_rate <= 0)
+			continue;
+
+		/* Multiplier value */
+		mv_tmp = div64_long(bytes_per_sec * ICE_RL_PROF_MULTIPLIER,
+				    ts_rate);
+
+		/* Round to the nearest ICE_RL_PROF_MULTIPLIER */
+		mv = round_up_64bit(mv_tmp, ICE_RL_PROF_MULTIPLIER);
+
+		/* First multiplier value greater than the given
+		 * accuracy bytes
+		 */
+		if (mv > ICE_RL_PROF_ACCURACY_BYTES) {
+			encode = i;
+			found = true;
+			break;
+		}
+	}
+	if (found) {
+		u16 wm;
+
+		wm = ice_sched_calc_wakeup(bw);
+		profile->rl_multiply = cpu_to_le16(mv);
+		profile->wake_up_calc = cpu_to_le16(wm);
+		profile->rl_encode = cpu_to_le16(encode);
+		status = 0;
+	} else {
+		status = ICE_ERR_DOES_NOT_EXIST;
+	}
+
+	return status;
+}
+
+/**
+ * ice_sched_add_rl_profile - add RL profile
+ * @pi: port information structure
+ * @rl_type: type of rate limit BW - min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ * @layer_num: specifies in which layer to create profile
+ *
+ * This function first checks the existing list for corresponding BW
+ * parameter. If it exists, it returns the associated profile otherwise
+ * it creates a new rate limit profile for requested BW, and adds it to
+ * the HW DB and local list. It returns the new profile or null on error.
+ * The caller needs to hold the scheduler lock.
+ */
+static struct ice_aqc_rl_profile_info *
+ice_sched_add_rl_profile(struct ice_port_info *pi,
+			 enum ice_rl_type rl_type, u32 bw, u8 layer_num)
+{
+	struct ice_aqc_rl_profile_generic_elem *buf;
+	struct ice_aqc_rl_profile_info *rl_prof_elem;
+	u16 profiles_added = 0, num_profiles = 1;
+	enum ice_status status;
+	struct ice_hw *hw;
+	u8 profile_type;
+
+	if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM)
+		return NULL;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR;
+		break;
+	case ICE_MAX_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR;
+		break;
+	case ICE_SHARED_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL;
+		break;
+	default:
+		return NULL;
+	}
+
+	if (!pi)
+		return NULL;
+	hw = pi->hw;
+	list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num],
+			    list_entry)
+		if (rl_prof_elem->profile.flags == profile_type &&
+		    rl_prof_elem->bw == bw)
+			/* Return existing profile ID info */
+			return rl_prof_elem;
+
+	/* Create new profile ID */
+	rl_prof_elem = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rl_prof_elem),
+				    GFP_KERNEL);
+
+	if (!rl_prof_elem)
+		return NULL;
+
+	status = ice_sched_bw_to_rl_profile(bw, &rl_prof_elem->profile);
+	if (status)
+		goto exit_add_rl_prof;
+
+	rl_prof_elem->bw = bw;
+	/* layer_num is zero relative, and fw expects level from 1 to 9 */
+	rl_prof_elem->profile.level = layer_num + 1;
+	rl_prof_elem->profile.flags = profile_type;
+	rl_prof_elem->profile.max_burst_size = cpu_to_le16(hw->max_burst_size);
+
+	/* Create new entry in HW DB */
+	buf = (struct ice_aqc_rl_profile_generic_elem *)
+		&rl_prof_elem->profile;
+	status = ice_aq_add_rl_profile(hw, num_profiles, buf, sizeof(*buf),
+				       &profiles_added, NULL);
+	if (status || profiles_added != num_profiles)
+		goto exit_add_rl_prof;
+
+	/* Good entry - add in the list */
+	rl_prof_elem->prof_id_ref = 0;
+	list_add(&rl_prof_elem->list_entry, &pi->rl_prof_list[layer_num]);
+	return rl_prof_elem;
+
+exit_add_rl_prof:
+	devm_kfree(ice_hw_to_dev(hw), rl_prof_elem);
+	return NULL;
+}
+
+/**
+ * ice_sched_cfg_node_bw_lmt - configure node sched params
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @rl_type: rate limit type CIR, EIR, or shared
+ * @rl_prof_id: rate limit profile ID
+ *
+ * This function configures node element's BW limit.
+ */
+static enum ice_status
+ice_sched_cfg_node_bw_lmt(struct ice_hw *hw, struct ice_sched_node *node,
+			  enum ice_rl_type rl_type, u16 rl_prof_id)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+
+	buf = node->info;
+	data = &buf.data;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		data->valid_sections |= ICE_AQC_ELEM_VALID_CIR;
+		data->cir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id);
+		break;
+	case ICE_MAX_BW:
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element
+		 */
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED)
+			return ICE_ERR_CFG;
+		data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+		data->eir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id);
+		break;
+	case ICE_SHARED_BW:
+		/* Check for removing shared BW */
+		if (rl_prof_id == ICE_SCHED_NO_SHARED_RL_PROF_ID) {
+			/* remove shared profile */
+			data->valid_sections &= ~ICE_AQC_ELEM_VALID_SHARED;
+			data->srl_id = 0; /* clear SRL field */
+
+			/* enable back EIR to default profile */
+			data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+			data->eir_bw.bw_profile_idx =
+				cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+			break;
+		}
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element
+		 */
+		if ((data->valid_sections & ICE_AQC_ELEM_VALID_EIR) &&
+		    (le16_to_cpu(data->eir_bw.bw_profile_idx) !=
+			    ICE_SCHED_DFLT_RL_PROF_ID))
+			return ICE_ERR_CFG;
+		/* EIR BW is set to default, disable it */
+		data->valid_sections &= ~ICE_AQC_ELEM_VALID_EIR;
+		/* Okay to enable shared BW now */
+		data->valid_sections |= ICE_AQC_ELEM_VALID_SHARED;
+		data->srl_id = cpu_to_le16(rl_prof_id);
+		break;
+	default:
+		/* Unknown rate limit type */
+		return ICE_ERR_PARAM;
+	}
+
+	/* Configure element */
+	return ice_sched_update_elem(hw, node, &buf);
+}
+
+/**
+ * ice_sched_get_node_rl_prof_id - get node's rate limit profile ID
+ * @node: sched node
+ * @rl_type: rate limit type
+ *
+ * If existing profile matches, it returns the corresponding rate
+ * limit profile ID, otherwise it returns an invalid ID as error.
+ */
+static u16
+ice_sched_get_node_rl_prof_id(struct ice_sched_node *node,
+			      enum ice_rl_type rl_type)
+{
+	u16 rl_prof_id = ICE_SCHED_INVAL_PROF_ID;
+	struct ice_aqc_txsched_elem *data;
+
+	data = &node->info.data;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_CIR)
+			rl_prof_id = le16_to_cpu(data->cir_bw.bw_profile_idx);
+		break;
+	case ICE_MAX_BW:
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_EIR)
+			rl_prof_id = le16_to_cpu(data->eir_bw.bw_profile_idx);
+		break;
+	case ICE_SHARED_BW:
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED)
+			rl_prof_id = le16_to_cpu(data->srl_id);
+		break;
+	default:
+		break;
+	}
+
+	return rl_prof_id;
+}
+
+/**
+ * ice_sched_get_rl_prof_layer - selects rate limit profile creation layer
+ * @pi: port information structure
+ * @rl_type: type of rate limit BW - min, max, or shared
+ * @layer_index: layer index
+ *
+ * This function returns requested profile creation layer.
+ */
+static u8
+ice_sched_get_rl_prof_layer(struct ice_port_info *pi, enum ice_rl_type rl_type,
+			    u8 layer_index)
+{
+	struct ice_hw *hw = pi->hw;
+
+	if (layer_index >= hw->num_tx_sched_layers)
+		return ICE_SCHED_INVAL_LAYER_NUM;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		if (hw->layer_info[layer_index].max_cir_rl_profiles)
+			return layer_index;
+		break;
+	case ICE_MAX_BW:
+		if (hw->layer_info[layer_index].max_eir_rl_profiles)
+			return layer_index;
+		break;
+	case ICE_SHARED_BW:
+		/* if current layer doesn't support SRL profile creation
+		 * then try a layer up or down.
+		 */
+		if (hw->layer_info[layer_index].max_srl_profiles)
+			return layer_index;
+		else if (layer_index < hw->num_tx_sched_layers - 1 &&
+			 hw->layer_info[layer_index + 1].max_srl_profiles)
+			return layer_index + 1;
+		else if (layer_index > 0 &&
+			 hw->layer_info[layer_index - 1].max_srl_profiles)
+			return layer_index - 1;
+		break;
+	default:
+		break;
+	}
+	return ICE_SCHED_INVAL_LAYER_NUM;
+}
+
+/**
+ * ice_sched_get_srl_node - get shared rate limit node
+ * @node: tree node
+ * @srl_layer: shared rate limit layer
+ *
+ * This function returns SRL node to be used for shared rate limit purpose.
+ * The caller needs to hold scheduler lock.
+ */
+static struct ice_sched_node *
+ice_sched_get_srl_node(struct ice_sched_node *node, u8 srl_layer)
+{
+	if (srl_layer > node->tx_sched_layer)
+		return node->children[0];
+	else if (srl_layer < node->tx_sched_layer)
+		/* Node can't be created without a parent. It will always
+		 * have a valid parent except root node.
+		 */
+		return node->parent;
+	else
+		return node;
+}
+
+/**
+ * ice_sched_rm_rl_profile - remove RL profile ID
+ * @pi: port information structure
+ * @layer_num: layer number where profiles are saved
+ * @profile_type: profile type like EIR, CIR, or SRL
+ * @profile_id: profile ID to remove
+ *
+ * This function removes rate limit profile from layer 'layer_num' of type
+ * 'profile_type' and profile ID as 'profile_id'. The caller needs to hold
+ * scheduler lock.
+ */
+static enum ice_status
+ice_sched_rm_rl_profile(struct ice_port_info *pi, u8 layer_num, u8 profile_type,
+			u16 profile_id)
+{
+	struct ice_aqc_rl_profile_info *rl_prof_elem;
+	enum ice_status status = 0;
+
+	if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM)
+		return ICE_ERR_PARAM;
+	/* Check the existing list for RL profile */
+	list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num],
+			    list_entry)
+		if (rl_prof_elem->profile.flags == profile_type &&
+		    le16_to_cpu(rl_prof_elem->profile.profile_id) ==
+		    profile_id) {
+			if (rl_prof_elem->prof_id_ref)
+				rl_prof_elem->prof_id_ref--;
+
+			/* Remove old profile ID from database */
+			status = ice_sched_del_rl_profile(pi->hw, rl_prof_elem);
+			if (status && status != ICE_ERR_IN_USE)
+				ice_debug(pi->hw, ICE_DBG_SCHED,
+					  "Remove rl profile failed\n");
+			break;
+		}
+	if (status == ICE_ERR_IN_USE)
+		status = 0;
+	return status;
+}
+
+/**
+ * ice_sched_set_node_bw_dflt - set node's bandwidth limit to default
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @rl_type: rate limit type min, max, or shared
+ * @layer_num: layer number where RL profiles are saved
+ *
+ * This function configures node element's BW rate limit profile ID of
+ * type CIR, EIR, or SRL to default. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_set_node_bw_dflt(struct ice_port_info *pi,
+			   struct ice_sched_node *node,
+			   enum ice_rl_type rl_type, u8 layer_num)
+{
+	enum ice_status status;
+	struct ice_hw *hw;
+	u8 profile_type;
+	u16 rl_prof_id;
+	u16 old_id;
+
+	hw = pi->hw;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR;
+		rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID;
+		break;
+	case ICE_MAX_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR;
+		rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID;
+		break;
+	case ICE_SHARED_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL;
+		/* No SRL is configured for default case */
+		rl_prof_id = ICE_SCHED_NO_SHARED_RL_PROF_ID;
+		break;
+	default:
+		return ICE_ERR_PARAM;
+	}
+	/* Save existing RL prof ID for later clean up */
+	old_id = ice_sched_get_node_rl_prof_id(node, rl_type);
+	/* Configure BW scheduling parameters */
+	status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id);
+	if (status)
+		return status;
+
+	/* Remove stale RL profile ID */
+	if (old_id == ICE_SCHED_DFLT_RL_PROF_ID ||
+	    old_id == ICE_SCHED_INVAL_PROF_ID)
+		return 0;
+
+	return ice_sched_rm_rl_profile(pi, layer_num, profile_type, old_id);
+}
+
+/**
+ * ice_sched_set_eir_srl_excl - set EIR/SRL exclusiveness
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @layer_num: layer number where rate limit profiles are saved
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth value
+ *
+ * This function prepares node element's bandwidth to SRL or EIR exclusively.
+ * EIR BW and Shared BW profiles are mutually exclusive and hence only one of
+ * them may be set for any given element. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_set_eir_srl_excl(struct ice_port_info *pi,
+			   struct ice_sched_node *node,
+			   u8 layer_num, enum ice_rl_type rl_type, u32 bw)
+{
+	if (rl_type == ICE_SHARED_BW) {
+		/* SRL node passed in this case, it may be different node */
+		if (bw == ICE_SCHED_DFLT_BW)
+			/* SRL being removed, ice_sched_cfg_node_bw_lmt()
+			 * enables EIR to default. EIR is not set in this
+			 * case, so no additional action is required.
+			 */
+			return 0;
+
+		/* SRL being configured, set EIR to default here.
+		 * ice_sched_cfg_node_bw_lmt() disables EIR when it
+		 * configures SRL
+		 */
+		return ice_sched_set_node_bw_dflt(pi, node, ICE_MAX_BW,
+						  layer_num);
+	} else if (rl_type == ICE_MAX_BW &&
+		   node->info.data.valid_sections & ICE_AQC_ELEM_VALID_SHARED) {
+		/* Remove Shared profile. Set default shared BW call
+		 * removes shared profile for a node.
+		 */
+		return ice_sched_set_node_bw_dflt(pi, node,
+						  ICE_SHARED_BW,
+						  layer_num);
+	}
+	return 0;
+}
+
+/**
+ * ice_sched_set_node_bw - set node's bandwidth
+ * @pi: port information structure
+ * @node: tree node
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ * @layer_num: layer number
+ *
+ * This function adds new profile corresponding to requested BW, configures
+ * node's RL profile ID of type CIR, EIR, or SRL, and removes old profile
+ * ID from local database. The caller needs to hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node,
+		      enum ice_rl_type rl_type, u32 bw, u8 layer_num)
+{
+	struct ice_aqc_rl_profile_info *rl_prof_info;
+	enum ice_status status = ICE_ERR_PARAM;
+	struct ice_hw *hw = pi->hw;
+	u16 old_id, rl_prof_id;
+
+	rl_prof_info = ice_sched_add_rl_profile(pi, rl_type, bw, layer_num);
+	if (!rl_prof_info)
+		return status;
+
+	rl_prof_id = le16_to_cpu(rl_prof_info->profile.profile_id);
+
+	/* Save existing RL prof ID for later clean up */
+	old_id = ice_sched_get_node_rl_prof_id(node, rl_type);
+	/* Configure BW scheduling parameters */
+	status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id);
+	if (status)
+		return status;
+
+	/* New changes has been applied */
+	/* Increment the profile ID reference count */
+	rl_prof_info->prof_id_ref++;
+
+	/* Check for old ID removal */
+	if ((old_id == ICE_SCHED_DFLT_RL_PROF_ID && rl_type != ICE_SHARED_BW) ||
+	    old_id == ICE_SCHED_INVAL_PROF_ID || old_id == rl_prof_id)
+		return 0;
+
+	return ice_sched_rm_rl_profile(pi, layer_num,
+				       rl_prof_info->profile.flags,
+				       old_id);
+}
+
+/**
+ * ice_sched_set_node_bw_lmt - set node's BW limit
+ * @pi: port information structure
+ * @node: tree node
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * It updates node's BW limit parameters like BW RL profile ID of type CIR,
+ * EIR, or SRL. The caller needs to hold scheduler lock.
+ */
+static enum ice_status
+ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node,
+			  enum ice_rl_type rl_type, u32 bw)
+{
+	struct ice_sched_node *cfg_node = node;
+	enum ice_status status;
+
+	struct ice_hw *hw;
+	u8 layer_num;
+
+	if (!pi)
+		return ICE_ERR_PARAM;
+	hw = pi->hw;
+	/* Remove unused RL profile IDs from HW and SW DB */
+	ice_sched_rm_unused_rl_prof(pi);
+	layer_num = ice_sched_get_rl_prof_layer(pi, rl_type,
+						node->tx_sched_layer);
+	if (layer_num >= hw->num_tx_sched_layers)
+		return ICE_ERR_PARAM;
+
+	if (rl_type == ICE_SHARED_BW) {
+		/* SRL node may be different */
+		cfg_node = ice_sched_get_srl_node(node, layer_num);
+		if (!cfg_node)
+			return ICE_ERR_CFG;
+	}
+	/* EIR BW and Shared BW profiles are mutually exclusive and
+	 * hence only one of them may be set for any given element
+	 */
+	status = ice_sched_set_eir_srl_excl(pi, cfg_node, layer_num, rl_type,
+					    bw);
+	if (status)
+		return status;
+	if (bw == ICE_SCHED_DFLT_BW)
+		return ice_sched_set_node_bw_dflt(pi, cfg_node, rl_type,
+						  layer_num);
+	return ice_sched_set_node_bw(pi, cfg_node, rl_type, bw, layer_num);
+}
+
+/**
+ * ice_sched_set_node_bw_dflt_lmt - set node's BW limit to default
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @rl_type: rate limit type min, max, or shared
+ *
+ * This function configures node element's BW rate limit profile ID of
+ * type CIR, EIR, or SRL to default. This function needs to be called
+ * with the scheduler lock held.
+ */
+static enum ice_status
+ice_sched_set_node_bw_dflt_lmt(struct ice_port_info *pi,
+			       struct ice_sched_node *node,
+			       enum ice_rl_type rl_type)
+{
+	return ice_sched_set_node_bw_lmt(pi, node, rl_type,
+					 ICE_SCHED_DFLT_BW);
+}
+
+/**
+ * ice_sched_validate_srl_node - Check node for SRL applicability
+ * @node: sched node to configure
+ * @sel_layer: selected SRL layer
+ *
+ * This function checks if the SRL can be applied to a selected layer node on
+ * behalf of the requested node (first argument). This function needs to be
+ * called with scheduler lock held.
+ */
+static enum ice_status
+ice_sched_validate_srl_node(struct ice_sched_node *node, u8 sel_layer)
+{
+	/* SRL profiles are not available on all layers. Check if the
+	 * SRL profile can be applied to a node above or below the
+	 * requested node. SRL configuration is possible only if the
+	 * selected layer's node has single child.
+	 */
+	if (sel_layer == node->tx_sched_layer ||
+	    ((sel_layer == node->tx_sched_layer + 1) &&
+	    node->num_children == 1) ||
+	    ((sel_layer == node->tx_sched_layer - 1) &&
+	    (node->parent && node->parent->num_children == 1)))
+		return 0;
+
+	return ICE_ERR_CFG;
+}
+
+/**
+ * ice_sched_save_q_bw - save queue node's BW information
+ * @q_ctx: queue context structure
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save BW information of queue type node for post replay use.
+ */
+static enum ice_status
+ice_sched_save_q_bw(struct ice_q_ctx *q_ctx, enum ice_rl_type rl_type, u32 bw)
+{
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		ice_set_clear_cir_bw(&q_ctx->bw_t_info, bw);
+		break;
+	case ICE_MAX_BW:
+		ice_set_clear_eir_bw(&q_ctx->bw_t_info, bw);
+		break;
+	case ICE_SHARED_BW:
+		ice_set_clear_shared_bw(&q_ctx->bw_t_info, bw);
+		break;
+	default:
+		return ICE_ERR_PARAM;
+	}
+	return 0;
+}
+
+/**
+ * ice_sched_set_q_bw_lmt - sets queue BW limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw: bandwidth in Kbps
+ *
+ * This function sets BW limit of queue scheduling node.
+ */
+static enum ice_status
+ice_sched_set_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		       u16 q_handle, enum ice_rl_type rl_type, u32 bw)
+{
+	enum ice_status status = ICE_ERR_PARAM;
+	struct ice_sched_node *node;
+	struct ice_q_ctx *q_ctx;
+
+	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+		return ICE_ERR_PARAM;
+	mutex_lock(&pi->sched_lock);
+	q_ctx = ice_get_lan_q_ctx(pi->hw, vsi_handle, tc, q_handle);
+	if (!q_ctx)
+		goto exit_q_bw_lmt;
+	node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+	if (!node) {
+		ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong q_teid\n");
+		goto exit_q_bw_lmt;
+	}
+
+	/* Return error if it is not a leaf node */
+	if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF)
+		goto exit_q_bw_lmt;
+
+	/* SRL bandwidth layer selection */
+	if (rl_type == ICE_SHARED_BW) {
+		u8 sel_layer; /* selected layer */
+
+		sel_layer = ice_sched_get_rl_prof_layer(pi, rl_type,
+							node->tx_sched_layer);
+		if (sel_layer >= pi->hw->num_tx_sched_layers) {
+			status = ICE_ERR_PARAM;
+			goto exit_q_bw_lmt;
+		}
+		status = ice_sched_validate_srl_node(node, sel_layer);
+		if (status)
+			goto exit_q_bw_lmt;
+	}
+
+	if (bw == ICE_SCHED_DFLT_BW)
+		status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type);
+	else
+		status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw);
+
+	if (!status)
+		status = ice_sched_save_q_bw(q_ctx, rl_type, bw);
+
+exit_q_bw_lmt:
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_cfg_q_bw_lmt - configure queue BW limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw: bandwidth in Kbps
+ *
+ * This function configures BW limit of queue scheduling node.
+ */
+enum ice_status
+ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		 u16 q_handle, enum ice_rl_type rl_type, u32 bw)
+{
+	return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type,
+				      bw);
+}
+
+/**
+ * ice_cfg_q_bw_dflt_lmt - configure queue BW default limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ *
+ * This function configures BW default limit of queue scheduling node.
+ */
+enum ice_status
+ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      u16 q_handle, enum ice_rl_type rl_type)
+{
+	return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type,
+				      ICE_SCHED_DFLT_BW);
+}
+
+/**
+ * ice_cfg_rl_burst_size - Set burst size value
+ * @hw: pointer to the HW struct
+ * @bytes: burst size in bytes
+ *
+ * This function configures/set the burst size to requested new value. The new
+ * burst size value is used for future rate limit calls. It doesn't change the
+ * existing or previously created RL profiles.
+ */
+enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes)
+{
+	u16 burst_size_to_prog;
+
+	if (bytes < ICE_MIN_BURST_SIZE_ALLOWED ||
+	    bytes > ICE_MAX_BURST_SIZE_ALLOWED)
+		return ICE_ERR_PARAM;
+	if (ice_round_to_num(bytes, 64) <=
+	    ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY) {
+		/* 64 byte granularity case */
+		/* Disable MSB granularity bit */
+		burst_size_to_prog = ICE_64_BYTE_GRANULARITY;
+		/* round number to nearest 64 byte granularity */
+		bytes = ice_round_to_num(bytes, 64);
+		/* The value is in 64 byte chunks */
+		burst_size_to_prog |= (u16)(bytes / 64);
+	} else {
+		/* k bytes granularity case */
+		/* Enable MSB granularity bit */
+		burst_size_to_prog = ICE_KBYTE_GRANULARITY;
+		/* round number to nearest 1024 granularity */
+		bytes = ice_round_to_num(bytes, 1024);
+		/* check rounding doesn't go beyond allowed */
+		if (bytes > ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY)
+			bytes = ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY;
+		/* The value is in k bytes */
+		burst_size_to_prog |= (u16)(bytes / 1024);
+	}
+	hw->max_burst_size = burst_size_to_prog;
+	return 0;
+}
+
+/**
+ * ice_sched_replay_node_prio - re-configure node priority
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @priority: priority value
+ *
+ * This function configures node element's priority value. It
+ * needs to be called with scheduler lock held.
+ */
+static enum ice_status
+ice_sched_replay_node_prio(struct ice_hw *hw, struct ice_sched_node *node,
+			   u8 priority)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+	enum ice_status status;
+
+	buf = node->info;
+	data = &buf.data;
+	data->valid_sections |= ICE_AQC_ELEM_VALID_GENERIC;
+	data->generic = priority;
+
+	/* Configure element */
+	status = ice_sched_update_elem(hw, node, &buf);
+	return status;
+}
+
+/**
+ * ice_sched_replay_node_bw - replay node(s) BW
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @bw_t_info: BW type information
+ *
+ * This function restores node's BW from bw_t_info. The caller needs
+ * to hold the scheduler lock.
+ */
+static enum ice_status
+ice_sched_replay_node_bw(struct ice_hw *hw, struct ice_sched_node *node,
+			 struct ice_bw_type_info *bw_t_info)
+{
+	struct ice_port_info *pi = hw->port_info;
+	enum ice_status status = ICE_ERR_PARAM;
+	u16 bw_alloc;
+
+	if (!node)
+		return status;
+	if (bitmap_empty(bw_t_info->bw_t_bitmap, ICE_BW_TYPE_CNT))
+		return 0;
+	if (test_bit(ICE_BW_TYPE_PRIO, bw_t_info->bw_t_bitmap)) {
+		status = ice_sched_replay_node_prio(hw, node,
+						    bw_t_info->generic);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap)) {
+		status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW,
+						   bw_t_info->cir_bw.bw);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_CIR_WT, bw_t_info->bw_t_bitmap)) {
+		bw_alloc = bw_t_info->cir_bw.bw_alloc;
+		status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MIN_BW,
+						     bw_alloc);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap)) {
+		status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW,
+						   bw_t_info->eir_bw.bw);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_EIR_WT, bw_t_info->bw_t_bitmap)) {
+		bw_alloc = bw_t_info->eir_bw.bw_alloc;
+		status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MAX_BW,
+						     bw_alloc);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap))
+		status = ice_sched_set_node_bw_lmt(pi, node, ICE_SHARED_BW,
+						   bw_t_info->shared_bw);
+	return status;
+}
+
+/**
+ * ice_sched_replay_q_bw - replay queue type node BW
+ * @pi: port information structure
+ * @q_ctx: queue context structure
+ *
+ * This function replays queue type node bandwidth. This function needs to be
+ * called with scheduler lock held.
+ */
+enum ice_status
+ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx)
+{
+	struct ice_sched_node *q_node;
+
+	/* Following also checks the presence of node in tree */
+	q_node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+	if (!q_node)
+		return ICE_ERR_PARAM;
+	return ice_sched_replay_node_bw(pi->hw, q_node, &q_ctx->bw_t_info);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
index 3902a8ad3025..f0593cfb6521 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.h
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -8,6 +8,36 @@
 
 #define ICE_QGRP_LAYER_OFFSET	2
 #define ICE_VSI_LAYER_OFFSET	4
+#define ICE_SCHED_INVAL_LAYER_NUM	0xFF
+/* Burst size is a 12 bits register that is configured while creating the RL
+ * profile(s). MSB is a granularity bit and tells the granularity type
+ * 0 - LSB bits are in 64 bytes granularity
+ * 1 - LSB bits are in 1K bytes granularity
+ */
+#define ICE_64_BYTE_GRANULARITY			0
+#define ICE_KBYTE_GRANULARITY			BIT(11)
+#define ICE_MIN_BURST_SIZE_ALLOWED		64 /* In Bytes */
+#define ICE_MAX_BURST_SIZE_ALLOWED \
+	((BIT(11) - 1) * 1024) /* In Bytes */
+#define ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY \
+	((BIT(11) - 1) * 64) /* In Bytes */
+#define ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY	ICE_MAX_BURST_SIZE_ALLOWED
+
+#define ICE_RL_PROF_FREQUENCY 446000000
+#define ICE_RL_PROF_ACCURACY_BYTES 128
+#define ICE_RL_PROF_MULTIPLIER 10000
+#define ICE_RL_PROF_TS_MULTIPLIER 32
+#define ICE_RL_PROF_FRACTION 512
+
+/* BW rate limit profile parameters list entry along
+ * with bandwidth maintained per layer in port info
+ */
+struct ice_aqc_rl_profile_info {
+	struct ice_aqc_rl_profile_elem profile;
+	struct list_head list_entry;
+	u32 bw;			/* requested */
+	u16 prof_id_ref;	/* profile ID to node association ref count */
+};
 
 struct ice_sched_agg_vsi_info {
 	struct list_head list_entry;
@@ -48,4 +78,13 @@ enum ice_status
 ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
 		  u8 owner, bool enable);
 enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle);
+enum ice_status
+ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		 u16 q_handle, enum ice_rl_type rl_type, u32 bw);
+enum ice_status
+ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      u16 q_handle, enum ice_rl_type rl_type);
+enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes);
+enum ice_status
+ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx);
 #endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 1acdd43a2edd..b5a53f862a83 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -416,8 +416,7 @@ ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
 		ice_save_vsi_ctx(hw, vsi_handle, tmp_vsi_ctx);
 	} else {
 		/* update with new HW VSI num */
-		if (tmp_vsi_ctx->vsi_num != vsi_ctx->vsi_num)
-			tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num;
+		tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num;
 	}
 
 	return 0;
@@ -2429,7 +2428,7 @@ ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
 	if (!ice_is_vsi_valid(hw, vsi_handle))
 		return ICE_ERR_PARAM;
 
-	if (vid)
+	if (promisc_mask & (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX))
 		recipe_id = ICE_SW_LKUP_PROMISC_VLAN;
 	else
 		recipe_id = ICE_SW_LKUP_PROMISC;
@@ -2441,13 +2440,18 @@ ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
 
 	mutex_lock(rule_lock);
 	list_for_each_entry(itr, rule_head, list_entry) {
+		struct ice_fltr_info *fltr_info;
 		u8 fltr_promisc_mask = 0;
 
 		if (!ice_vsi_uses_fltr(itr, vsi_handle))
 			continue;
+		fltr_info = &itr->fltr_info;
+
+		if (recipe_id == ICE_SW_LKUP_PROMISC_VLAN &&
+		    vid != fltr_info->l_data.mac_vlan.vlan_id)
+			continue;
 
-		fltr_promisc_mask |=
-			ice_determine_promisc_mask(&itr->fltr_info);
+		fltr_promisc_mask |= ice_determine_promisc_mask(fltr_info);
 
 		/* Skip if filter is not completely specified by given mask */
 		if (fltr_promisc_mask & ~promisc_mask)
@@ -2455,7 +2459,7 @@ ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
 
 		status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle,
 							&remove_list_head,
-							&itr->fltr_info);
+							fltr_info);
 		if (status) {
 			mutex_unlock(rule_lock);
 			goto free_fltr_list;
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index cb123fbe30be..fa14b9545dab 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -14,11 +14,6 @@
 #define ICE_VSI_INVAL_ID 0xffff
 #define ICE_INVAL_Q_HANDLE 0xFFFF
 
-/* VSI queue context structure */
-struct ice_q_ctx {
-	u16  q_handle;
-};
-
 /* VSI context structure for add/get/update/free operations */
 struct ice_vsi_ctx {
 	u16 vsi_num;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 33dd103035dc..2c212f64d99f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -5,8 +5,13 @@
 
 #include <linux/prefetch.h>
 #include <linux/mm.h>
+#include <linux/bpf_trace.h>
+#include <net/xdp.h>
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
 #include "ice.h"
 #include "ice_dcb_lib.h"
+#include "ice_xsk.h"
 
 #define ICE_RX_HDR_SIZE		256
 
@@ -19,7 +24,10 @@ static void
 ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
 {
 	if (tx_buf->skb) {
-		dev_kfree_skb_any(tx_buf->skb);
+		if (ice_ring_is_xdp(ring))
+			page_frag_free(tx_buf->raw_buf);
+		else
+			dev_kfree_skb_any(tx_buf->skb);
 		if (dma_unmap_len(tx_buf, len))
 			dma_unmap_single(ring->dev,
 					 dma_unmap_addr(tx_buf, dma),
@@ -51,6 +59,11 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
 {
 	u16 i;
 
+	if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
+		ice_xsk_clean_xdp_ring(tx_ring);
+		goto tx_skip_free;
+	}
+
 	/* ring already cleared, nothing to do */
 	if (!tx_ring->tx_buf)
 		return;
@@ -59,6 +72,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
 	for (i = 0; i < tx_ring->count; i++)
 		ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
 
+tx_skip_free:
 	memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
 
 	/* Zero out the descriptor ring */
@@ -136,8 +150,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
 		total_bytes += tx_buf->bytecount;
 		total_pkts += tx_buf->gso_segs;
 
-		/* free the skb */
-		napi_consume_skb(tx_buf->skb, napi_budget);
+		if (ice_ring_is_xdp(tx_ring))
+			page_frag_free(tx_buf->raw_buf);
+		else
+			/* free the skb */
+			napi_consume_skb(tx_buf->skb, napi_budget);
 
 		/* unmap skb header data */
 		dma_unmap_single(tx_ring->dev,
@@ -188,12 +205,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
 
 	i += tx_ring->count;
 	tx_ring->next_to_clean = i;
-	u64_stats_update_begin(&tx_ring->syncp);
-	tx_ring->stats.bytes += total_bytes;
-	tx_ring->stats.pkts += total_pkts;
-	u64_stats_update_end(&tx_ring->syncp);
-	tx_ring->q_vector->tx.total_bytes += total_bytes;
-	tx_ring->q_vector->tx.total_pkts += total_pkts;
+
+	ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
+
+	if (ice_ring_is_xdp(tx_ring))
+		return !!budget;
 
 	netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
 				  total_bytes);
@@ -273,6 +289,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
 	if (!rx_ring->rx_buf)
 		return;
 
+	if (rx_ring->xsk_umem) {
+		ice_xsk_clean_rx_ring(rx_ring);
+		goto rx_skip_free;
+	}
+
 	/* Free all the Rx ring sk_buffs */
 	for (i = 0; i < rx_ring->count; i++) {
 		struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
@@ -289,10 +310,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
 		 */
 		dma_sync_single_range_for_cpu(dev, rx_buf->dma,
 					      rx_buf->page_offset,
-					      ICE_RXBUF_2048, DMA_FROM_DEVICE);
+					      rx_ring->rx_buf_len,
+					      DMA_FROM_DEVICE);
 
 		/* free resources associated with mapping */
-		dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE,
+		dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring),
 				     DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
 		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
 
@@ -300,6 +322,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
 		rx_buf->page_offset = 0;
 	}
 
+rx_skip_free:
 	memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
 
 	/* Zero out the descriptor ring */
@@ -319,6 +342,10 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
 void ice_free_rx_ring(struct ice_ring *rx_ring)
 {
 	ice_clean_rx_ring(rx_ring);
+	if (rx_ring->vsi->type == ICE_VSI_PF)
+		if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+			xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	rx_ring->xdp_prog = NULL;
 	devm_kfree(rx_ring->dev, rx_ring->rx_buf);
 	rx_ring->rx_buf = NULL;
 
@@ -363,6 +390,15 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring)
 
 	rx_ring->next_to_use = 0;
 	rx_ring->next_to_clean = 0;
+
+	if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+		WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
+
+	if (rx_ring->vsi->type == ICE_VSI_PF &&
+	    !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+		if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+				     rx_ring->q_index))
+			goto err;
 	return 0;
 
 err:
@@ -372,34 +408,110 @@ err:
 }
 
 /**
- * ice_release_rx_desc - Store the new tail and head values
- * @rx_ring: ring to bump
- * @val: new head index
+ * ice_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
  */
-static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
 {
-	u16 prev_ntu = rx_ring->next_to_use;
+	if (ice_ring_uses_build_skb(rx_ring))
+		return ICE_SKB_PAD;
+	else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+		return XDP_PACKET_HEADROOM;
 
-	rx_ring->next_to_use = val;
+	return 0;
+}
 
-	/* update next to alloc since we have filled the ring */
-	rx_ring->next_to_alloc = val;
+/**
+ * ice_run_xdp - Executes an XDP program on initialized xdp_buff
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
+	    struct bpf_prog *xdp_prog)
+{
+	int err, result = ICE_XDP_PASS;
+	struct ice_ring *xdp_ring;
+	u32 act;
 
-	/* QRX_TAIL will be updated with any tail value, but hardware ignores
-	 * the lower 3 bits. This makes it so we only bump tail on meaningful
-	 * boundaries. Also, this allows us to bump tail on intervals of 8 up to
-	 * the budget depending on the current traffic load.
-	 */
-	val &= ~0x7;
-	if (prev_ntu != val) {
-		/* Force memory writes to complete before letting h/w
-		 * know there are new descriptors to fetch. (Only
-		 * applicable for weak-ordered memory model archs,
-		 * such as IA-64).
-		 */
-		wmb();
-		writel(val, rx_ring->tail);
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()];
+		result = ice_xmit_xdp_buff(xdp, xdp_ring);
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fallthrough -- not supported action */
+	case XDP_ABORTED:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		/* fallthrough -- handle aborts by dropping frame */
+	case XDP_DROP:
+		result = ICE_XDP_CONSUMED;
+		break;
 	}
+
+	return result;
+}
+
+/**
+ * ice_xdp_xmit - submit packets to XDP ring for transmission
+ * @dev: netdev
+ * @n: number of XDP frames to be transmitted
+ * @frames: XDP frames to be transmitted
+ * @flags: transmit flags
+ *
+ * Returns number of frames successfully sent. Frames that fail are
+ * free'ed via XDP return API.
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
+ */
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+	     u32 flags)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	unsigned int queue_index = smp_processor_id();
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_ring *xdp_ring;
+	int drops = 0, i;
+
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return -ENETDOWN;
+
+	if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq)
+		return -ENXIO;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	xdp_ring = vsi->xdp_rings[queue_index];
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
+
+		err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+		if (err != ICE_XDP_TX) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	if (unlikely(flags & XDP_XMIT_FLUSH))
+		ice_xdp_ring_update_tail(xdp_ring);
+
+	return n - drops;
 }
 
 /**
@@ -423,28 +535,28 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
 	}
 
 	/* alloc new page for storage */
-	page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+	page = dev_alloc_pages(ice_rx_pg_order(rx_ring));
 	if (unlikely(!page)) {
 		rx_ring->rx_stats.alloc_page_failed++;
 		return false;
 	}
 
 	/* map page for use */
-	dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring),
 				 DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
 
 	/* if mapping failed free memory back to system since
 	 * there isn't much point in holding memory we can't use
 	 */
 	if (dma_mapping_error(rx_ring->dev, dma)) {
-		__free_pages(page, 0);
+		__free_pages(page, ice_rx_pg_order(rx_ring));
 		rx_ring->rx_stats.alloc_page_failed++;
 		return false;
 	}
 
 	bi->dma = dma;
 	bi->page = page;
-	bi->page_offset = 0;
+	bi->page_offset = ice_rx_offset(rx_ring);
 	page_ref_add(page, USHRT_MAX - 1);
 	bi->pagecnt_bias = USHRT_MAX;
 
@@ -486,7 +598,7 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
 		/* sync the buffer for use by the device */
 		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
 						 bi->page_offset,
-						 ICE_RXBUF_2048,
+						 rx_ring->rx_buf_len,
 						 DMA_FROM_DEVICE);
 
 		/* Refresh the desc even if buffer_addrs didn't change
@@ -557,9 +669,6 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
  */
 static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
 {
-#if (PAGE_SIZE >= 8192)
-	unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
-#endif
 	unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
 	struct page *page = rx_buf->page;
 
@@ -572,7 +681,9 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
 	if (unlikely((page_count(page) - pagecnt_bias) > 1))
 		return false;
 #else
-	if (rx_buf->page_offset > last_offset)
+#define ICE_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048)
+	if (rx_buf->page_offset > ICE_LAST_OFFSET)
 		return false;
 #endif /* PAGE_SIZE < 8192) */
 
@@ -590,6 +701,7 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
 
 /**
  * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
+ * @rx_ring: Rx descriptor ring to transact packets on
  * @rx_buf: buffer containing page to add
  * @skb: sk_buff to place the data into
  * @size: packet length from rx_desc
@@ -599,13 +711,13 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
  * The function will then update the page offset.
  */
 static void
-ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
-		unsigned int size)
+ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+		struct sk_buff *skb, unsigned int size)
 {
 #if (PAGE_SIZE >= 8192)
-	unsigned int truesize = SKB_DATA_ALIGN(size);
+	unsigned int truesize = SKB_DATA_ALIGN(size + ice_rx_offset(rx_ring));
 #else
-	unsigned int truesize = ICE_RXBUF_2048;
+	unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
 #endif
 
 	if (!size)
@@ -679,10 +791,64 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
 }
 
 /**
+ * ice_build_skb - Build skb around an existing buffer
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function builds an skb around an existing Rx buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead.
+ */
+static struct sk_buff *
+ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+	      struct xdp_buff *xdp)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(xdp->data_end -
+					       xdp->data_hard_start);
+#endif
+	struct sk_buff *skb;
+
+	/* Prefetch first cache line of first page. If xdp->data_meta
+	 * is unused, this points exactly as xdp->data, otherwise we
+	 * likely have a consumer accessing first few bytes of meta
+	 * data, and then actual data.
+	 */
+	prefetch(xdp->data_meta);
+#if L1_CACHE_BYTES < 128
+	prefetch((void *)(xdp->data + L1_CACHE_BYTES));
+#endif
+	/* build an skb around the page buffer */
+	skb = build_skb(xdp->data_hard_start, truesize);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* must to record Rx queue, otherwise OS features such as
+	 * symmetric queue won't work
+	 */
+	skb_record_rx_queue(skb, rx_ring->q_index);
+
+	/* update pointers within the skb to store the data */
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	__skb_put(skb, xdp->data_end - xdp->data);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	/* buffer is used by skb, update page_offset */
+	ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+
+	return skb;
+}
+
+/**
  * ice_construct_skb - Allocate skb and populate it
  * @rx_ring: Rx descriptor ring to transact packets on
  * @rx_buf: Rx buffer to pull data from
- * @size: the length of the packet
+ * @xdp: xdp_buff pointing to the data
  *
  * This function allocates an skb. It then populates it with the page
  * data from the current receive descriptor, taking care to set up the
@@ -690,16 +856,16 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
  */
 static struct sk_buff *
 ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
-		  unsigned int size)
+		  struct xdp_buff *xdp)
 {
-	void *va = page_address(rx_buf->page) + rx_buf->page_offset;
+	unsigned int size = xdp->data_end - xdp->data;
 	unsigned int headlen;
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
-	prefetch(va);
+	prefetch(xdp->data);
 #if L1_CACHE_BYTES < 128
-	prefetch((u8 *)va + L1_CACHE_BYTES);
+	prefetch((void *)(xdp->data + L1_CACHE_BYTES));
 #endif /* L1_CACHE_BYTES */
 
 	/* allocate a skb to store the frags */
@@ -712,10 +878,11 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 	/* Determine available headroom for copy */
 	headlen = size;
 	if (headlen > ICE_RX_HDR_SIZE)
-		headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE);
+		headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
 
 	/* align pull length to size of long to optimize memcpy performance */
-	memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+	memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
+							 sizeof(long)));
 
 	/* if we exhaust the linear part then add what is left as a frag */
 	size -= headlen;
@@ -723,7 +890,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 #if (PAGE_SIZE >= 8192)
 		unsigned int truesize = SKB_DATA_ALIGN(size);
 #else
-		unsigned int truesize = ICE_RXBUF_2048;
+		unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
 #endif
 		skb_add_rx_frag(skb, 0, rx_buf->page,
 				rx_buf->page_offset + headlen, size, truesize);
@@ -745,11 +912,18 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
  * @rx_ring: Rx descriptor ring to transact packets on
  * @rx_buf: Rx buffer to pull data from
  *
- * This function will  clean up the contents of the rx_buf. It will
- * either recycle the buffer or unmap it and free the associated resources.
+ * This function will update next_to_clean and then clean up the contents
+ * of the rx_buf. It will either recycle the buffer or unmap it and free
+ * the associated resources.
  */
 static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 {
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
 	if (!rx_buf)
 		return;
 
@@ -759,8 +933,9 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 		rx_ring->rx_stats.page_reuse_count++;
 	} else {
 		/* we are not reusing the buffer so unmap it */
-		dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
-				     DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+		dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
+				     ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+				     ICE_RX_DMA_ATTR);
 		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
 	}
 
@@ -770,227 +945,31 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 }
 
 /**
- * ice_cleanup_headers - Correct empty headers
- * @skb: pointer to current skb being fixed
- *
- * Also address the case where we are pulling data in on pages only
- * and as such no data is present in the skb header.
- *
- * In addition if skb is not at least 60 bytes we need to pad it so that
- * it is large enough to qualify as a valid Ethernet frame.
- *
- * Returns true if an error was encountered and skb was freed.
- */
-static bool ice_cleanup_headers(struct sk_buff *skb)
-{
-	/* if eth_skb_pad returns an error the skb was freed */
-	if (eth_skb_pad(skb))
-		return true;
-
-	return false;
-}
-
-/**
- * ice_test_staterr - tests bits in Rx descriptor status and error fields
- * @rx_desc: pointer to receive descriptor (in le64 format)
- * @stat_err_bits: value to mask
- *
- * This function does some fast chicanery in order to return the
- * value of the mask which is really only used for boolean tests.
- * The status_error_len doesn't need to be shifted because it begins
- * at offset zero.
- */
-static bool
-ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
-{
-	return !!(rx_desc->wb.status_error0 &
-		  cpu_to_le16(stat_err_bits));
-}
-
-/**
  * ice_is_non_eop - process handling of non-EOP buffers
  * @rx_ring: Rx ring being processed
  * @rx_desc: Rx descriptor for current buffer
  * @skb: Current socket buffer containing buffer in progress
  *
- * This function updates next to clean. If the buffer is an EOP buffer
- * this function exits returning false, otherwise it will place the
- * sk_buff in the next buffer to be chained and return true indicating
- * that this is in fact a non-EOP buffer.
+ * If the buffer is an EOP buffer, this function exits returning false,
+ * otherwise return true indicating that this is in fact a non-EOP buffer.
  */
 static bool
 ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
 	       struct sk_buff *skb)
 {
-	u32 ntc = rx_ring->next_to_clean + 1;
-
-	/* fetch, update, and store next to clean */
-	ntc = (ntc < rx_ring->count) ? ntc : 0;
-	rx_ring->next_to_clean = ntc;
-
-	prefetch(ICE_RX_DESC(rx_ring, ntc));
-
 	/* if we are the last buffer then there is nothing else to do */
 #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
 	if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
 		return false;
 
 	/* place skb in next buffer to be received */
-	rx_ring->rx_buf[ntc].skb = skb;
+	rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb;
 	rx_ring->rx_stats.non_eop_descs++;
 
 	return true;
 }
 
 /**
- * ice_ptype_to_htype - get a hash type
- * @ptype: the ptype value from the descriptor
- *
- * Returns a hash type to be used by skb_set_hash
- */
-static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
-{
-	return PKT_HASH_TYPE_NONE;
-}
-
-/**
- * ice_rx_hash - set the hash value in the skb
- * @rx_ring: descriptor ring
- * @rx_desc: specific descriptor
- * @skb: pointer to current skb
- * @rx_ptype: the ptype value from the descriptor
- */
-static void
-ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
-	    struct sk_buff *skb, u8 rx_ptype)
-{
-	struct ice_32b_rx_flex_desc_nic *nic_mdid;
-	u32 hash;
-
-	if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
-		return;
-
-	if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
-		return;
-
-	nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
-	hash = le32_to_cpu(nic_mdid->rss_hash);
-	skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
-}
-
-/**
- * ice_rx_csum - Indicate in skb if checksum is good
- * @ring: the ring we care about
- * @skb: skb currently being received and modified
- * @rx_desc: the receive descriptor
- * @ptype: the packet type decoded by hardware
- *
- * skb->protocol must be set before this function is called
- */
-static void
-ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
-	    union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
-{
-	struct ice_rx_ptype_decoded decoded;
-	u32 rx_error, rx_status;
-	bool ipv4, ipv6;
-
-	rx_status = le16_to_cpu(rx_desc->wb.status_error0);
-	rx_error = rx_status;
-
-	decoded = ice_decode_rx_desc_ptype(ptype);
-
-	/* Start with CHECKSUM_NONE and by default csum_level = 0 */
-	skb->ip_summed = CHECKSUM_NONE;
-	skb_checksum_none_assert(skb);
-
-	/* check if Rx checksum is enabled */
-	if (!(ring->netdev->features & NETIF_F_RXCSUM))
-		return;
-
-	/* check if HW has decoded the packet and checksum */
-	if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
-		return;
-
-	if (!(decoded.known && decoded.outer_ip))
-		return;
-
-	ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
-	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
-	ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
-	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
-
-	if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
-				 BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
-		goto checksum_fail;
-	else if (ipv6 && (rx_status &
-		 (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
-		goto checksum_fail;
-
-	/* check for L4 errors and handle packets that were not able to be
-	 * checksummed due to arrival speed
-	 */
-	if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
-		goto checksum_fail;
-
-	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
-	switch (decoded.inner_prot) {
-	case ICE_RX_PTYPE_INNER_PROT_TCP:
-	case ICE_RX_PTYPE_INNER_PROT_UDP:
-	case ICE_RX_PTYPE_INNER_PROT_SCTP:
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	default:
-		break;
-	}
-	return;
-
-checksum_fail:
-	ring->vsi->back->hw_csum_rx_error++;
-}
-
-/**
- * ice_process_skb_fields - Populate skb header fields from Rx descriptor
- * @rx_ring: Rx descriptor ring packet is being transacted on
- * @rx_desc: pointer to the EOP Rx descriptor
- * @skb: pointer to current skb being populated
- * @ptype: the packet type decoded by hardware
- *
- * This function checks the ring, descriptor, and packet information in
- * order to populate the hash, checksum, VLAN, protocol, and
- * other fields within the skb.
- */
-static void
-ice_process_skb_fields(struct ice_ring *rx_ring,
-		       union ice_32b_rx_flex_desc *rx_desc,
-		       struct sk_buff *skb, u8 ptype)
-{
-	ice_rx_hash(rx_ring, rx_desc, skb, ptype);
-
-	/* modifies the skb - consumes the enet header */
-	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
-
-	ice_rx_csum(rx_ring, skb, rx_desc, ptype);
-}
-
-/**
- * ice_receive_skb - Send a completed packet up the stack
- * @rx_ring: Rx ring in play
- * @skb: packet to send up
- * @vlan_tag: VLAN tag for packet
- *
- * This function sends the completed packet (via. skb) up the stack using
- * gro receive functions (with/without VLAN tag)
- */
-static void
-ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
-{
-	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
-	    (vlan_tag & VLAN_VID_MASK))
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
-	napi_gro_receive(&rx_ring->q_vector->napi, skb);
-}
-
-/**
  * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
  * @rx_ring: Rx descriptor ring to transact packets on
  * @budget: Total limit on number of packets to process
@@ -1006,8 +985,13 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 {
 	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
 	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+	unsigned int xdp_res, xdp_xmit = 0;
+	struct bpf_prog *xdp_prog = NULL;
+	struct xdp_buff xdp;
 	bool failure;
 
+	xdp.rxq = &rx_ring->xdp_rxq;
+
 	/* start the loop to process Rx packets bounded by 'budget' */
 	while (likely(total_rx_pkts < (unsigned int)budget)) {
 		union ice_32b_rx_flex_desc *rx_desc;
@@ -1042,10 +1026,57 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		/* retrieve a buffer from the ring */
 		rx_buf = ice_get_rx_buf(rx_ring, &skb, size);
 
+		if (!size) {
+			xdp.data = NULL;
+			xdp.data_end = NULL;
+			xdp.data_hard_start = NULL;
+			xdp.data_meta = NULL;
+			goto construct_skb;
+		}
+
+		xdp.data = page_address(rx_buf->page) + rx_buf->page_offset;
+		xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring);
+		xdp.data_meta = xdp.data;
+		xdp.data_end = xdp.data + size;
+
+		rcu_read_lock();
+		xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+		if (!xdp_prog) {
+			rcu_read_unlock();
+			goto construct_skb;
+		}
+
+		xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog);
+		rcu_read_unlock();
+		if (!xdp_res)
+			goto construct_skb;
+		if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+			unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+			truesize = ice_rx_pg_size(rx_ring) / 2;
+#else
+			truesize = SKB_DATA_ALIGN(ice_rx_offset(rx_ring) +
+						  size);
+#endif
+			xdp_xmit |= xdp_res;
+			ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+		} else {
+			rx_buf->pagecnt_bias++;
+		}
+		total_rx_bytes += size;
+		total_rx_pkts++;
+
+		cleaned_count++;
+		ice_put_rx_buf(rx_ring, rx_buf);
+		continue;
+construct_skb:
 		if (skb)
-			ice_add_rx_frag(rx_buf, skb, size);
+			ice_add_rx_frag(rx_ring, rx_buf, skb, size);
+		else if (ice_ring_uses_build_skb(rx_ring))
+			skb = ice_build_skb(rx_ring, rx_buf, &xdp);
 		else
-			skb = ice_construct_skb(rx_ring, rx_buf, size);
+			skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
 
 		/* exit if we failed to retrieve a buffer */
 		if (!skb) {
@@ -1072,10 +1103,8 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		if (ice_test_staterr(rx_desc, stat_err_bits))
 			vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
 
-		/* correct empty headers and pad skb if needed (to make valid
-		 * ethernet frame
-		 */
-		if (ice_cleanup_headers(skb)) {
+		/* pad the skb if needed, to make a valid ethernet frame */
+		if (eth_skb_pad(skb)) {
 			skb = NULL;
 			continue;
 		}
@@ -1099,13 +1128,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 	/* return up to cleaned_count buffers to hardware */
 	failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
 
-	/* update queue and vector specific stats */
-	u64_stats_update_begin(&rx_ring->syncp);
-	rx_ring->stats.pkts += total_rx_pkts;
-	rx_ring->stats.bytes += total_rx_bytes;
-	u64_stats_update_end(&rx_ring->syncp);
-	rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
-	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+	if (xdp_prog)
+		ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+
+	ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);
 
 	/* guarantee a trip back through this routine if there was a failure */
 	return failure ? budget : (int)total_rx_pkts;
@@ -1483,9 +1509,14 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
 	/* Since the actual Tx work is minimal, we can give the Tx a larger
 	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
-	ice_for_each_ring(ring, q_vector->tx)
-		if (!ice_clean_tx_irq(ring, budget))
+	ice_for_each_ring(ring, q_vector->tx) {
+		bool wd = ring->xsk_umem ?
+			  ice_clean_tx_irq_zc(ring, budget) :
+			  ice_clean_tx_irq(ring, budget);
+
+		if (!wd)
 			clean_complete = false;
+	}
 
 	/* Handle case where we are called by netpoll with a budget of 0 */
 	if (unlikely(budget <= 0))
@@ -1505,7 +1536,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
 	ice_for_each_ring(ring, q_vector->rx) {
 		int cleaned;
 
-		cleaned = ice_clean_rx_irq(ring, budget_per_ring);
+		/* A dedicated path for zero-copy allows making a single
+		 * comparison in the irq context instead of many inside the
+		 * ice_clean_rx_irq function and makes the codebase cleaner.
+		 */
+		cleaned = ring->xsk_umem ?
+			  ice_clean_rx_irq_zc(ring, budget_per_ring) :
+			  ice_clean_rx_irq(ring, budget_per_ring);
 		work_done += cleaned;
 		/* if we clean as many as budgeted, we must not be done */
 		if (cleaned >= budget_per_ring)
@@ -1527,17 +1564,6 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
 	return min_t(int, work_done, budget - 1);
 }
 
-/* helper function for building cmd/type/offset */
-static __le64
-build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
-{
-	return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
-			   (td_cmd    << ICE_TXD_QW1_CMD_S) |
-			   (td_offset << ICE_TXD_QW1_OFFSET_S) |
-			   ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
-			   (td_tag    << ICE_TXD_QW1_L2TAG1_S));
-}
-
 /**
  * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
  * @tx_ring: the ring to be checked
@@ -1689,9 +1715,9 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
 		i = 0;
 
 	/* write last descriptor with RS and EOP bits */
-	td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
-	tx_desc->cmd_type_offset_bsz =
-			build_ctob(td_cmd, td_offset, size, td_tag);
+	td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
+	tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, size,
+						  td_tag);
 
 	/* Force memory writes to complete before letting h/w know there
 	 * are new descriptors to fetch.
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 94a9280193e2..a84cc0e6dd27 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -4,8 +4,12 @@
 #ifndef _ICE_TXRX_H_
 #define _ICE_TXRX_H_
 
+#include "ice_type.h"
+
 #define ICE_DFLT_IRQ_WORK	256
+#define ICE_RXBUF_3072		3072
 #define ICE_RXBUF_2048		2048
+#define ICE_RXBUF_1536		1536
 #define ICE_MAX_CHAINED_RX_BUFS	5
 #define ICE_MAX_BUF_TXD		8
 #define ICE_MIN_TX_LEN		17
@@ -22,6 +26,71 @@
 #define ICE_RX_BUF_WRITE	16	/* Must be power of 2 */
 #define ICE_MAX_TXQ_PER_TXQG	128
 
+/* Attempt to maximize the headroom available for incoming frames. We use a 2K
+ * buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame.
+ * This leaves us with 512 bytes of room.  From that we need to deduct the
+ * space needed for the shared info and the padding needed to IP align the
+ * frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ *       up negative.  In these cases we should fall back to the legacy
+ *       receive path.
+ */
+#if (PAGE_SIZE < 8192)
+#define ICE_2K_TOO_SMALL_WITH_PADDING \
+((NET_SKB_PAD + ICE_RXBUF_1536) > SKB_WITH_OVERHEAD(ICE_RXBUF_2048))
+
+/**
+ * ice_compute_pad - compute the padding
+ * rx_buf_len: buffer length
+ *
+ * Figure out the size of half page based on given buffer length and
+ * then subtract the skb_shared_info followed by subtraction of the
+ * actual buffer length; this in turn results in the actual space that
+ * is left for padding usage
+ */
+static inline int ice_compute_pad(int rx_buf_len)
+{
+	int half_page_size;
+
+	half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+	return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len;
+}
+
+/**
+ * ice_skb_pad - determine the padding that we can supply
+ *
+ * Figure out the right Rx buffer size and based on that calculate the
+ * padding
+ */
+static inline int ice_skb_pad(void)
+{
+	int rx_buf_len;
+
+	/* If a 2K buffer cannot handle a standard Ethernet frame then
+	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
+	 *
+	 * For a 3K buffer we need to add enough padding to allow for
+	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
+	 * cache-line alignment.
+	 */
+	if (ICE_2K_TOO_SMALL_WITH_PADDING)
+		rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
+	else
+		rx_buf_len = ICE_RXBUF_1536;
+
+	/* if needed make room for NET_IP_ALIGN */
+	rx_buf_len -= NET_IP_ALIGN;
+
+	return ice_compute_pad(rx_buf_len);
+}
+
+#define ICE_SKB_PAD ice_skb_pad()
+#else
+#define ICE_2K_TOO_SMALL_WITH_PADDING false
+#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
 /* We are assuming that the cache line is always 64 Bytes here for ice.
  * In order to make sure that is a correct assumption there is a check in probe
  * to print a warning if the read from GLPCI_CNF2 tells us that the cache line
@@ -49,12 +118,24 @@
 #define ICE_TX_FLAGS_VLAN_PR_S	29
 #define ICE_TX_FLAGS_VLAN_S	16
 
+#define ICE_XDP_PASS		0
+#define ICE_XDP_CONSUMED	BIT(0)
+#define ICE_XDP_TX		BIT(1)
+#define ICE_XDP_REDIR		BIT(2)
+
 #define ICE_RX_DMA_ATTR \
 	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
 
+#define ICE_ETH_PKT_HDR_PAD	(ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+
+#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)
+
 struct ice_tx_buf {
 	struct ice_tx_desc *next_to_watch;
-	struct sk_buff *skb;
+	union {
+		struct sk_buff *skb;
+		void *raw_buf; /* used for XDP */
+	};
 	unsigned int bytecount;
 	unsigned short gso_segs;
 	u32 tx_flags;
@@ -76,9 +157,17 @@ struct ice_tx_offload_params {
 struct ice_rx_buf {
 	struct sk_buff *skb;
 	dma_addr_t dma;
-	struct page *page;
-	unsigned int page_offset;
-	u16 pagecnt_bias;
+	union {
+		struct {
+			struct page *page;
+			unsigned int page_offset;
+			u16 pagecnt_bias;
+		};
+		struct {
+			void *addr;
+			u64 handle;
+		};
+	};
 };
 
 struct ice_q_stats {
@@ -198,18 +287,44 @@ struct ice_ring {
 	};
 
 	struct rcu_head rcu;		/* to avoid race on free */
+	struct bpf_prog *xdp_prog;
+	struct xdp_umem *xsk_umem;
+	struct zero_copy_allocator zca;
+	/* CL3 - 3rd cacheline starts here */
+	struct xdp_rxq_info xdp_rxq;
 	/* CLX - the below items are only accessed infrequently and should be
 	 * in their own cache line if possible
 	 */
+#define ICE_TX_FLAGS_RING_XDP		BIT(0)
+#define ICE_RX_FLAGS_RING_BUILD_SKB	BIT(1)
+	u8 flags;
 	dma_addr_t dma;			/* physical address of ring */
 	unsigned int size;		/* length of descriptor ring in bytes */
 	u32 txq_teid;			/* Added Tx queue TEID */
 	u16 rx_buf_len;
-#ifdef CONFIG_DCB
 	u8 dcb_tc;			/* Traffic class of ring */
-#endif /* CONFIG_DCB */
 } ____cacheline_internodealigned_in_smp;
 
+static inline bool ice_ring_uses_build_skb(struct ice_ring *ring)
+{
+	return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB);
+}
+
+static inline void ice_set_ring_build_skb_ena(struct ice_ring *ring)
+{
+	ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline void ice_clear_ring_build_skb_ena(struct ice_ring *ring)
+{
+	ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline bool ice_ring_is_xdp(struct ice_ring *ring)
+{
+	return !!(ring->flags & ICE_TX_FLAGS_RING_XDP);
+}
+
 struct ice_ring_container {
 	/* head of linked-list of rings */
 	struct ice_ring *ring;
@@ -230,6 +345,19 @@ struct ice_ring_container {
 #define ice_for_each_ring(pos, head) \
 	for (pos = (head).ring; pos; pos = pos->next)
 
+static inline unsigned int ice_rx_pg_order(struct ice_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring->rx_buf_len > (PAGE_SIZE / 2))
+		return 1;
+#endif
+	return 0;
+}
+
+#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring))
+
+union ice_32b_rx_flex_desc;
+
 bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count);
 netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
 void ice_clean_tx_ring(struct ice_ring *tx_ring);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
new file mode 100644
index 000000000000..35bbc4ff603c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_txrx_lib.h"
+
+/**
+ * ice_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new head index
+ */
+void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+{
+	u16 prev_ntu = rx_ring->next_to_use;
+
+	rx_ring->next_to_use = val;
+
+	/* update next to alloc since we have filled the ring */
+	rx_ring->next_to_alloc = val;
+
+	/* QRX_TAIL will be updated with any tail value, but hardware ignores
+	 * the lower 3 bits. This makes it so we only bump tail on meaningful
+	 * boundaries. Also, this allows us to bump tail on intervals of 8 up to
+	 * the budget depending on the current traffic load.
+	 */
+	val &= ~0x7;
+	if (prev_ntu != val) {
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch. (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(val, rx_ring->tail);
+	}
+}
+
+/**
+ * ice_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ */
+static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
+{
+	return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * ice_rx_hash - set the hash value in the skb
+ * @rx_ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: pointer to current skb
+ * @rx_ptype: the ptype value from the descriptor
+ */
+static void
+ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+	    struct sk_buff *skb, u8 rx_ptype)
+{
+	struct ice_32b_rx_flex_desc_nic *nic_mdid;
+	u32 hash;
+
+	if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
+		return;
+
+	if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
+		return;
+
+	nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+	hash = le32_to_cpu(nic_mdid->rss_hash);
+	skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
+}
+
+/**
+ * ice_rx_csum - Indicate in skb if checksum is good
+ * @ring: the ring we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void
+ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
+	    union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
+{
+	struct ice_rx_ptype_decoded decoded;
+	u32 rx_error, rx_status;
+	bool ipv4, ipv6;
+
+	rx_status = le16_to_cpu(rx_desc->wb.status_error0);
+	rx_error = rx_status;
+
+	decoded = ice_decode_rx_desc_ptype(ptype);
+
+	/* Start with CHECKSUM_NONE and by default csum_level = 0 */
+	skb->ip_summed = CHECKSUM_NONE;
+	skb_checksum_none_assert(skb);
+
+	/* check if Rx checksum is enabled */
+	if (!(ring->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* check if HW has decoded the packet and checksum */
+	if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
+		return;
+
+	if (!(decoded.known && decoded.outer_ip))
+		return;
+
+	ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
+	ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
+
+	if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+				 BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+		goto checksum_fail;
+	else if (ipv6 && (rx_status &
+		 (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
+		goto checksum_fail;
+
+	/* check for L4 errors and handle packets that were not able to be
+	 * checksummed due to arrival speed
+	 */
+	if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
+		goto checksum_fail;
+
+	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case ICE_RX_PTYPE_INNER_PROT_TCP:
+	case ICE_RX_PTYPE_INNER_PROT_UDP:
+	case ICE_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	default:
+		break;
+	}
+	return;
+
+checksum_fail:
+	ring->vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * ice_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: Rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+		       union ice_32b_rx_flex_desc *rx_desc,
+		       struct sk_buff *skb, u8 ptype)
+{
+	ice_rx_hash(rx_ring, rx_desc, skb, ptype);
+
+	/* modifies the skb - consumes the enet header */
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+	ice_rx_csum(rx_ring, skb, rx_desc, ptype);
+}
+
+/**
+ * ice_receive_skb - Send a completed packet up the stack
+ * @rx_ring: Rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: VLAN tag for packet
+ *
+ * This function sends the completed packet (via. skb) up the stack using
+ * gro receive functions (with/without VLAN tag)
+ */
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
+{
+	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    (vlan_tag & VLAN_VID_MASK))
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+	napi_gro_receive(&rx_ring->q_vector->napi, skb);
+}
+
+/**
+ * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
+ * @data: packet data pointer
+ * @size: packet data size
+ * @xdp_ring: XDP ring for transmission
+ */
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring)
+{
+	u16 i = xdp_ring->next_to_use;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	dma_addr_t dma;
+
+	if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
+		xdp_ring->tx_stats.tx_busy++;
+		return ICE_XDP_CONSUMED;
+	}
+
+	dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(xdp_ring->dev, dma))
+		return ICE_XDP_CONSUMED;
+
+	tx_buf = &xdp_ring->tx_buf[i];
+	tx_buf->bytecount = size;
+	tx_buf->gso_segs = 1;
+	tx_buf->raw_buf = data;
+
+	/* record length, and DMA address */
+	dma_unmap_len_set(tx_buf, len, size);
+	dma_unmap_addr_set(tx_buf, dma, dma);
+
+	tx_desc = ICE_TX_DESC(xdp_ring, i);
+	tx_desc->buf_addr = cpu_to_le64(dma);
+	tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
+						  size, 0);
+
+	/* Make certain all of the status bits have been updated
+	 * before next_to_watch is written.
+	 */
+	smp_wmb();
+
+	i++;
+	if (i == xdp_ring->count)
+		i = 0;
+
+	tx_buf->next_to_watch = tx_desc;
+	xdp_ring->next_to_use = i;
+
+	return ICE_XDP_TX;
+}
+
+/**
+ * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
+ * @xdp: XDP buffer
+ * @xdp_ring: XDP Tx ring
+ *
+ * Returns negative on failure, 0 on success.
+ */
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring)
+{
+	struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+
+	if (unlikely(!xdpf))
+		return ICE_XDP_CONSUMED;
+
+	return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
+}
+
+/**
+ * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @rx_ring: Rx ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps XDP Tx tail and/or flush redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ */
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res)
+{
+	if (xdp_res & ICE_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_res & ICE_XDP_TX) {
+		struct ice_ring *xdp_ring =
+			rx_ring->vsi->xdp_rings[rx_ring->q_index];
+
+		ice_xdp_ring_update_tail(xdp_ring);
+	}
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
new file mode 100644
index 000000000000..ba9164dad9ae
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_TXRX_LIB_H_
+#define _ICE_TXRX_LIB_H_
+#include "ice.h"
+
+/**
+ * ice_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool
+ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
+{
+	return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits));
+}
+
+static inline __le64
+build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+{
+	return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+			   (td_cmd    << ICE_TXD_QW1_CMD_S) |
+			   (td_offset << ICE_TXD_QW1_OFFSET_S) |
+			   ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+			   (td_tag    << ICE_TXD_QW1_L2TAG1_S));
+}
+
+/**
+ * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ */
+static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring)
+{
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.
+	 */
+	wmb();
+	writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
+void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res);
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring);
+int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring);
+void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val);
+void
+ice_process_skb_fields(struct ice_ring *rx_ring,
+		       union ice_32b_rx_flex_desc *rx_desc,
+		       struct sk_buff *skb, u8 ptype);
+void
+ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
+#endif /* !_ICE_TXRX_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 6667d17a4206..c4854a987130 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -19,6 +19,17 @@ static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
 	return test_bit(tc, &bitmap);
 }
 
+static inline u64 round_up_64bit(u64 a, u32 b)
+{
+	return div64_long(((a) + (b) / 2), (b));
+}
+
+static inline u32 ice_round_to_num(u32 N, u32 R)
+{
+	return ((((N) % (R)) < ((R) / 2)) ? (((N) / (R)) * (R)) :
+		((((N) + (R) - 1) / (R)) * (R)));
+}
+
 /* Driver always calls main vsi_handle first */
 #define ICE_MAIN_VSI_HANDLE		0
 
@@ -35,6 +46,8 @@ static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
 #define ICE_DBG_PKG		BIT_ULL(16)
 #define ICE_DBG_RES		BIT_ULL(17)
 #define ICE_DBG_AQ_MSG		BIT_ULL(24)
+#define ICE_DBG_AQ_DESC		BIT_ULL(25)
+#define ICE_DBG_AQ_DESC_BUF	BIT_ULL(26)
 #define ICE_DBG_AQ_CMD		BIT_ULL(27)
 #define ICE_DBG_USER		BIT_ULL(31)
 
@@ -189,6 +202,7 @@ struct ice_hw_dev_caps {
 	struct ice_hw_common_caps common_cap;
 	u32 num_vfs_exposed;		/* Total number of VFs exposed */
 	u32 num_vsi_allocd_to_host;	/* Excluding EMP VSI */
+	u32 num_funcs;
 };
 
 /* MAC info */
@@ -272,10 +286,56 @@ enum ice_agg_type {
 	ICE_AGG_TYPE_QG
 };
 
+/* Rate limit types */
+enum ice_rl_type {
+	ICE_UNKNOWN_BW = 0,
+	ICE_MIN_BW,		/* for CIR profile */
+	ICE_MAX_BW,		/* for EIR profile */
+	ICE_SHARED_BW		/* for shared profile */
+};
+
+#define ICE_SCHED_MIN_BW		500		/* in Kbps */
+#define ICE_SCHED_MAX_BW		100000000	/* in Kbps */
+#define ICE_SCHED_DFLT_BW		0xFFFFFFFF	/* unlimited */
 #define ICE_SCHED_DFLT_RL_PROF_ID	0
+#define ICE_SCHED_NO_SHARED_RL_PROF_ID	0xFFFF
 #define ICE_SCHED_DFLT_BW_WT		1
+#define ICE_SCHED_INVAL_PROF_ID		0xFFFF
+#define ICE_SCHED_DFLT_BURST_SIZE	(15 * 1024)	/* in bytes (15k) */
 
-/* VSI type list entry to locate corresponding VSI/ag nodes */
+ /* Data structure for saving BW information */
+enum ice_bw_type {
+	ICE_BW_TYPE_PRIO,
+	ICE_BW_TYPE_CIR,
+	ICE_BW_TYPE_CIR_WT,
+	ICE_BW_TYPE_EIR,
+	ICE_BW_TYPE_EIR_WT,
+	ICE_BW_TYPE_SHARED,
+	ICE_BW_TYPE_CNT		/* This must be last */
+};
+
+struct ice_bw {
+	u32 bw;
+	u16 bw_alloc;
+};
+
+struct ice_bw_type_info {
+	DECLARE_BITMAP(bw_t_bitmap, ICE_BW_TYPE_CNT);
+	u8 generic;
+	struct ice_bw cir_bw;
+	struct ice_bw eir_bw;
+	u32 shared_bw;
+};
+
+/* VSI queue context structure for given TC */
+struct ice_q_ctx {
+	u16  q_handle;
+	u32  q_teid;
+	/* bw_t_info saves queue BW information */
+	struct ice_bw_type_info bw_t_info;
+};
+
+/* VSI type list entry to locate corresponding VSI/aggregator nodes */
 struct ice_sched_vsi_info {
 	struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];
 	struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
@@ -364,6 +424,8 @@ struct ice_port_info {
 	struct mutex sched_lock;	/* protect access to TXSched tree */
 	struct ice_sched_node *
 		sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM];
+	/* List contain profile ID(s) and other params per layer */
+	struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM];
 	struct ice_dcbx_cfg local_dcbx_cfg;	/* Oper/Local Cfg */
 	/* DCBX info */
 	struct ice_dcbx_cfg remote_dcbx_cfg;	/* Peer Cfg */
@@ -415,6 +477,8 @@ struct ice_hw {
 
 	u8 pf_id;		/* device profile info */
 
+	u16 max_burst_size;	/* driver sets this value */
+
 	/* Tx Scheduler values */
 	u16 num_tx_sched_layers;
 	u16 num_tx_sched_phys_layers;
@@ -555,6 +619,8 @@ struct ice_hw_port_stats {
 };
 
 /* Checksum and Shadow RAM pointers */
+#define ICE_SR_BOOT_CFG_PTR		0x132
+#define ICE_NVM_OEM_VER_OFF		0x02
 #define ICE_SR_NVM_DEV_STARTER_VER	0x18
 #define ICE_SR_NVM_EETRACK_LO		0x2D
 #define ICE_SR_NVM_EETRACK_HI		0x2E
@@ -568,6 +634,7 @@ struct ice_hw_port_stats {
 #define ICE_OEM_VER_BUILD_MASK		(0xffff << ICE_OEM_VER_BUILD_SHIFT)
 #define ICE_OEM_VER_SHIFT		24
 #define ICE_OEM_VER_MASK		(0xff << ICE_OEM_VER_SHIFT)
+#define ICE_SR_PFA_PTR			0x40
 #define ICE_SR_SECTOR_SIZE_IN_WORDS	0x800
 #define ICE_SR_WORDS_IN_1KB		512
 
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index b45797f39b2f..edb374296d1f 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2,9 +2,39 @@
 /* Copyright (c) 2018, Intel Corporation. */
 
 #include "ice.h"
+#include "ice_base.h"
 #include "ice_lib.h"
 
 /**
+ * ice_validate_vf_id - helper to check if VF ID is valid
+ * @pf: pointer to the PF structure
+ * @vf_id: the ID of the VF to check
+ */
+static int ice_validate_vf_id(struct ice_pf *pf, int vf_id)
+{
+	if (vf_id >= pf->num_alloc_vfs) {
+		dev_err(ice_pf_to_dev(pf), "Invalid VF ID: %d\n", vf_id);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * ice_check_vf_init - helper to check if VF init complete
+ * @pf: pointer to the PF structure
+ * @vf: the pointer to the VF to check
+ */
+static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf)
+{
+	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+		dev_err(ice_pf_to_dev(pf), "VF ID: %d in reset. Try again.\n",
+			vf->vf_id);
+		return -EBUSY;
+	}
+	return 0;
+}
+
+/**
  * ice_err_to_virt err - translate errors for VF return code
  * @ice_err: error return code
  */
@@ -184,12 +214,14 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
 {
 	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
+	struct device *dev;
 	int first, last, v;
 	struct ice_hw *hw;
 
 	hw = &pf->hw;
 	vsi = pf->vsi[vf->lan_vsi_idx];
 
+	dev = ice_pf_to_dev(pf);
 	wr32(hw, VPINT_ALLOC(vf->vf_id), 0);
 	wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0);
 
@@ -208,13 +240,12 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
 	if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG)
 		wr32(hw, VPLAN_TX_QBASE(vf->vf_id), 0);
 	else
-		dev_err(&pf->pdev->dev,
-			"Scattered mode for VF Tx queues is not yet implemented\n");
+		dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n");
 
 	if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG)
 		wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0);
 	else
-		dev_err(&pf->pdev->dev,
+		dev_err(dev,
 			"Scattered mode for VF Rx queues is not yet implemented\n");
 }
 
@@ -289,6 +320,7 @@ static void ice_dis_vf_qs(struct ice_vf *vf)
  */
 void ice_free_vfs(struct ice_pf *pf)
 {
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_hw *hw = &pf->hw;
 	int tmp, i;
 
@@ -310,24 +342,24 @@ void ice_free_vfs(struct ice_pf *pf)
 	if (!pci_vfs_assigned(pf->pdev))
 		pci_disable_sriov(pf->pdev);
 	else
-		dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n");
+		dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
 
 	tmp = pf->num_alloc_vfs;
 	pf->num_vf_qps = 0;
 	pf->num_alloc_vfs = 0;
 	for (i = 0; i < tmp; i++) {
 		if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) {
-			/* disable VF qp mappings */
+			/* disable VF qp mappings and set VF disable state */
 			ice_dis_vf_mappings(&pf->vf[i]);
+			set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states);
 			ice_free_vf_res(&pf->vf[i]);
 		}
 	}
 
 	if (ice_sriov_free_msix_res(pf))
-		dev_err(&pf->pdev->dev,
-			"Failed to free MSIX resources used by SR-IOV\n");
+		dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n");
 
-	devm_kfree(&pf->pdev->dev, pf->vf);
+	devm_kfree(dev, pf->vf);
 	pf->vf = NULL;
 
 	/* This check is for when the driver is unloaded while VFs are
@@ -366,9 +398,11 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
 {
 	struct ice_pf *pf = vf->pf;
 	u32 reg, reg_idx, bit_idx;
+	struct device *dev;
 	struct ice_hw *hw;
 	int vf_abs_id, i;
 
+	dev = ice_pf_to_dev(pf);
 	hw = &pf->hw;
 	vf_abs_id = vf->vf_id + hw->func_caps.vf_base_id;
 
@@ -389,7 +423,7 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
 	 * by the time we get here.
 	 */
 	if (!is_pfr)
-		wr32(hw, VF_MBX_ARQLEN(vf_abs_id), 0);
+		wr32(hw, VF_MBX_ARQLEN(vf->vf_id), 0);
 
 	/* In the case of a VFLR, the HW has already reset the VF and we
 	 * just need to clean up, so don't hit the VFRTRIG register.
@@ -414,7 +448,7 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
 		if ((reg & VF_TRANS_PENDING_M) == 0)
 			break;
 
-		dev_err(&pf->pdev->dev,
+		dev_err(dev,
 			"VF %d PCI transactions stuck\n", vf->vf_id);
 		udelay(ICE_PCI_CIAD_WAIT_DELAY_US);
 	}
@@ -457,13 +491,12 @@ static void ice_vsi_kill_pvid_fill_ctxt(struct ice_vsi_ctx *ctxt)
  */
 static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable)
 {
-	struct device *dev = &vsi->back->pdev->dev;
 	struct ice_hw *hw = &vsi->back->hw;
 	struct ice_vsi_ctx *ctxt;
 	enum ice_status status;
 	int ret = 0;
 
-	ctxt = devm_kzalloc(dev, sizeof(*ctxt), GFP_KERNEL);
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 	if (!ctxt)
 		return -ENOMEM;
 
@@ -475,7 +508,7 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable)
 
 	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
 	if (status) {
-		dev_info(dev, "update VSI for port VLAN failed, err %d aq_err %d\n",
+		dev_info(&vsi->back->pdev->dev, "update VSI for port VLAN failed, err %d aq_err %d\n",
 			 status, hw->adminq.sq_last_status);
 		ret = -EIO;
 		goto out;
@@ -483,7 +516,7 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable)
 
 	vsi->info = ctxt->info;
 out:
-	devm_kfree(dev, ctxt);
+	kfree(ctxt);
 	return ret;
 }
 
@@ -531,14 +564,16 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
 	LIST_HEAD(tmp_add_list);
 	u8 broadcast[ETH_ALEN];
 	struct ice_vsi *vsi;
+	struct device *dev;
 	int status = 0;
 
+	dev = ice_pf_to_dev(pf);
 	/* first vector index is the VFs OICR index */
 	vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf);
 
 	vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id);
 	if (!vsi) {
-		dev_err(&pf->pdev->dev, "Failed to create VF VSI\n");
+		dev_err(dev, "Failed to create VF VSI\n");
 		return -ENOMEM;
 	}
 
@@ -566,8 +601,7 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
 
 	status = ice_add_mac(&pf->hw, &tmp_add_list);
 	if (status)
-		dev_err(&pf->pdev->dev,
-			"could not add mac filters error %d\n", status);
+		dev_err(dev, "could not add mac filters error %d\n", status);
 	else
 		vf->num_mac = 1;
 
@@ -578,7 +612,7 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
 	 * more vectors.
 	 */
 ice_alloc_vsi_res_exit:
-	ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+	ice_free_fltr_list(dev, &tmp_add_list);
 	return status;
 }
 
@@ -634,10 +668,12 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
 	int abs_vf_id, abs_first, abs_last;
 	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
+	struct device *dev;
 	int first, last, v;
 	struct ice_hw *hw;
 	u32 reg;
 
+	dev = ice_pf_to_dev(pf);
 	hw = &pf->hw;
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	first = vf->first_vector_idx;
@@ -685,8 +721,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
 			VPLAN_TX_QBASE_VFNUMQ_M));
 		wr32(hw, VPLAN_TX_QBASE(vf->vf_id), reg);
 	} else {
-		dev_err(&pf->pdev->dev,
-			"Scattered mode for VF Tx queues is not yet implemented\n");
+		dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n");
 	}
 
 	/* set regardless of mapping mode */
@@ -704,8 +739,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
 			VPLAN_RX_QBASE_VFNUMQ_M));
 		wr32(hw, VPLAN_RX_QBASE(vf->vf_id), reg);
 	} else {
-		dev_err(&pf->pdev->dev,
-			"Scattered mode for VF Rx queues is not yet implemented\n");
+		dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n");
 	}
 }
 
@@ -851,6 +885,7 @@ static int ice_check_avail_res(struct ice_pf *pf)
 {
 	int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker);
 	u16 num_msix, num_txq, num_rxq, num_avail_msix;
+	struct device *dev = ice_pf_to_dev(pf);
 
 	if (!pf->num_alloc_vfs || max_valid_res_idx < 0)
 		return -EINVAL;
@@ -883,8 +918,7 @@ static int ice_check_avail_res(struct ice_pf *pf)
 					     ICE_DFLT_INTR_PER_VF,
 					     ICE_MIN_INTR_PER_VF);
 	} else {
-		dev_err(&pf->pdev->dev,
-			"Number of VFs %d exceeds max VF count %d\n",
+		dev_err(dev, "Number of VFs %d exceeds max VF count %d\n",
 			pf->num_alloc_vfs, ICE_MAX_VF_COUNT);
 		return -EIO;
 	}
@@ -1022,12 +1056,12 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m,
  */
 static bool ice_config_res_vfs(struct ice_pf *pf)
 {
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_hw *hw = &pf->hw;
 	int v;
 
 	if (ice_check_avail_res(pf)) {
-		dev_err(&pf->pdev->dev,
-			"Cannot allocate VF resources, try with fewer number of VFs\n");
+		dev_err(dev, "Cannot allocate VF resources, try with fewer number of VFs\n");
 		return false;
 	}
 
@@ -1040,9 +1074,8 @@ static bool ice_config_res_vfs(struct ice_pf *pf)
 		struct ice_vf *vf = &pf->vf[v];
 
 		vf->num_vf_qs = pf->num_vf_qps;
-		dev_dbg(&pf->pdev->dev,
-			"VF-id %d has %d queues configured\n",
-			vf->vf_id, vf->num_vf_qs);
+		dev_dbg(dev, "VF-id %d has %d queues configured\n", vf->vf_id,
+			vf->num_vf_qs);
 		ice_cleanup_and_realloc_vf(vf);
 	}
 
@@ -1066,6 +1099,7 @@ static bool ice_config_res_vfs(struct ice_pf *pf)
  */
 bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 {
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_hw *hw = &pf->hw;
 	struct ice_vf *vf;
 	int v, i;
@@ -1124,7 +1158,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 	 * time, but continue on with the operation.
 	 */
 	if (v < pf->num_alloc_vfs)
-		dev_warn(&pf->pdev->dev, "VF reset check timeout\n");
+		dev_warn(dev, "VF reset check timeout\n");
 
 	/* free VF resources to begin resetting the VSI state */
 	for (v = 0; v < pf->num_alloc_vfs; v++) {
@@ -1141,8 +1175,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 	}
 
 	if (ice_sriov_free_msix_res(pf))
-		dev_err(&pf->pdev->dev,
-			"Failed to free MSIX resources used by SR-IOV\n");
+		dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n");
 
 	if (!ice_config_res_vfs(pf))
 		return false;
@@ -1151,6 +1184,25 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 }
 
 /**
+ * ice_is_vf_disabled
+ * @vf: pointer to the VF info
+ *
+ * Returns true if the PF or VF is disabled, false otherwise.
+ */
+static bool ice_is_vf_disabled(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+
+	/* If the PF has been disabled, there is no need resetting VF until
+	 * PF is active again. Similarly, if the VF has been disabled, this
+	 * means something else is resetting the VF, so we shouldn't continue.
+	 * Otherwise, set disable VF state bit for actual reset, and continue.
+	 */
+	return (test_bit(__ICE_VF_DIS, pf->state) ||
+		test_bit(ICE_VF_STATE_DIS, vf->vf_states));
+}
+
+/**
  * ice_reset_vf - Reset a particular VF
  * @vf: pointer to the VF structure
  * @is_vflr: true if VFLR was issued, false if not
@@ -1161,25 +1213,23 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 {
 	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
+	struct device *dev;
 	struct ice_hw *hw;
 	bool rsd = false;
 	u8 promisc_m;
 	u32 reg;
 	int i;
 
-	/* If the PF has been disabled, there is no need resetting VF until
-	 * PF is active again.
-	 */
-	if (test_bit(__ICE_VF_DIS, pf->state))
-		return false;
+	dev = ice_pf_to_dev(pf);
 
-	/* If the VF has been disabled, this means something else is
-	 * resetting the VF, so we shouldn't continue. Otherwise, set
-	 * disable VF state bit for actual reset, and continue.
-	 */
-	if (test_and_set_bit(ICE_VF_STATE_DIS, vf->vf_states))
-		return false;
+	if (ice_is_vf_disabled(vf)) {
+		dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
+			vf->vf_id);
+		return true;
+	}
 
+	/* Set VF disable bit state here, before triggering reset */
+	set_bit(ICE_VF_STATE_DIS, vf->vf_states);
 	ice_trigger_vf_reset(vf, is_vflr, false);
 
 	vsi = pf->vsi[vf->lan_vsi_idx];
@@ -1216,8 +1266,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 	 * continue on with the operation.
 	 */
 	if (!rsd)
-		dev_warn(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
-			 vf->vf_id);
+		dev_warn(dev, "VF reset check timeout on VF %d\n", vf->vf_id);
 
 	/* disable promiscuous modes in case they were enabled
 	 * ignore any error if disabling process failed
@@ -1231,7 +1280,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 
 		vsi = pf->vsi[vf->lan_vsi_idx];
 		if (ice_vf_set_vsi_promisc(vf, vsi, promisc_m, true))
-			dev_err(&pf->pdev->dev, "disabling promiscuous mode failed\n");
+			dev_err(dev, "disabling promiscuous mode failed\n");
 	}
 
 	/* free VF resources to begin resetting the VSI state */
@@ -1282,19 +1331,26 @@ void ice_vc_notify_reset(struct ice_pf *pf)
 static void ice_vc_notify_vf_reset(struct ice_vf *vf)
 {
 	struct virtchnl_pf_event pfe;
+	struct ice_pf *pf;
+
+	if (!vf)
+		return;
 
-	/* validate the request */
-	if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs)
+	pf = vf->pf;
+	if (ice_validate_vf_id(pf, vf->vf_id))
 		return;
 
-	/* verify if the VF is in either init or active before proceeding */
-	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
-	    !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+	/* Bail out if VF is in disabled state, neither initialized, nor active
+	 * state - otherwise proceed with notifications
+	 */
+	if ((!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
+	     !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) ||
+	    test_bit(ICE_VF_STATE_DIS, vf->vf_states))
 		return;
 
 	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
 	pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
-	ice_aq_send_msg_to_vf(&vf->pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT,
+	ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT,
 			      VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe),
 			      NULL);
 }
@@ -1306,6 +1362,7 @@ static void ice_vc_notify_vf_reset(struct ice_vf *vf)
  */
 static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
 {
+	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_hw *hw = &pf->hw;
 	struct ice_vf *vfs;
 	int i, ret;
@@ -1322,8 +1379,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
 		goto err_unroll_intr;
 	}
 	/* allocate memory */
-	vfs = devm_kcalloc(&pf->pdev->dev, num_alloc_vfs, sizeof(*vfs),
-			   GFP_KERNEL);
+	vfs = devm_kcalloc(dev, num_alloc_vfs, sizeof(*vfs), GFP_KERNEL);
 	if (!vfs) {
 		ret = -ENOMEM;
 		goto err_pci_disable_sriov;
@@ -1352,7 +1408,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
 
 err_unroll_sriov:
 	pf->vf = NULL;
-	devm_kfree(&pf->pdev->dev, vfs);
+	devm_kfree(dev, vfs);
 	vfs = NULL;
 	pf->num_alloc_vfs = 0;
 err_pci_disable_sriov:
@@ -1397,7 +1453,7 @@ static bool ice_pf_state_is_nominal(struct ice_pf *pf)
 static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
 {
 	int pre_existing_vfs = pci_num_vf(pf->pdev);
-	struct device *dev = &pf->pdev->dev;
+	struct device *dev = ice_pf_to_dev(pf);
 	int err;
 
 	if (!ice_pf_state_is_nominal(pf)) {
@@ -1407,7 +1463,7 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
 
 	if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) {
 		dev_err(dev, "This device is not capable of SR-IOV\n");
-		return -ENODEV;
+		return -EOPNOTSUPP;
 	}
 
 	if (pre_existing_vfs && pre_existing_vfs != num_vfs)
@@ -1442,10 +1498,10 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
 int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
 {
 	struct ice_pf *pf = pci_get_drvdata(pdev);
+	struct device *dev = ice_pf_to_dev(pf);
 
 	if (ice_is_safe_mode(pf)) {
-		dev_err(&pf->pdev->dev,
-			"SR-IOV cannot be configured - Device is in Safe Mode\n");
+		dev_err(dev, "SR-IOV cannot be configured - Device is in Safe Mode\n");
 		return -EOPNOTSUPP;
 	}
 
@@ -1455,8 +1511,7 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
 	if (!pci_vfs_assigned(pdev)) {
 		ice_free_vfs(pf);
 	} else {
-		dev_err(&pf->pdev->dev,
-			"can't free VFs because some are assigned to VMs.\n");
+		dev_err(dev, "can't free VFs because some are assigned to VMs.\n");
 		return -EBUSY;
 	}
 
@@ -1495,12 +1550,10 @@ void ice_process_vflr_event(struct ice_pf *pf)
 }
 
 /**
- * ice_vc_dis_vf - Disable a given VF via SW reset
+ * ice_vc_reset_vf - Perform software reset on the VF after informing the AVF
  * @vf: pointer to the VF info
- *
- * Disable the VF through a SW reset
  */
-static void ice_vc_dis_vf(struct ice_vf *vf)
+static void ice_vc_reset_vf(struct ice_vf *vf)
 {
 	ice_vc_notify_vf_reset(vf);
 	ice_reset_vf(vf, false);
@@ -1521,24 +1574,28 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
 		      enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
 {
 	enum ice_status aq_ret;
+	struct device *dev;
 	struct ice_pf *pf;
 
-	/* validate the request */
-	if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs)
+	if (!vf)
 		return -EINVAL;
 
 	pf = vf->pf;
+	if (ice_validate_vf_id(pf, vf->vf_id))
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(pf);
 
 	/* single place to detect unsuccessful return values */
 	if (v_retval) {
 		vf->num_inval_msgs++;
-		dev_info(&pf->pdev->dev, "VF %d failed opcode %d, retval: %d\n",
-			 vf->vf_id, v_opcode, v_retval);
+		dev_info(dev, "VF %d failed opcode %d, retval: %d\n", vf->vf_id,
+			 v_opcode, v_retval);
 		if (vf->num_inval_msgs > ICE_DFLT_NUM_INVAL_MSGS_ALLOWED) {
-			dev_err(&pf->pdev->dev,
+			dev_err(dev,
 				"Number of invalid messages exceeded for VF %d\n",
 				vf->vf_id);
-			dev_err(&pf->pdev->dev, "Use PF Control I/F to enable the VF\n");
+			dev_err(dev, "Use PF Control I/F to enable the VF\n");
 			set_bit(ICE_VF_STATE_DIS, vf->vf_states);
 			return -EIO;
 		}
@@ -1551,7 +1608,7 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
 	aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval,
 				       msg, msglen, NULL);
 	if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) {
-		dev_info(&pf->pdev->dev,
+		dev_info(dev,
 			 "Unable to send the message to VF %d ret %d aq_err %d\n",
 			 vf->vf_id, aq_ret, pf->hw.mailboxq.sq_last_status);
 		return -EIO;
@@ -1599,14 +1656,14 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
 	int len = 0;
 	int ret;
 
-	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+	if (ice_check_vf_init(pf, vf)) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto err;
 	}
 
 	len = sizeof(struct virtchnl_vf_resource);
 
-	vfres = devm_kzalloc(&pf->pdev->dev, len, GFP_KERNEL);
+	vfres = kzalloc(len, GFP_KERNEL);
 	if (!vfres) {
 		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
 		len = 0;
@@ -1672,6 +1729,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
 	ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
 			vf->dflt_lan_addr.addr);
 
+	/* match guest capabilities */
+	vf->driver_caps = vfres->vf_cap_flags;
+
 	set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
 
 err:
@@ -1679,7 +1739,7 @@ err:
 	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES, v_ret,
 				    (u8 *)vfres, len);
 
-	devm_kfree(&pf->pdev->dev, vfres);
+	kfree(vfres);
 	return ret;
 }
 
@@ -1775,7 +1835,7 @@ static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg)
 	struct virtchnl_rss_key *vrk =
 		(struct virtchnl_rss_key *)msg;
 	struct ice_pf *pf = vf->pf;
-	struct ice_vsi *vsi = NULL;
+	struct ice_vsi *vsi;
 
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -1822,7 +1882,7 @@ static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
 	struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg;
 	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
 	struct ice_pf *pf = vf->pf;
-	struct ice_vsi *vsi = NULL;
+	struct ice_vsi *vsi;
 
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -1869,8 +1929,8 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg)
 	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
 	struct virtchnl_queue_select *vqs =
 		(struct virtchnl_queue_select *)msg;
+	struct ice_eth_stats stats = { 0 };
 	struct ice_pf *pf = vf->pf;
-	struct ice_eth_stats stats;
 	struct ice_vsi *vsi;
 
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
@@ -1889,7 +1949,6 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	memset(&stats, 0, sizeof(struct ice_eth_stats));
 	ice_update_eth_stats(vsi);
 
 	stats = vsi->eth_stats;
@@ -2159,9 +2218,11 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
 
 		vector_id = map->vector_id;
 		vsi_id = map->vsi_id;
-		/* validate msg params */
-		if (!(vector_id < pf->hw.func_caps.common_cap
-		    .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
+		/* vector_id is always 0-based for each VF, and can never be
+		 * larger than or equal to the max allowed interrupts per VF
+		 */
+		if (!(vector_id < ICE_MAX_INTR_PER_VF) ||
+		    !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
 		    (!vector_id && (map->rxq_map || map->txq_map))) {
 			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 			goto error_param;
@@ -2252,7 +2313,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 
 	if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF ||
 	    qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
-		dev_err(&pf->pdev->dev,
+		dev_err(ice_pf_to_dev(pf),
 			"VF-%d requesting more than supported number of queues: %d\n",
 			vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -2365,9 +2426,12 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
 	enum virtchnl_ops vc_op;
 	enum ice_status status;
 	struct ice_vsi *vsi;
+	struct device *dev;
 	int mac_count = 0;
 	int i;
 
+	dev = ice_pf_to_dev(pf);
+
 	if (set)
 		vc_op = VIRTCHNL_OP_ADD_ETH_ADDR;
 	else
@@ -2381,7 +2445,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
 
 	if (set && !ice_is_vf_trusted(vf) &&
 	    (vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) {
-		dev_err(&pf->pdev->dev,
+		dev_err(dev,
 			"Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n",
 			vf->vf_id);
 		/* There is no need to let VF know about not being trusted
@@ -2406,13 +2470,13 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
 				/* VF is trying to add filters that the PF
 				 * already added. Just continue.
 				 */
-				dev_info(&pf->pdev->dev,
+				dev_info(dev,
 					 "MAC %pM already set for VF %d\n",
 					 maddr, vf->vf_id);
 				continue;
 			} else {
 				/* VF can't remove dflt_lan_addr/bcast MAC */
-				dev_err(&pf->pdev->dev,
+				dev_err(dev,
 					"VF can't remove default MAC address or MAC %pM programmed by PF for VF %d\n",
 					maddr, vf->vf_id);
 				continue;
@@ -2421,7 +2485,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
 
 		/* check for the invalid cases and bail if necessary */
 		if (is_zero_ether_addr(maddr)) {
-			dev_err(&pf->pdev->dev,
+			dev_err(dev,
 				"invalid MAC %pM provided for VF %d\n",
 				maddr, vf->vf_id);
 			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -2430,7 +2494,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
 
 		if (is_unicast_ether_addr(maddr) &&
 		    !ice_can_vf_change_mac(vf)) {
-			dev_err(&pf->pdev->dev,
+			dev_err(dev,
 				"can't change unicast MAC for untrusted VF %d\n",
 				vf->vf_id);
 			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -2441,12 +2505,12 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
 		status = ice_vsi_cfg_mac_fltr(vsi, maddr, set);
 		if (status == ICE_ERR_DOES_NOT_EXIST ||
 		    status == ICE_ERR_ALREADY_EXISTS) {
-			dev_info(&pf->pdev->dev,
+			dev_info(dev,
 				 "can't %s MAC filters %pM for VF %d, error %d\n",
 				 set ? "add" : "remove", maddr, vf->vf_id,
 				 status);
 		} else if (status) {
-			dev_err(&pf->pdev->dev,
+			dev_err(dev,
 				"can't %s MAC filters for VF %d, error %d\n",
 				set ? "add" : "remove", vf->vf_id, status);
 			v_ret = ice_err_to_virt_err(status);
@@ -2511,7 +2575,9 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
 	u16 max_allowed_vf_queues;
 	u16 tx_rx_queue_left;
 	u16 cur_queues;
+	struct device *dev;
 
+	dev = ice_pf_to_dev(pf);
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -2522,17 +2588,15 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
 				 ice_get_avail_rxq_count(pf));
 	max_allowed_vf_queues = tx_rx_queue_left + cur_queues;
 	if (!req_queues) {
-		dev_err(&pf->pdev->dev,
-			"VF %d tried to request 0 queues. Ignoring.\n",
+		dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n",
 			vf->vf_id);
 	} else if (req_queues > ICE_MAX_BASE_QS_PER_VF) {
-		dev_err(&pf->pdev->dev,
-			"VF %d tried to request more than %d queues.\n",
+		dev_err(dev, "VF %d tried to request more than %d queues.\n",
 			vf->vf_id, ICE_MAX_BASE_QS_PER_VF);
 		vfres->num_queue_pairs = ICE_MAX_BASE_QS_PER_VF;
 	} else if (req_queues > cur_queues &&
 		   req_queues - cur_queues > tx_rx_queue_left) {
-		dev_warn(&pf->pdev->dev,
+		dev_warn(dev,
 			 "VF %d requested %u more queues, but only %u left.\n",
 			 vf->vf_id, req_queues - cur_queues, tx_rx_queue_left);
 		vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues,
@@ -2540,9 +2604,8 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
 	} else {
 		/* request is successful, then reset VF */
 		vf->num_req_qs = req_queues;
-		ice_vc_dis_vf(vf);
-		dev_info(&pf->pdev->dev,
-			 "VF %d granted request of %u queues.\n",
+		ice_vc_reset_vf(vf);
+		dev_info(dev, "VF %d granted request of %u queues.\n",
 			 vf->vf_id, req_queues);
 		return 0;
 	}
@@ -2568,39 +2631,34 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
 		     __be16 vlan_proto)
 {
 	u16 vlanprio = vlan_id | (qos << ICE_VLAN_PRIORITY_S);
-	struct ice_netdev_priv *np = netdev_priv(netdev);
-	struct ice_pf *pf = np->vsi->back;
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
 	struct ice_vsi *vsi;
+	struct device *dev;
 	struct ice_vf *vf;
 	int ret = 0;
 
-	/* validate the request */
-	if (vf_id >= pf->num_alloc_vfs) {
-		dev_err(&pf->pdev->dev, "invalid VF id: %d\n", vf_id);
+	dev = ice_pf_to_dev(pf);
+	if (ice_validate_vf_id(pf, vf_id))
 		return -EINVAL;
-	}
 
 	if (vlan_id > ICE_MAX_VLANID || qos > 7) {
-		dev_err(&pf->pdev->dev, "Invalid VF Parameters\n");
+		dev_err(dev, "Invalid VF Parameters\n");
 		return -EINVAL;
 	}
 
 	if (vlan_proto != htons(ETH_P_8021Q)) {
-		dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n");
+		dev_err(dev, "VF VLAN protocol is not supported\n");
 		return -EPROTONOSUPPORT;
 	}
 
 	vf = &pf->vf[vf_id];
 	vsi = pf->vsi[vf->lan_vsi_idx];
-	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
-		dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id);
+	if (ice_check_vf_init(pf, vf))
 		return -EBUSY;
-	}
 
 	if (le16_to_cpu(vsi->info.pvid) == vlanprio) {
 		/* duplicate request, so just return success */
-		dev_info(&pf->pdev->dev,
-			 "Duplicate pvid %d request\n", vlanprio);
+		dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio);
 		return ret;
 	}
 
@@ -2619,7 +2677,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
 	}
 
 	if (vlan_id) {
-		dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n",
+		dev_info(dev, "Setting VLAN %d, QoS 0x%x on VF %d\n",
 			 vlan_id, qos, vf_id);
 
 		/* add new VLAN filter for each MAC */
@@ -2638,6 +2696,17 @@ error_set_pvid:
 }
 
 /**
+ * ice_vf_vlan_offload_ena - determine if capabilities support VLAN offloads
+ * @caps: VF driver negotiated capabilities
+ *
+ * Return true if VIRTCHNL_VF_OFFLOAD_VLAN capability is set, else return false
+ */
+static bool ice_vf_vlan_offload_ena(u32 caps)
+{
+	return !!(caps & VIRTCHNL_VF_OFFLOAD_VLAN);
+}
+
+/**
  * ice_vc_process_vlan_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
@@ -2653,16 +2722,23 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
 	struct ice_pf *pf = vf->pf;
 	bool vlan_promisc = false;
 	struct ice_vsi *vsi;
+	struct device *dev;
 	struct ice_hw *hw;
 	int status = 0;
 	u8 promisc_m;
 	int i;
 
+	dev = ice_pf_to_dev(pf);
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
 	}
 
+	if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
 	if (!ice_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -2670,7 +2746,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
 
 	if (add_v && !ice_is_vf_trusted(vf) &&
 	    vf->num_vlan >= ICE_MAX_VLAN_PER_VF) {
-		dev_info(&pf->pdev->dev,
+		dev_info(dev,
 			 "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
 			 vf->vf_id);
 		/* There is no need to let VF know about being not trusted,
@@ -2682,7 +2758,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
 	for (i = 0; i < vfl->num_elements; i++) {
 		if (vfl->vlan_id[i] > ICE_MAX_VLANID) {
 			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
-			dev_err(&pf->pdev->dev,
+			dev_err(dev,
 				"invalid VF VLAN id %d\n", vfl->vlan_id[i]);
 			goto error_param;
 		}
@@ -2700,14 +2776,6 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
 		goto error_param;
 	}
 
-	if (ice_vsi_manage_vlan_stripping(vsi, add_v)) {
-		dev_err(&pf->pdev->dev,
-			"%sable VLAN stripping failed for VSI %i\n",
-			 add_v ? "en" : "dis", vsi->vsi_num);
-		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
-		goto error_param;
-	}
-
 	if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
 	    test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
 		vlan_promisc = true;
@@ -2718,7 +2786,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
 
 			if (!ice_is_vf_trusted(vf) &&
 			    vf->num_vlan >= ICE_MAX_VLAN_PER_VF) {
-				dev_info(&pf->pdev->dev,
+				dev_info(dev,
 					 "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
 					 vf->vf_id);
 				/* There is no need to let VF know about being
@@ -2739,7 +2807,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
 				status = ice_cfg_vlan_pruning(vsi, true, false);
 				if (status) {
 					v_ret = VIRTCHNL_STATUS_ERR_PARAM;
-					dev_err(&pf->pdev->dev,
+					dev_err(dev,
 						"Enable VLAN pruning on VLAN ID: %d failed error-%d\n",
 						vid, status);
 					goto error_param;
@@ -2753,7 +2821,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
 							     promisc_m, vid);
 				if (status) {
 					v_ret = VIRTCHNL_STATUS_ERR_PARAM;
-					dev_err(&pf->pdev->dev,
+					dev_err(dev,
 						"Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n",
 						vid, status);
 				}
@@ -2848,6 +2916,11 @@ static int ice_vc_ena_vlan_stripping(struct ice_vf *vf)
 		goto error_param;
 	}
 
+	if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	if (ice_vsi_manage_vlan_stripping(vsi, true))
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -2874,6 +2947,11 @@ static int ice_vc_dis_vlan_stripping(struct ice_vf *vf)
 		goto error_param;
 	}
 
+	if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -2889,6 +2967,33 @@ error_param:
 }
 
 /**
+ * ice_vf_init_vlan_stripping - enable/disable VLAN stripping on initialization
+ * @vf: VF to enable/disable VLAN stripping for on initialization
+ *
+ * If the VIRTCHNL_VF_OFFLOAD_VLAN flag is set enable VLAN stripping, else if
+ * the flag is cleared then we want to disable stripping. For example, the flag
+ * will be cleared when port VLANs are configured by the administrator before
+ * passing the VF to the guest or if the AVF driver doesn't support VLAN
+ * offloads.
+ */
+static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+
+	if (!vsi)
+		return -EINVAL;
+
+	/* don't modify stripping if port VLAN is configured */
+	if (vsi->info.pvid)
+		return 0;
+
+	if (ice_vf_vlan_offload_ena(vf->driver_caps))
+		return ice_vsi_manage_vlan_stripping(vsi, true);
+	else
+		return ice_vsi_manage_vlan_stripping(vsi, false);
+}
+
+/**
  * ice_vc_process_vf_msg - Process request from VF
  * @pf: pointer to the PF structure
  * @event: pointer to the AQ event
@@ -2903,9 +3008,11 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
 	u16 msglen = event->msg_len;
 	u8 *msg = event->msg_buf;
 	struct ice_vf *vf = NULL;
+	struct device *dev;
 	int err = 0;
 
-	if (vf_id >= pf->num_alloc_vfs) {
+	dev = ice_pf_to_dev(pf);
+	if (ice_validate_vf_id(pf, vf_id)) {
 		err = -EINVAL;
 		goto error_handler;
 	}
@@ -2931,7 +3038,7 @@ error_handler:
 	if (err) {
 		ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM,
 				      NULL, 0);
-		dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n",
+		dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n",
 			vf_id, v_opcode, msglen, err);
 		return;
 	}
@@ -2942,6 +3049,10 @@ error_handler:
 		break;
 	case VIRTCHNL_OP_GET_VF_RESOURCES:
 		err = ice_vc_get_vf_res_msg(vf, msg);
+		if (ice_vf_init_vlan_stripping(vf))
+			dev_err(dev,
+				"Failed to initialize VLAN stripping for VF %d\n",
+				vf->vf_id);
 		ice_vc_notify_vf_link_state(vf);
 		break;
 	case VIRTCHNL_OP_RESET_VF:
@@ -2992,8 +3103,8 @@ error_handler:
 		break;
 	case VIRTCHNL_OP_UNKNOWN:
 	default:
-		dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
-			v_opcode, vf_id);
+		dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode,
+			vf_id);
 		err = ice_vc_send_msg_to_vf(vf, v_opcode,
 					    VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
 					    NULL, 0);
@@ -3003,8 +3114,7 @@ error_handler:
 		/* Helper function cares less about error return values here
 		 * as it is busy with pending work.
 		 */
-		dev_info(&pf->pdev->dev,
-			 "PF failed to honor VF %d, opcode %d, error %d\n",
+		dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n",
 			 vf_id, v_opcode, err);
 	}
 }
@@ -3020,24 +3130,18 @@ error_handler:
 int
 ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
 {
-	struct ice_netdev_priv *np = netdev_priv(netdev);
-	struct ice_vsi *vsi = np->vsi;
-	struct ice_pf *pf = vsi->back;
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_vsi *vsi;
 	struct ice_vf *vf;
 
-	/* validate the request */
-	if (vf_id >= pf->num_alloc_vfs) {
-		netdev_err(netdev, "invalid VF id: %d\n", vf_id);
+	if (ice_validate_vf_id(pf, vf_id))
 		return -EINVAL;
-	}
 
 	vf = &pf->vf[vf_id];
 	vsi = pf->vsi[vf->lan_vsi_idx];
 
-	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
-		netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id);
+	if (ice_check_vf_init(pf, vf))
 		return -EBUSY;
-	}
 
 	ivi->vf = vf_id;
 	ether_addr_copy(ivi->mac, vf->dflt_lan_addr.addr);
@@ -3070,33 +3174,29 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
  */
 int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
 {
-	struct ice_netdev_priv *np = netdev_priv(netdev);
-	struct ice_vsi *vsi = np->vsi;
-	struct ice_pf *pf = vsi->back;
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_vsi *vsi = pf->vsi[0];
 	struct ice_vsi_ctx *ctx;
 	enum ice_status status;
+	struct device *dev;
 	struct ice_vf *vf;
 	int ret = 0;
 
-	/* validate the request */
-	if (vf_id >= pf->num_alloc_vfs) {
-		netdev_err(netdev, "invalid VF id: %d\n", vf_id);
+	dev = ice_pf_to_dev(pf);
+	if (ice_validate_vf_id(pf, vf_id))
 		return -EINVAL;
-	}
 
 	vf = &pf->vf[vf_id];
-	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
-		netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id);
+	if (ice_check_vf_init(pf, vf))
 		return -EBUSY;
-	}
 
 	if (ena == vf->spoofchk) {
-		dev_dbg(&pf->pdev->dev, "VF spoofchk already %s\n",
+		dev_dbg(dev, "VF spoofchk already %s\n",
 			ena ? "ON" : "OFF");
 		return 0;
 	}
 
-	ctx = devm_kzalloc(&pf->pdev->dev, sizeof(*ctx), GFP_KERNEL);
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
@@ -3109,7 +3209,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
 
 	status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL);
 	if (status) {
-		dev_dbg(&pf->pdev->dev,
+		dev_dbg(dev,
 			"Error %d, failed to update VSI* parameters\n", status);
 		ret = -EIO;
 		goto out;
@@ -3119,11 +3219,28 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
 	vsi->info.sec_flags = ctx->info.sec_flags;
 	vsi->info.sw_flags2 = ctx->info.sw_flags2;
 out:
-	devm_kfree(&pf->pdev->dev, ctx);
+	kfree(ctx);
 	return ret;
 }
 
 /**
+ * ice_wait_on_vf_reset
+ * @vf: The VF being resseting
+ *
+ * Poll to make sure a given VF is ready after reset
+ */
+static void ice_wait_on_vf_reset(struct ice_vf *vf)
+{
+	int i;
+
+	for (i = 0; i < ICE_MAX_VF_RESET_WAIT; i++) {
+		if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
+			break;
+		msleep(20);
+	}
+}
+
+/**
  * ice_set_vf_mac
  * @netdev: network interface device structure
  * @vf_id: VF identifier
@@ -3133,23 +3250,25 @@ out:
  */
 int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 {
-	struct ice_netdev_priv *np = netdev_priv(netdev);
-	struct ice_vsi *vsi = np->vsi;
-	struct ice_pf *pf = vsi->back;
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
 	struct ice_vf *vf;
 	int ret = 0;
 
-	/* validate the request */
-	if (vf_id >= pf->num_alloc_vfs) {
-		netdev_err(netdev, "invalid VF id: %d\n", vf_id);
+	if (ice_validate_vf_id(pf, vf_id))
 		return -EINVAL;
-	}
 
 	vf = &pf->vf[vf_id];
-	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
-		netdev_err(netdev, "VF %d in reset. Try again.\n", vf_id);
+	/* Don't set MAC on disabled VF */
+	if (ice_is_vf_disabled(vf))
+		return -EINVAL;
+
+	/* In case VF is in reset mode, wait until it is completed. Depending
+	 * on factors like queue disabling routine, this could take ~250ms
+	 */
+	ice_wait_on_vf_reset(vf);
+
+	if (ice_check_vf_init(pf, vf))
 		return -EBUSY;
-	}
 
 	if (is_zero_ether_addr(mac) || is_multicast_ether_addr(mac)) {
 		netdev_err(netdev, "%pM not a valid unicast address\n", mac);
@@ -3167,7 +3286,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 		    "MAC on VF %d set to %pM. VF driver will be reinitialized\n",
 		    vf_id, mac);
 
-	ice_vc_dis_vf(vf);
+	ice_vc_reset_vf(vf);
 	return ret;
 }
 
@@ -3181,30 +3300,34 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
  */
 int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
 {
-	struct ice_netdev_priv *np = netdev_priv(netdev);
-	struct ice_vsi *vsi = np->vsi;
-	struct ice_pf *pf = vsi->back;
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct device *dev;
 	struct ice_vf *vf;
 
-	/* validate the request */
-	if (vf_id >= pf->num_alloc_vfs) {
-		dev_err(&pf->pdev->dev, "invalid VF id: %d\n", vf_id);
+	dev = ice_pf_to_dev(pf);
+	if (ice_validate_vf_id(pf, vf_id))
 		return -EINVAL;
-	}
 
 	vf = &pf->vf[vf_id];
-	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
-		dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id);
+	/* Don't set Trusted Mode on disabled VF */
+	if (ice_is_vf_disabled(vf))
+		return -EINVAL;
+
+	/* In case VF is in reset mode, wait until it is completed. Depending
+	 * on factors like queue disabling routine, this could take ~250ms
+	 */
+	ice_wait_on_vf_reset(vf);
+
+	if (ice_check_vf_init(pf, vf))
 		return -EBUSY;
-	}
 
 	/* Check if already trusted */
 	if (trusted == vf->trusted)
 		return 0;
 
 	vf->trusted = trusted;
-	ice_vc_dis_vf(vf);
-	dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
+	ice_vc_reset_vf(vf);
+	dev_info(dev, "VF %u is now %strusted\n",
 		 vf_id, trusted ? "" : "un");
 
 	return 0;
@@ -3220,26 +3343,21 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
  */
 int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
 {
-	struct ice_netdev_priv *np = netdev_priv(netdev);
-	struct ice_pf *pf = np->vsi->back;
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
 	struct virtchnl_pf_event pfe = { 0 };
 	struct ice_link_status *ls;
 	struct ice_vf *vf;
 	struct ice_hw *hw;
 
-	if (vf_id >= pf->num_alloc_vfs) {
-		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
+	if (ice_validate_vf_id(pf, vf_id))
 		return -EINVAL;
-	}
 
 	vf = &pf->vf[vf_id];
 	hw = &pf->hw;
 	ls = &pf->hw.port_info->phy.link_info;
 
-	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
-		dev_err(&pf->pdev->dev, "vf %d in reset. Try again.\n", vf_id);
+	if (ice_check_vf_init(pf, vf))
 		return -EBUSY;
-	}
 
 	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
 	pfe.severity = PF_EVENT_SEVERITY_INFO;
@@ -3273,3 +3391,48 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
 
 	return 0;
 }
+
+/**
+ * ice_get_vf_stats - populate some stats for the VF
+ * @netdev: the netdev of the PF
+ * @vf_id: the host OS identifier (0-255)
+ * @vf_stats: pointer to the OS memory to be initialized
+ */
+int ice_get_vf_stats(struct net_device *netdev, int vf_id,
+		     struct ifla_vf_stats *vf_stats)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_eth_stats *stats;
+	struct ice_vsi *vsi;
+	struct ice_vf *vf;
+
+	if (ice_validate_vf_id(pf, vf_id))
+		return -EINVAL;
+
+	vf = &pf->vf[vf_id];
+
+	if (ice_check_vf_init(pf, vf))
+		return -EBUSY;
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi)
+		return -EINVAL;
+
+	ice_update_eth_stats(vsi);
+	stats = &vsi->eth_stats;
+
+	memset(vf_stats, 0, sizeof(*vf_stats));
+
+	vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast +
+		stats->rx_multicast;
+	vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast +
+		stats->tx_multicast;
+	vf_stats->rx_bytes   = stats->rx_bytes;
+	vf_stats->tx_bytes   = stats->tx_bytes;
+	vf_stats->broadcast  = stats->rx_broadcast;
+	vf_stats->multicast  = stats->rx_multicast;
+	vf_stats->rx_dropped = stats->rx_discards;
+	vf_stats->tx_dropped = stats->tx_discards;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 0d9880c8bba3..88aa65d5cb31 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -38,6 +38,7 @@
 #define ICE_MAX_POLICY_INTR_PER_VF	33
 #define ICE_MIN_INTR_PER_VF		(ICE_MIN_QS_PER_VF + 1)
 #define ICE_DFLT_INTR_PER_VF		(ICE_DFLT_QS_PER_VF + 1)
+#define ICE_MAX_VF_RESET_WAIT		15
 
 /* Specific VF states */
 enum ice_vf_states {
@@ -121,6 +122,9 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena);
 int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector);
 
 void ice_set_vf_state_qs_dis(struct ice_vf *vf);
+int
+ice_get_vf_stats(struct net_device *netdev, int vf_id,
+		 struct ifla_vf_stats *vf_stats);
 #else /* CONFIG_PCI_IOV */
 #define ice_process_vflr_event(pf) do {} while (0)
 #define ice_free_vfs(pf) do {} while (0)
@@ -193,5 +197,13 @@ ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf,
 {
 	return 0;
 }
+
+static inline int
+ice_get_vf_stats(struct net_device __always_unused *netdev,
+		 int __always_unused vf_id,
+		 struct ifla_vf_stats __always_unused *vf_stats)
+{
+	return -EOPNOTSUPP;
+}
 #endif /* CONFIG_PCI_IOV */
 #endif /* _ICE_VIRTCHNL_PF_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
new file mode 100644
index 000000000000..cf9b8b22d24f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -0,0 +1,1181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
+#include <net/xdp.h>
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_type.h"
+#include "ice_xsk.h"
+#include "ice_txrx.h"
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
+
+/**
+ * ice_qp_reset_stats - Resets all stats for rings of given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
+{
+	memset(&vsi->rx_rings[q_idx]->rx_stats, 0,
+	       sizeof(vsi->rx_rings[q_idx]->rx_stats));
+	memset(&vsi->tx_rings[q_idx]->stats, 0,
+	       sizeof(vsi->tx_rings[q_idx]->stats));
+	if (ice_is_xdp_ena_vsi(vsi))
+		memset(&vsi->xdp_rings[q_idx]->stats, 0,
+		       sizeof(vsi->xdp_rings[q_idx]->stats));
+}
+
+/**
+ * ice_qp_clean_rings - Cleans all the rings of a given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
+{
+	ice_clean_tx_ring(vsi->tx_rings[q_idx]);
+	if (ice_is_xdp_ena_vsi(vsi))
+		ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
+	ice_clean_rx_ring(vsi->rx_rings[q_idx]);
+}
+
+/**
+ * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
+ * @vsi: VSI that has netdev
+ * @q_vector: q_vector that has NAPI context
+ * @enable: true for enable, false for disable
+ */
+static void
+ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
+		     bool enable)
+{
+	if (!vsi->netdev || !q_vector)
+		return;
+
+	if (enable)
+		napi_enable(&q_vector->napi);
+	else
+		napi_disable(&q_vector->napi);
+}
+
+/**
+ * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
+ * @vsi: the VSI that contains queue vector being un-configured
+ * @rx_ring: Rx ring that will have its IRQ disabled
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_ring *rx_ring,
+		 struct ice_q_vector *q_vector)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int base = vsi->base_vector;
+	u16 reg;
+	u32 val;
+
+	/* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so handle
+	 * here only QINT_RQCTL
+	 */
+	reg = rx_ring->reg_idx;
+	val = rd32(hw, QINT_RQCTL(reg));
+	val &= ~QINT_RQCTL_CAUSE_ENA_M;
+	wr32(hw, QINT_RQCTL(reg), val);
+
+	if (q_vector) {
+		u16 v_idx = q_vector->v_idx;
+
+		wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
+		ice_flush(hw);
+		synchronize_irq(pf->msix_entries[v_idx + base].vector);
+	}
+}
+
+/**
+ * ice_qvec_cfg_msix - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+	u16 reg_idx = q_vector->reg_idx;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_ring *ring;
+
+	ice_cfg_itr(hw, q_vector);
+
+	wr32(hw, GLINT_RATE(reg_idx),
+	     ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
+
+	ice_for_each_ring(ring, q_vector->tx)
+		ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx,
+				      q_vector->tx.itr_idx);
+
+	ice_for_each_ring(ring, q_vector->rx)
+		ice_cfg_rxq_interrupt(vsi, ring->reg_idx, reg_idx,
+				      q_vector->rx.itr_idx);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_qvec_ena_irq - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+
+	ice_irq_dynamic_ena(hw, vsi, q_vector);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_qp_dis - Disables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+{
+	struct ice_txq_meta txq_meta = { };
+	struct ice_ring *tx_ring, *rx_ring;
+	struct ice_q_vector *q_vector;
+	int timeout = 50;
+	int err;
+
+	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+		return -EINVAL;
+
+	tx_ring = vsi->tx_rings[q_idx];
+	rx_ring = vsi->rx_rings[q_idx];
+	q_vector = rx_ring->q_vector;
+
+	while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+	netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+
+	ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+
+	ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
+	err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
+	if (err)
+		return err;
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+		memset(&txq_meta, 0, sizeof(txq_meta));
+		ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
+		err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
+					   &txq_meta);
+		if (err)
+			return err;
+	}
+	err = ice_vsi_ctrl_rx_ring(vsi, false, q_idx);
+	if (err)
+		return err;
+
+	ice_qvec_toggle_napi(vsi, q_vector, false);
+	ice_qp_clean_rings(vsi, q_idx);
+	ice_qp_reset_stats(vsi, q_idx);
+
+	return 0;
+}
+
+/**
+ * ice_qp_ena - Enables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
+{
+	struct ice_aqc_add_tx_qgrp *qg_buf;
+	struct ice_ring *tx_ring, *rx_ring;
+	struct ice_q_vector *q_vector;
+	int err;
+
+	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+		return -EINVAL;
+
+	qg_buf = kzalloc(sizeof(*qg_buf), GFP_KERNEL);
+	if (!qg_buf)
+		return -ENOMEM;
+
+	qg_buf->num_txqs = 1;
+
+	tx_ring = vsi->tx_rings[q_idx];
+	rx_ring = vsi->rx_rings[q_idx];
+	q_vector = rx_ring->q_vector;
+
+	err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
+	if (err)
+		goto free_buf;
+
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+		memset(qg_buf, 0, sizeof(*qg_buf));
+		qg_buf->num_txqs = 1;
+		err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
+		if (err)
+			goto free_buf;
+		ice_set_ring_xdp(xdp_ring);
+		xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
+	}
+
+	err = ice_setup_rx_ctx(rx_ring);
+	if (err)
+		goto free_buf;
+
+	ice_qvec_cfg_msix(vsi, q_vector);
+
+	err = ice_vsi_ctrl_rx_ring(vsi, true, q_idx);
+	if (err)
+		goto free_buf;
+
+	clear_bit(__ICE_CFG_BUSY, vsi->state);
+	ice_qvec_toggle_napi(vsi, q_vector, true);
+	ice_qvec_ena_irq(vsi, q_vector);
+
+	netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+free_buf:
+	kfree(qg_buf);
+	return err;
+}
+
+/**
+ * ice_xsk_alloc_umems - allocate a UMEM region for an XDP socket
+ * @vsi: VSI to allocate the UMEM on
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_alloc_umems(struct ice_vsi *vsi)
+{
+	if (vsi->xsk_umems)
+		return 0;
+
+	vsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems),
+				 GFP_KERNEL);
+
+	if (!vsi->xsk_umems) {
+		vsi->num_xsk_umems = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_xsk_add_umem - add a UMEM region for XDP sockets
+ * @vsi: VSI to which the UMEM will be added
+ * @umem: pointer to a requested UMEM region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_add_umem(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+	int err;
+
+	err = ice_xsk_alloc_umems(vsi);
+	if (err)
+		return err;
+
+	vsi->xsk_umems[qid] = umem;
+	vsi->num_xsk_umems_used++;
+
+	return 0;
+}
+
+/**
+ * ice_xsk_remove_umem - Remove an UMEM for a certain ring/qid
+ * @vsi: VSI from which the VSI will be removed
+ * @qid: Ring/qid associated with the UMEM
+ */
+static void ice_xsk_remove_umem(struct ice_vsi *vsi, u16 qid)
+{
+	vsi->xsk_umems[qid] = NULL;
+	vsi->num_xsk_umems_used--;
+
+	if (vsi->num_xsk_umems_used == 0) {
+		kfree(vsi->xsk_umems);
+		vsi->xsk_umems = NULL;
+		vsi->num_xsk_umems = 0;
+	}
+}
+
+/**
+ * ice_xsk_umem_dma_map - DMA map UMEM region for XDP sockets
+ * @vsi: VSI to map the UMEM region
+ * @umem: UMEM to map
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_xsk_umem_dma_map(struct ice_vsi *vsi, struct xdp_umem *umem)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	unsigned int i;
+
+	dev = ice_pf_to_dev(pf);
+	for (i = 0; i < umem->npgs; i++) {
+		dma_addr_t dma = dma_map_page_attrs(dev, umem->pgs[i], 0,
+						    PAGE_SIZE,
+						    DMA_BIDIRECTIONAL,
+						    ICE_RX_DMA_ATTR);
+		if (dma_mapping_error(dev, dma)) {
+			dev_dbg(dev,
+				"XSK UMEM DMA mapping error on page num %d", i);
+			goto out_unmap;
+		}
+
+		umem->pages[i].dma = dma;
+	}
+
+	return 0;
+
+out_unmap:
+	for (; i > 0; i--) {
+		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+				     DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
+		umem->pages[i].dma = 0;
+	}
+
+	return -EFAULT;
+}
+
+/**
+ * ice_xsk_umem_dma_unmap - DMA unmap UMEM region for XDP sockets
+ * @vsi: VSI from which the UMEM will be unmapped
+ * @umem: UMEM to unmap
+ */
+static void ice_xsk_umem_dma_unmap(struct ice_vsi *vsi, struct xdp_umem *umem)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	unsigned int i;
+
+	dev = ice_pf_to_dev(pf);
+	for (i = 0; i < umem->npgs; i++) {
+		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
+				     DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
+
+		umem->pages[i].dma = 0;
+	}
+}
+
+/**
+ * ice_xsk_umem_disable - disable a UMEM region
+ * @vsi: Current VSI
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid)
+{
+	if (!vsi->xsk_umems || qid >= vsi->num_xsk_umems ||
+	    !vsi->xsk_umems[qid])
+		return -EINVAL;
+
+	ice_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]);
+	ice_xsk_remove_umem(vsi, qid);
+
+	return 0;
+}
+
+/**
+ * ice_xsk_umem_enable - enable a UMEM region
+ * @vsi: Current VSI
+ * @umem: pointer to a requested UMEM region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+	struct xdp_umem_fq_reuse *reuseq;
+	int err;
+
+	if (vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq);
+	if (qid >= vsi->num_xsk_umems)
+		return -EINVAL;
+
+	if (vsi->xsk_umems && vsi->xsk_umems[qid])
+		return -EBUSY;
+
+	reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
+	if (!reuseq)
+		return -ENOMEM;
+
+	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+	err = ice_xsk_umem_dma_map(vsi, umem);
+	if (err)
+		return err;
+
+	err = ice_xsk_add_umem(vsi, umem, qid);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/**
+ * ice_xsk_umem_setup - enable/disable a UMEM region depending on its state
+ * @vsi: Current VSI
+ * @umem: UMEM to enable/associate to a ring, NULL to disable
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
+{
+	bool if_running, umem_present = !!umem;
+	int ret = 0, umem_failure = 0;
+
+	if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
+
+	if (if_running) {
+		ret = ice_qp_dis(vsi, qid);
+		if (ret) {
+			netdev_err(vsi->netdev, "ice_qp_dis error = %d", ret);
+			goto xsk_umem_if_up;
+		}
+	}
+
+	umem_failure = umem_present ? ice_xsk_umem_enable(vsi, umem, qid) :
+				      ice_xsk_umem_disable(vsi, qid);
+
+xsk_umem_if_up:
+	if (if_running) {
+		ret = ice_qp_ena(vsi, qid);
+		if (!ret && umem_present)
+			napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
+		else if (ret)
+			netdev_err(vsi->netdev, "ice_qp_ena error = %d", ret);
+	}
+
+	if (umem_failure) {
+		netdev_err(vsi->netdev, "Could not %sable UMEM, error = %d",
+			   umem_present ? "en" : "dis", umem_failure);
+		return umem_failure;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_zca_free - Callback for MEM_TYPE_ZERO_COPY allocations
+ * @zca: zero-cpoy allocator
+ * @handle: Buffer handle
+ */
+void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+{
+	struct ice_rx_buf *rx_buf;
+	struct ice_ring *rx_ring;
+	struct xdp_umem *umem;
+	u64 hr, mask;
+	u16 nta;
+
+	rx_ring = container_of(zca, struct ice_ring, zca);
+	umem = rx_ring->xsk_umem;
+	hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+	mask = umem->chunk_mask;
+
+	nta = rx_ring->next_to_alloc;
+	rx_buf = &rx_ring->rx_buf[nta];
+
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	handle &= mask;
+
+	rx_buf->dma = xdp_umem_get_dma(umem, handle);
+	rx_buf->dma += hr;
+
+	rx_buf->addr = xdp_umem_get_data(umem, handle);
+	rx_buf->addr += hr;
+
+	rx_buf->handle = (u64)handle + umem->headroom;
+}
+
+/**
+ * ice_alloc_buf_fast_zc - Retrieve buffer address from XDP umem
+ * @rx_ring: ring with an xdp_umem bound to it
+ * @rx_buf: buffer to which xsk page address will be assigned
+ *
+ * This function allocates an Rx buffer in the hot path.
+ * The buffer can come from fill queue or recycle queue.
+ *
+ * Returns true if an assignment was successful, false if not.
+ */
+static __always_inline bool
+ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+	struct xdp_umem *umem = rx_ring->xsk_umem;
+	void *addr = rx_buf->addr;
+	u64 handle, hr;
+
+	if (addr) {
+		rx_ring->rx_stats.page_reuse_count++;
+		return true;
+	}
+
+	if (!xsk_umem_peek_addr(umem, &handle)) {
+		rx_ring->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	hr = umem->headroom + XDP_PACKET_HEADROOM;
+
+	rx_buf->dma = xdp_umem_get_dma(umem, handle);
+	rx_buf->dma += hr;
+
+	rx_buf->addr = xdp_umem_get_data(umem, handle);
+	rx_buf->addr += hr;
+
+	rx_buf->handle = handle + umem->headroom;
+
+	xsk_umem_discard_addr(umem);
+	return true;
+}
+
+/**
+ * ice_alloc_buf_slow_zc - Retrieve buffer address from XDP umem
+ * @rx_ring: ring with an xdp_umem bound to it
+ * @rx_buf: buffer to which xsk page address will be assigned
+ *
+ * This function allocates an Rx buffer in the slow path.
+ * The buffer can come from fill queue or recycle queue.
+ *
+ * Returns true if an assignment was successful, false if not.
+ */
+static __always_inline bool
+ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+	struct xdp_umem *umem = rx_ring->xsk_umem;
+	u64 handle, headroom;
+
+	if (!xsk_umem_peek_addr_rq(umem, &handle)) {
+		rx_ring->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	handle &= umem->chunk_mask;
+	headroom = umem->headroom + XDP_PACKET_HEADROOM;
+
+	rx_buf->dma = xdp_umem_get_dma(umem, handle);
+	rx_buf->dma += headroom;
+
+	rx_buf->addr = xdp_umem_get_data(umem, handle);
+	rx_buf->addr += headroom;
+
+	rx_buf->handle = handle + umem->headroom;
+
+	xsk_umem_discard_addr_rq(umem);
+	return true;
+}
+
+/**
+ * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ * @alloc: the function pointer to call for allocation
+ *
+ * This function allocates a number of Rx buffers from the fill ring
+ * or the internal recycle mechanism and places them on the Rx ring.
+ *
+ * Returns false if all allocations were successful, true if any fail.
+ */
+static bool
+ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count,
+		     bool alloc(struct ice_ring *, struct ice_rx_buf *))
+{
+	union ice_32b_rx_flex_desc *rx_desc;
+	u16 ntu = rx_ring->next_to_use;
+	struct ice_rx_buf *rx_buf;
+	bool ret = false;
+
+	if (!count)
+		return false;
+
+	rx_desc = ICE_RX_DESC(rx_ring, ntu);
+	rx_buf = &rx_ring->rx_buf[ntu];
+
+	do {
+		if (!alloc(rx_ring, rx_buf)) {
+			ret = true;
+			break;
+		}
+
+		dma_sync_single_range_for_device(rx_ring->dev, rx_buf->dma, 0,
+						 rx_ring->rx_buf_len,
+						 DMA_BIDIRECTIONAL);
+
+		rx_desc->read.pkt_addr = cpu_to_le64(rx_buf->dma);
+		rx_desc->wb.status_error0 = 0;
+
+		rx_desc++;
+		rx_buf++;
+		ntu++;
+
+		if (unlikely(ntu == rx_ring->count)) {
+			rx_desc = ICE_RX_DESC(rx_ring, 0);
+			rx_buf = rx_ring->rx_buf;
+			ntu = 0;
+		}
+	} while (--count);
+
+	if (rx_ring->next_to_use != ntu)
+		ice_release_rx_desc(rx_ring, ntu);
+
+	return ret;
+}
+
+/**
+ * ice_alloc_rx_bufs_fast_zc - allocate zero copy bufs in the hot path
+ * @rx_ring: Rx ring
+ * @count: number of bufs to allocate
+ *
+ * Returns false on success, true on failure.
+ */
+static bool ice_alloc_rx_bufs_fast_zc(struct ice_ring *rx_ring, u16 count)
+{
+	return ice_alloc_rx_bufs_zc(rx_ring, count,
+				    ice_alloc_buf_fast_zc);
+}
+
+/**
+ * ice_alloc_rx_bufs_slow_zc - allocate zero copy bufs in the slow path
+ * @rx_ring: Rx ring
+ * @count: number of bufs to allocate
+ *
+ * Returns false on success, true on failure.
+ */
+bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count)
+{
+	return ice_alloc_rx_bufs_zc(rx_ring, count,
+				    ice_alloc_buf_slow_zc);
+}
+
+/**
+ * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
+ * @rx_ring: Rx ring
+ */
+static void ice_bump_ntc(struct ice_ring *rx_ring)
+{
+	int ntc = rx_ring->next_to_clean + 1;
+
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+	prefetch(ICE_RX_DESC(rx_ring, ntc));
+}
+
+/**
+ * ice_get_rx_buf_zc - Fetch the current Rx buffer
+ * @rx_ring: Rx ring
+ * @size: size of a buffer
+ *
+ * This function returns the current, received Rx buffer and does
+ * DMA synchronization.
+ *
+ * Returns a pointer to the received Rx buffer.
+ */
+static struct ice_rx_buf *ice_get_rx_buf_zc(struct ice_ring *rx_ring, int size)
+{
+	struct ice_rx_buf *rx_buf;
+
+	rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+
+	dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, 0,
+				      size, DMA_BIDIRECTIONAL);
+
+	return rx_buf;
+}
+
+/**
+ * ice_reuse_rx_buf_zc - reuse an Rx buffer
+ * @rx_ring: Rx ring
+ * @old_buf: The buffer to recycle
+ *
+ * This function recycles a finished Rx buffer, and places it on the recycle
+ * queue (next_to_alloc).
+ */
+static void
+ice_reuse_rx_buf_zc(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
+{
+	unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
+	u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
+	u16 nta = rx_ring->next_to_alloc;
+	struct ice_rx_buf *new_buf;
+
+	new_buf = &rx_ring->rx_buf[nta++];
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	new_buf->dma = old_buf->dma & mask;
+	new_buf->dma += hr;
+
+	new_buf->addr = (void *)((unsigned long)old_buf->addr & mask);
+	new_buf->addr += hr;
+
+	new_buf->handle = old_buf->handle & mask;
+	new_buf->handle += rx_ring->xsk_umem->headroom;
+
+	old_buf->addr = NULL;
+}
+
+/**
+ * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
+ * @rx_ring: Rx ring
+ * @rx_buf: zero-copy Rx buffer
+ * @xdp: XDP buffer
+ *
+ * This function allocates a new skb from a zero-copy Rx buffer.
+ *
+ * Returns the skb on success, NULL on failure.
+ */
+static struct sk_buff *
+ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
+		     struct xdp_buff *xdp)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	unsigned int datasize = xdp->data_end - xdp->data;
+	unsigned int datasize_hard = xdp->data_end -
+				     xdp->data_hard_start;
+	struct sk_buff *skb;
+
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	ice_reuse_rx_buf_zc(rx_ring, rx_buf);
+
+	return skb;
+}
+
+/**
+ * ice_run_xdp_zc - Executes an XDP program in zero-copy path
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
+{
+	int err, result = ICE_XDP_PASS;
+	struct bpf_prog *xdp_prog;
+	struct ice_ring *xdp_ring;
+	u32 act;
+
+	rcu_read_lock();
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+	if (!xdp_prog) {
+		rcu_read_unlock();
+		return ICE_XDP_PASS;
+	}
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	xdp->handle += xdp->data - xdp->data_hard_start;
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];
+		result = ice_xmit_xdp_buff(xdp, xdp_ring);
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fallthrough -- not supported action */
+	case XDP_ABORTED:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		/* fallthrough -- handle aborts by dropping frame */
+	case XDP_DROP:
+		result = ICE_XDP_CONSUMED;
+		break;
+	}
+
+	rcu_read_unlock();
+	return result;
+}
+
+/**
+ * ice_clean_rx_irq_zc - consumes packets from the hardware ring
+ * @rx_ring: AF_XDP Rx ring
+ * @budget: NAPI budget
+ *
+ * Returns number of processed packets on success, remaining budget on failure.
+ */
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+	unsigned int xdp_xmit = 0;
+	struct xdp_buff xdp;
+	bool failure = 0;
+
+	xdp.rxq = &rx_ring->xdp_rxq;
+
+	while (likely(total_rx_packets < (unsigned int)budget)) {
+		union ice_32b_rx_flex_desc *rx_desc;
+		unsigned int size, xdp_res = 0;
+		struct ice_rx_buf *rx_buf;
+		struct sk_buff *skb;
+		u16 stat_err_bits;
+		u16 vlan_tag = 0;
+		u8 rx_ptype;
+
+		if (cleaned_count >= ICE_RX_BUF_WRITE) {
+			failure |= ice_alloc_rx_bufs_fast_zc(rx_ring,
+							     cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+		if (!ice_test_staterr(rx_desc, stat_err_bits))
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we have
+		 * verified the descriptor has been written back.
+		 */
+		dma_rmb();
+
+		size = le16_to_cpu(rx_desc->wb.pkt_len) &
+				   ICE_RX_FLX_DESC_PKT_LEN_M;
+		if (!size)
+			break;
+
+		rx_buf = ice_get_rx_buf_zc(rx_ring, size);
+		if (!rx_buf->addr)
+			break;
+
+		xdp.data = rx_buf->addr;
+		xdp.data_meta = xdp.data;
+		xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
+		xdp.data_end = xdp.data + size;
+		xdp.handle = rx_buf->handle;
+
+		xdp_res = ice_run_xdp_zc(rx_ring, &xdp);
+		if (xdp_res) {
+			if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+				xdp_xmit |= xdp_res;
+				rx_buf->addr = NULL;
+			} else {
+				ice_reuse_rx_buf_zc(rx_ring, rx_buf);
+			}
+
+			total_rx_bytes += size;
+			total_rx_packets++;
+			cleaned_count++;
+
+			ice_bump_ntc(rx_ring);
+			continue;
+		}
+
+		/* XDP_PASS path */
+		skb = ice_construct_skb_zc(rx_ring, rx_buf, &xdp);
+		if (!skb) {
+			rx_ring->rx_stats.alloc_buf_failed++;
+			break;
+		}
+
+		cleaned_count++;
+		ice_bump_ntc(rx_ring);
+
+		if (eth_skb_pad(skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		total_rx_bytes += skb->len;
+		total_rx_packets++;
+
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+		if (ice_test_staterr(rx_desc, stat_err_bits))
+			vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
+
+		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+				       ICE_RX_FLEX_DESC_PTYPE_M;
+
+		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+		ice_receive_skb(rx_ring, skb, vlan_tag);
+	}
+
+	ice_finalize_xdp_rx(rx_ring, xdp_xmit);
+	ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
+
+	return failure ? budget : (int)total_rx_packets;
+}
+
+/**
+ * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: max number of frames to xmit
+ *
+ * Returns true if cleanup/transmission is done.
+ */
+static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget)
+{
+	struct ice_tx_desc *tx_desc = NULL;
+	bool work_done = true;
+	struct xdp_desc desc;
+	dma_addr_t dma;
+
+	while (likely(budget-- > 0)) {
+		struct ice_tx_buf *tx_buf;
+
+		if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) {
+			xdp_ring->tx_stats.tx_busy++;
+			work_done = false;
+			break;
+		}
+
+		tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
+
+		if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
+			break;
+
+		dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
+
+		dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
+					   DMA_BIDIRECTIONAL);
+
+		tx_buf->bytecount = desc.len;
+
+		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+		tx_desc->buf_addr = cpu_to_le64(dma);
+		tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD,
+							  0, desc.len, 0);
+
+		xdp_ring->next_to_use++;
+		if (xdp_ring->next_to_use == xdp_ring->count)
+			xdp_ring->next_to_use = 0;
+	}
+
+	if (tx_desc) {
+		ice_xdp_ring_update_tail(xdp_ring);
+		xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
+	}
+
+	return budget > 0 && work_done;
+}
+
+/**
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
+ * @xdp_ring: XDP Tx ring
+ * @tx_buf: Tx buffer to clean
+ */
+static void
+ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+{
+	xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
+	dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+			 dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries
+ * @xdp_ring: XDP Tx ring
+ * @budget: NAPI budget
+ *
+ * Returns true if cleanup/tranmission is done.
+ */
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget)
+{
+	int total_packets = 0, total_bytes = 0;
+	s16 ntc = xdp_ring->next_to_clean;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	bool xmit_done = true;
+	u32 xsk_frames = 0;
+
+	tx_desc = ICE_TX_DESC(xdp_ring, ntc);
+	tx_buf = &xdp_ring->tx_buf[ntc];
+	ntc -= xdp_ring->count;
+
+	do {
+		if (!(tx_desc->cmd_type_offset_bsz &
+		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+			break;
+
+		total_bytes += tx_buf->bytecount;
+		total_packets++;
+
+		if (tx_buf->raw_buf) {
+			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+			tx_buf->raw_buf = NULL;
+		} else {
+			xsk_frames++;
+		}
+
+		tx_desc->cmd_type_offset_bsz = 0;
+		tx_buf++;
+		tx_desc++;
+		ntc++;
+
+		if (unlikely(!ntc)) {
+			ntc -= xdp_ring->count;
+			tx_buf = xdp_ring->tx_buf;
+			tx_desc = ICE_TX_DESC(xdp_ring, 0);
+		}
+
+		prefetch(tx_desc);
+
+	} while (likely(--budget));
+
+	ntc += xdp_ring->count;
+	xdp_ring->next_to_clean = ntc;
+
+	if (xsk_frames)
+		xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+
+	ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes);
+	xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK);
+
+	return budget > 0 && xmit_done;
+}
+
+/**
+ * ice_xsk_wakeup - Implements ndo_xsk_wakeup
+ * @netdev: net_device
+ * @queue_id: queue to wake up
+ * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
+ *
+ * Returns negative on error, zero otherwise.
+ */
+int
+ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
+	       u32 __always_unused flags)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_q_vector *q_vector;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_ring *ring;
+
+	if (test_bit(__ICE_DOWN, vsi->state))
+		return -ENETDOWN;
+
+	if (!ice_is_xdp_ena_vsi(vsi))
+		return -ENXIO;
+
+	if (queue_id >= vsi->num_txq)
+		return -ENXIO;
+
+	if (!vsi->xdp_rings[queue_id]->xsk_umem)
+		return -ENXIO;
+
+	ring = vsi->xdp_rings[queue_id];
+
+	/* The idea here is that if NAPI is running, mark a miss, so
+	 * it will run again. If not, trigger an interrupt and
+	 * schedule the NAPI from interrupt context. If NAPI would be
+	 * scheduled here, the interrupt affinity would not be
+	 * honored.
+	 */
+	q_vector = ring->q_vector;
+	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+		ice_trigger_sw_intr(&vsi->back->hw, q_vector);
+
+	return 0;
+}
+
+/**
+ * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP UMEM attached
+ * @vsi: VSI to be checked
+ *
+ * Returns true if any of the Rx rings has an AF_XDP UMEM attached
+ */
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (!vsi->xsk_umems)
+		return false;
+
+	for (i = 0; i < vsi->num_xsk_umems; i++) {
+		if (vsi->xsk_umems[i])
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_xsk_clean_rx_ring - clean UMEM queues connected to a given Rx ring
+ * @rx_ring: ring to be cleaned
+ */
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
+{
+	u16 i;
+
+	for (i = 0; i < rx_ring->count; i++) {
+		struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+
+		if (!rx_buf->addr)
+			continue;
+
+		xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_buf->handle);
+		rx_buf->addr = NULL;
+	}
+}
+
+/**
+ * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its UMEM queues
+ * @xdp_ring: XDP_Tx ring
+ */
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring)
+{
+	u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
+	u32 xsk_frames = 0;
+
+	while (ntc != ntu) {
+		struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
+
+		if (tx_buf->raw_buf)
+			ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+		else
+			xsk_frames++;
+
+		tx_buf->raw_buf = NULL;
+
+		ntc++;
+		if (ntc >= xdp_ring->count)
+			ntc = 0;
+	}
+
+	if (xsk_frames)
+		xsk_umem_complete_tx(xdp_ring->xsk_umem, xsk_frames);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
new file mode 100644
index 000000000000..3479e1de98fe
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_XSK_H_
+#define _ICE_XSK_H_
+#include "ice_txrx.h"
+#include "ice.h"
+
+struct ice_vsi;
+
+#ifdef CONFIG_XDP_SOCKETS
+int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid);
+void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
+int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget);
+bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget);
+int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
+bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count);
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
+void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring);
+void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring);
+#else
+static inline int
+ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi,
+		   struct xdp_umem __always_unused *umem,
+		   u16 __always_unused qid)
+{
+	return -ENOTSUPP;
+}
+
+static inline void
+ice_zca_free(struct zero_copy_allocator __always_unused *zca,
+	     unsigned long __always_unused handle)
+{
+}
+
+static inline int
+ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring,
+		    int __always_unused budget)
+{
+	return 0;
+}
+
+static inline bool
+ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring,
+		    int __always_unused budget)
+{
+	return false;
+}
+
+static inline bool
+ice_alloc_rx_bufs_slow_zc(struct ice_ring __always_unused *rx_ring,
+			  u16 __always_unused count)
+{
+	return false;
+}
+
+static inline bool ice_xsk_any_rx_ring_ena(struct ice_vsi __always_unused *vsi)
+{
+	return false;
+}
+
+static inline int
+ice_xsk_wakeup(struct net_device __always_unused *netdev,
+	       u32 __always_unused queue_id, u32 __always_unused flags)
+{
+	return -ENOTSUPP;
+}
+
+#define ice_xsk_clean_rx_ring(rx_ring) do {} while (0)
+#define ice_xsk_clean_xdp_ring(xdp_ring) do {} while (0)
+#endif /* CONFIG_XDP_SOCKETS */
+#endif /* !_ICE_XSK_H_ */
author	Linus Torvalds <torvalds@linux-foundation.org>	2019-11-25 20:02:57 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-11-25 20:02:57 -0800
commit	386403a115f95997c2715691226e11a7b5cffcfd (patch)
tree	a685df70bd3d5b295683713818ddf0752c3d75b6 /drivers/net/ethernet/intel/ice
parent	642356cb5f4a8c82b5ca5ebac288c327d10df236 (diff)
parent	622dc5ad8052f4f0c6b7a12787696a5caa3c6a58 (diff)
download	kernel_replicant_linux-386403a115f95997c2715691226e11a7b5cffcfd.tar.gz kernel_replicant_linux-386403a115f95997c2715691226e11a7b5cffcfd.tar.bz2 kernel_replicant_linux-386403a115f95997c2715691226e11a7b5cffcfd.zip